summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/Kconfig29
-rw-r--r--arch/alpha/kernel/perf_event.c4
-rw-r--r--arch/alpha/kernel/vmlinux.lds.S18
-rw-r--r--arch/arc/kernel/vmlinux.lds.S6
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/boot/dts/am3517.dtsi31
-rw-r--r--arch/arm/boot/dts/am3517_mt_ventoux.dts2
-rw-r--r--arch/arm/boot/dts/am571x-idk.dts27
-rw-r--r--arch/arm/boot/dts/am572x-idk.dts5
-rw-r--r--arch/arm/boot/dts/am574x-idk.dts5
-rw-r--r--arch/arm/boot/dts/am57xx-idk-common.dtsi5
-rw-r--r--arch/arm/boot/dts/dra7-l4.dtsi52
-rw-r--r--arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts2
-rw-r--r--arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts2
-rw-r--r--arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi6
-rw-r--r--arch/arm/boot/dts/omap3-beagle-xm.dts2
-rw-r--r--arch/arm/boot/dts/omap3-beagle.dts2
-rw-r--r--arch/arm/boot/dts/omap3-cm-t3530.dts2
-rw-r--r--arch/arm/boot/dts/omap3-cm-t3730.dts2
-rw-r--r--arch/arm/boot/dts/omap3-devkit8000-lcd43.dts2
-rw-r--r--arch/arm/boot/dts/omap3-devkit8000-lcd70.dts2
-rw-r--r--arch/arm/boot/dts/omap3-devkit8000.dts2
-rw-r--r--arch/arm/boot/dts/omap3-gta04.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-ha-lcd.dts2
-rw-r--r--arch/arm/boot/dts/omap3-ha.dts2
-rw-r--r--arch/arm/boot/dts/omap3-igep0020-rev-f.dts2
-rw-r--r--arch/arm/boot/dts/omap3-igep0020.dts2
-rw-r--r--arch/arm/boot/dts/omap3-igep0030-rev-g.dts2
-rw-r--r--arch/arm/boot/dts/omap3-igep0030.dts2
-rw-r--r--arch/arm/boot/dts/omap3-ldp.dts2
-rw-r--r--arch/arm/boot/dts/omap3-lilly-a83x.dtsi2
-rw-r--r--arch/arm/boot/dts/omap3-lilly-dbb056.dts2
-rw-r--r--arch/arm/boot/dts/omap3-n9.dts2
-rw-r--r--arch/arm/boot/dts/omap3-n900.dts6
-rw-r--r--arch/arm/boot/dts/omap3-n950-n9.dtsi7
-rw-r--r--arch/arm/boot/dts/omap3-n950.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-alto35.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-gallop43.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-palo35.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-palo43.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-summit.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-tobi.dts2
-rw-r--r--arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts2
-rw-r--r--arch/arm/boot/dts/omap3-pandora-1ghz.dts2
-rw-r--r--arch/arm/boot/dts/omap3-pandora-common.dtsi36
-rw-r--r--arch/arm/boot/dts/omap3-sbc-t3530.dts2
-rw-r--r--arch/arm/boot/dts/omap3-sbc-t3730.dts2
-rw-r--r--arch/arm/boot/dts/omap3-sniper.dts2
-rw-r--r--arch/arm/boot/dts/omap3-thunder.dts2
-rw-r--r--arch/arm/boot/dts/omap3-zoom3.dts2
-rw-r--r--arch/arm/boot/dts/omap3430-sdp.dts2
-rw-r--r--arch/arm/boot/dts/omap34xx.dtsi66
-rw-r--r--arch/arm/boot/dts/omap36xx.dtsi65
-rw-r--r--arch/arm/configs/omap2plus_defconfig1
-rw-r--r--arch/arm/crypto/Kconfig36
-rw-r--r--arch/arm/crypto/Makefile49
-rw-r--r--arch/arm/crypto/chacha-glue.c343
-rw-r--r--arch/arm/crypto/chacha-neon-glue.c202
-rw-r--r--arch/arm/crypto/chacha-scalar-core.S460
-rw-r--r--arch/arm/crypto/crct10dif-ce-core.S2
-rw-r--r--arch/arm/crypto/curve25519-core.S2062
-rw-r--r--arch/arm/crypto/curve25519-glue.c127
-rw-r--r--arch/arm/crypto/ghash-ce-core.S1
-rw-r--r--arch/arm/crypto/poly1305-armv4.pl1236
-rw-r--r--arch/arm/crypto/poly1305-core.S_shipped1158
-rw-r--r--arch/arm/crypto/poly1305-glue.c276
-rw-r--r--arch/arm/crypto/sha1-ce-core.S1
-rw-r--r--arch/arm/crypto/sha2-ce-core.S1
-rw-r--r--arch/arm/kernel/vmlinux-xip.lds.S4
-rw-r--r--arch/arm/kernel/vmlinux.lds.S4
-rw-r--r--arch/arm/mach-imx/cpuidle-imx6q.c4
-rw-r--r--arch/arm/mach-omap2/Makefile3
-rw-r--r--arch/arm/mach-omap2/common.h1
-rw-r--r--arch/arm/mach-omap2/hsmmc.c171
-rw-r--r--arch/arm/mach-omap2/hsmmc.h32
-rw-r--r--arch/arm/mach-omap2/pdata-quirks.c119
-rw-r--r--arch/arm/mach-pxa/icontrol.c9
-rw-r--r--arch/arm/mach-pxa/zeus.c9
-rw-r--r--arch/arm/mach-tegra/cpuidle-tegra20.c2
-rw-r--r--arch/arm/plat-pxa/ssp.c4
-rw-r--r--arch/arm64/Kconfig3
-rw-r--r--arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts1
-rw-r--r--arch/arm64/crypto/Kconfig17
-rw-r--r--arch/arm64/crypto/Makefile10
-rw-r--r--arch/arm64/crypto/aes-neonbs-glue.c2
-rw-r--r--arch/arm64/crypto/chacha-neon-glue.c81
-rw-r--r--arch/arm64/crypto/ghash-ce-core.S501
-rw-r--r--arch/arm64/crypto/ghash-ce-glue.c293
-rw-r--r--arch/arm64/crypto/poly1305-armv8.pl913
-rw-r--r--arch/arm64/crypto/poly1305-core.S_shipped835
-rw-r--r--arch/arm64/crypto/poly1305-glue.c237
-rw-r--r--arch/arm64/kernel/hw_breakpoint.c8
-rw-r--r--arch/arm64/kernel/smp.c11
-rw-r--r--arch/arm64/kernel/vmlinux.lds.S10
-rw-r--r--arch/arm64/mm/mmu.c2
-rw-r--r--arch/c6x/kernel/vmlinux.lds.S8
-rw-r--r--arch/csky/kernel/vmlinux.lds.S5
-rw-r--r--arch/h8300/kernel/vmlinux.lds.S9
-rw-r--r--arch/hexagon/kernel/vmlinux.lds.S5
-rw-r--r--arch/ia64/kernel/setup.c2
-rw-r--r--arch/ia64/kernel/time.c4
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S20
-rw-r--r--arch/m68k/kernel/vmlinux-nommu.lds4
-rw-r--r--arch/m68k/kernel/vmlinux-std.lds4
-rw-r--r--arch/m68k/kernel/vmlinux-sun3.lds4
-rw-r--r--arch/microblaze/Kconfig1
-rw-r--r--arch/microblaze/configs/mmu_defconfig3
-rw-r--r--arch/microblaze/include/asm/irq.h1
-rw-r--r--arch/microblaze/kernel/entry.S5
-rw-r--r--arch/microblaze/kernel/head.S2
-rw-r--r--arch/microblaze/kernel/vmlinux.lds.S10
-rw-r--r--arch/mips/Kconfig1
-rw-r--r--arch/mips/Makefile2
-rw-r--r--arch/mips/crypto/Makefile18
-rw-r--r--arch/mips/crypto/chacha-core.S497
-rw-r--r--arch/mips/crypto/chacha-glue.c150
-rw-r--r--arch/mips/crypto/poly1305-glue.c203
-rw-r--r--arch/mips/crypto/poly1305-mips.pl1273
-rw-r--r--arch/mips/kernel/vmlinux.lds.S15
-rw-r--r--arch/mips/ralink/Kconfig1
-rw-r--r--arch/nds32/kernel/vmlinux.lds.S5
-rw-r--r--arch/nios2/kernel/vmlinux.lds.S5
-rw-r--r--arch/openrisc/kernel/vmlinux.lds.S7
-rw-r--r--arch/parisc/kernel/vmlinux.lds.S11
-rw-r--r--arch/powerpc/crypto/aes-spe-glue.c454
-rw-r--r--arch/powerpc/include/asm/asm-prototypes.h3
-rw-r--r--arch/powerpc/include/asm/local.h2
-rw-r--r--arch/powerpc/include/asm/security_features.h3
-rw-r--r--arch/powerpc/kernel/entry_64.S6
-rw-r--r--arch/powerpc/kernel/security.c57
-rw-r--r--arch/powerpc/kernel/time.c6
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S37
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S30
-rw-r--r--arch/powerpc/perf/core-book3s.c18
-rw-r--r--arch/powerpc/platforms/pseries/dtl.c38
-rw-r--r--arch/powerpc/platforms/pseries/hvCall_inst.c12
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c15
-rw-r--r--arch/riscv/Kconfig52
-rw-r--r--arch/riscv/Makefile13
-rw-r--r--arch/riscv/boot/Makefile19
-rw-r--r--arch/riscv/boot/dts/sifive/fu540-c000.dtsi7
-rw-r--r--arch/riscv/boot/loader.S8
-rw-r--r--arch/riscv/boot/loader.lds.S16
-rw-r--r--arch/riscv/configs/nommu_virt_defconfig78
-rw-r--r--arch/riscv/include/asm/asm-prototypes.h1
-rw-r--r--arch/riscv/include/asm/cache.h8
-rw-r--r--arch/riscv/include/asm/clint.h39
-rw-r--r--arch/riscv/include/asm/csr.h74
-rw-r--r--arch/riscv/include/asm/current.h6
-rw-r--r--arch/riscv/include/asm/elf.h4
-rw-r--r--arch/riscv/include/asm/fixmap.h2
-rw-r--r--arch/riscv/include/asm/ftrace.h5
-rw-r--r--arch/riscv/include/asm/futex.h12
-rw-r--r--arch/riscv/include/asm/hwcap.h7
-rw-r--r--arch/riscv/include/asm/image.h6
-rw-r--r--arch/riscv/include/asm/io.h149
-rw-r--r--arch/riscv/include/asm/irqflags.h12
-rw-r--r--arch/riscv/include/asm/kprobes.h6
-rw-r--r--arch/riscv/include/asm/mmio.h168
-rw-r--r--arch/riscv/include/asm/mmiowb.h2
-rw-r--r--arch/riscv/include/asm/mmu.h3
-rw-r--r--arch/riscv/include/asm/page.h10
-rw-r--r--arch/riscv/include/asm/pci.h6
-rw-r--r--arch/riscv/include/asm/pgalloc.h2
-rw-r--r--arch/riscv/include/asm/pgtable.h94
-rw-r--r--arch/riscv/include/asm/processor.h2
-rw-r--r--arch/riscv/include/asm/ptrace.h16
-rw-r--r--arch/riscv/include/asm/sbi.h11
-rw-r--r--arch/riscv/include/asm/seccomp.h10
-rw-r--r--arch/riscv/include/asm/sparsemem.h6
-rw-r--r--arch/riscv/include/asm/spinlock_types.h2
-rw-r--r--arch/riscv/include/asm/switch_to.h10
-rw-r--r--arch/riscv/include/asm/thread_info.h5
-rw-r--r--arch/riscv/include/asm/timex.h19
-rw-r--r--arch/riscv/include/asm/tlbflush.h12
-rw-r--r--arch/riscv/include/asm/uaccess.h4
-rw-r--r--arch/riscv/include/uapi/asm/elf.h6
-rw-r--r--arch/riscv/include/uapi/asm/hwcap.h6
-rw-r--r--arch/riscv/include/uapi/asm/ucontext.h6
-rw-r--r--arch/riscv/kernel/Makefile5
-rw-r--r--arch/riscv/kernel/asm-offsets.c8
-rw-r--r--arch/riscv/kernel/clint.c44
-rw-r--r--arch/riscv/kernel/cpu.c45
-rw-r--r--arch/riscv/kernel/entry.S112
-rw-r--r--arch/riscv/kernel/fpu.S8
-rw-r--r--arch/riscv/kernel/head.S112
-rw-r--r--arch/riscv/kernel/irq.c17
-rw-r--r--arch/riscv/kernel/module.c4
-rw-r--r--arch/riscv/kernel/perf_callchain.c2
-rw-r--r--arch/riscv/kernel/process.c17
-rw-r--r--arch/riscv/kernel/ptrace.c10
-rw-r--r--arch/riscv/kernel/reset.c5
-rw-r--r--arch/riscv/kernel/sbi.c17
-rw-r--r--arch/riscv/kernel/setup.c2
-rw-r--r--arch/riscv/kernel/signal.c38
-rw-r--r--arch/riscv/kernel/smp.c16
-rw-r--r--arch/riscv/kernel/smpboot.c4
-rw-r--r--arch/riscv/kernel/traps.c16
-rw-r--r--arch/riscv/kernel/vmlinux.lds.S5
-rw-r--r--arch/riscv/lib/Makefile11
-rw-r--r--arch/riscv/lib/uaccess.S12
-rw-r--r--arch/riscv/mm/Makefile3
-rw-r--r--arch/riscv/mm/cacheflush.c26
-rw-r--r--arch/riscv/mm/context.c2
-rw-r--r--arch/riscv/mm/extable.c4
-rw-r--r--arch/riscv/mm/fault.c6
-rw-r--r--arch/riscv/mm/init.c28
-rw-r--r--arch/riscv/mm/tlbflush.c25
-rw-r--r--arch/s390/configs/debug_defconfig1
-rw-r--r--arch/s390/crypto/aes_s390.c609
-rw-r--r--arch/s390/crypto/des_s390.c419
-rw-r--r--arch/s390/crypto/paes_s390.c414
-rw-r--r--arch/s390/kernel/vmlinux.lds.S12
-rw-r--r--arch/s390/kernel/vtime.c4
-rw-r--r--arch/s390/net/bpf_jit_comp.c502
-rw-r--r--arch/sh/boards/mach-sdk7786/nmi.c2
-rw-r--r--arch/sh/boot/compressed/misc.c5
-rw-r--r--arch/sh/drivers/Makefile2
-rw-r--r--arch/sh/drivers/pci/fixups-sdk7786.c2
-rw-r--r--arch/sh/drivers/platform_early.c347
-rw-r--r--arch/sh/include/asm/platform_early.h61
-rw-r--r--arch/sh/include/cpu-sh4/cpu/sh7734.h2
-rw-r--r--arch/sh/kernel/cpu/sh2/setup-sh7619.c3
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-mxg.c3
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7201.c3
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7203.c3
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7206.c3
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7264.c3
-rw-r--r--arch/sh/kernel/cpu/sh2a/setup-sh7269.c3
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh3.c1
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7705.c3
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh770x.c3
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7710.c3
-rw-r--r--arch/sh/kernel/cpu/sh3/setup-sh7720.c3
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh4-202.c3
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh7750.c9
-rw-r--r--arch/sh/kernel/cpu/sh4/setup-sh7760.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7343.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7366.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7722.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7723.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7724.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7734.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7757.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7763.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7770.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7780.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7785.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-sh7786.c3
-rw-r--r--arch/sh/kernel/cpu/sh4a/setup-shx3.c3
-rw-r--r--arch/sh/kernel/cpu/sh5/setup-sh5.c3
-rw-r--r--arch/sh/kernel/io_trapped.c2
-rw-r--r--arch/sh/kernel/setup.c5
-rw-r--r--arch/sh/kernel/time.c5
-rw-r--r--arch/sh/kernel/vmlinux.lds.S3
-rw-r--r--arch/sh/mm/consistent.c5
-rw-r--r--arch/sparc/crypto/aes_glue.c310
-rw-r--r--arch/sparc/crypto/camellia_glue.c217
-rw-r--r--arch/sparc/crypto/des_glue.c499
-rw-r--r--arch/sparc/kernel/smp_64.c6
-rw-r--r--arch/sparc/kernel/vmlinux.lds.S3
-rw-r--r--arch/um/include/asm/common.lds.S3
-rw-r--r--arch/unicore32/kernel/vmlinux.lds.S5
-rw-r--r--arch/x86/Kconfig75
-rw-r--r--arch/x86/Kconfig.cpu25
-rw-r--r--arch/x86/Kconfig.debug4
-rw-r--r--arch/x86/Makefile_32.cpu1
-rw-r--r--arch/x86/boot/Makefile3
-rw-r--r--arch/x86/boot/compressed/Makefile5
-rw-r--r--arch/x86/boot/compressed/eboot.c9
-rw-r--r--arch/x86/boot/compressed/efi_stub_32.S4
-rw-r--r--arch/x86/boot/compressed/efi_thunk_64.S33
-rw-r--r--arch/x86/boot/compressed/head_32.S15
-rw-r--r--arch/x86/boot/compressed/head_64.S63
-rw-r--r--arch/x86/boot/compressed/kaslr.c58
-rw-r--r--arch/x86/boot/compressed/kernel_info.S22
-rw-r--r--arch/x86/boot/compressed/mem_encrypt.S11
-rw-r--r--arch/x86/boot/copy.S16
-rw-r--r--arch/x86/boot/header.S3
-rw-r--r--arch/x86/boot/pmjump.S10
-rw-r--r--arch/x86/boot/tools/build.c5
-rw-r--r--arch/x86/configs/x86_64_defconfig1
-rw-r--r--arch/x86/crypto/Makefile3
-rw-r--r--arch/x86/crypto/aegis128-aesni-asm.S36
-rw-r--r--arch/x86/crypto/aes_ctrby8_avx-x86_64.S12
-rw-r--r--arch/x86/crypto/aesni-intel_asm.S114
-rw-r--r--arch/x86/crypto/aesni-intel_avx-x86_64.S32
-rw-r--r--arch/x86/crypto/blake2s-core.S258
-rw-r--r--arch/x86/crypto/blake2s-glue.c233
-rw-r--r--arch/x86/crypto/blowfish-x86_64-asm_64.S16
-rw-r--r--arch/x86/crypto/camellia-aesni-avx-asm_64.S44
-rw-r--r--arch/x86/crypto/camellia-aesni-avx2-asm_64.S44
-rw-r--r--arch/x86/crypto/camellia-x86_64-asm_64.S16
-rw-r--r--arch/x86/crypto/cast5-avx-x86_64-asm_64.S24
-rw-r--r--arch/x86/crypto/cast6-avx-x86_64-asm_64.S32
-rw-r--r--arch/x86/crypto/chacha-avx2-x86_64.S12
-rw-r--r--arch/x86/crypto/chacha-avx512vl-x86_64.S12
-rw-r--r--arch/x86/crypto/chacha-ssse3-x86_64.S16
-rw-r--r--arch/x86/crypto/chacha_glue.c184
-rw-r--r--arch/x86/crypto/crc32-pclmul_asm.S4
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S4
-rw-r--r--arch/x86/crypto/crct10dif-pcl-asm_64.S4
-rw-r--r--arch/x86/crypto/curve25519-x86_64.c2475
-rw-r--r--arch/x86/crypto/des3_ede-asm_64.S8
-rw-r--r--arch/x86/crypto/ghash-clmulni-intel_asm.S12
-rw-r--r--arch/x86/crypto/nh-avx2-x86_64.S4
-rw-r--r--arch/x86/crypto/nh-sse2-x86_64.S4
-rw-r--r--arch/x86/crypto/poly1305-avx2-x86_64.S4
-rw-r--r--arch/x86/crypto/poly1305-sse2-x86_64.S8
-rw-r--r--arch/x86/crypto/poly1305_glue.c199
-rw-r--r--arch/x86/crypto/serpent-avx-x86_64-asm_64.S32
-rw-r--r--arch/x86/crypto/serpent-avx2-asm_64.S32
-rw-r--r--arch/x86/crypto/serpent-sse2-i586-asm_32.S8
-rw-r--r--arch/x86/crypto/serpent-sse2-x86_64-asm_64.S8
-rw-r--r--arch/x86/crypto/sha1_avx2_x86_64_asm.S4
-rw-r--r--arch/x86/crypto/sha1_ni_asm.S4
-rw-r--r--arch/x86/crypto/sha1_ssse3_asm.S4
-rw-r--r--arch/x86/crypto/sha256-avx-asm.S4
-rw-r--r--arch/x86/crypto/sha256-avx2-asm.S4
-rw-r--r--arch/x86/crypto/sha256-ssse3-asm.S4
-rw-r--r--arch/x86/crypto/sha256_ni_asm.S4
-rw-r--r--arch/x86/crypto/sha512-avx-asm.S4
-rw-r--r--arch/x86/crypto/sha512-avx2-asm.S4
-rw-r--r--arch/x86/crypto/sha512-ssse3-asm.S4
-rw-r--r--arch/x86/crypto/twofish-avx-x86_64-asm_64.S32
-rw-r--r--arch/x86/crypto/twofish-i586-asm_32.S8
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64-3way.S8
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64.S8
-rw-r--r--arch/x86/entry/calling.h2
-rw-r--r--arch/x86/entry/common.c4
-rw-r--r--arch/x86/entry/entry_32.S379
-rw-r--r--arch/x86/entry/entry_64.S112
-rw-r--r--arch/x86/entry/entry_64_compat.S16
-rw-r--r--arch/x86/entry/syscall_32.c8
-rw-r--r--arch/x86/entry/syscall_64.c14
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl8
-rw-r--r--arch/x86/entry/thunk_32.S4
-rw-r--r--arch/x86/entry/thunk_64.S7
-rw-r--r--arch/x86/entry/vdso/Makefile2
-rw-r--r--arch/x86/entry/vdso/vdso32/system_call.S2
-rw-r--r--arch/x86/events/amd/core.c13
-rw-r--r--arch/x86/events/core.c8
-rw-r--r--arch/x86/events/intel/bts.c8
-rw-r--r--arch/x86/events/intel/core.c12
-rw-r--r--arch/x86/events/intel/lbr.c23
-rw-r--r--arch/x86/events/intel/p4.c5
-rw-r--r--arch/x86/events/intel/pt.c203
-rw-r--r--arch/x86/events/intel/pt.h12
-rw-r--r--arch/x86/events/perf_event.h11
-rw-r--r--arch/x86/hyperv/hv_apic.c16
-rw-r--r--arch/x86/hyperv/hv_init.c6
-rw-r--r--arch/x86/ia32/ia32_signal.c5
-rw-r--r--arch/x86/include/asm/asm.h14
-rw-r--r--arch/x86/include/asm/calgary.h57
-rw-r--r--arch/x86/include/asm/cpu_entry_area.h18
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/crash.h9
-rw-r--r--arch/x86/include/asm/disabled-features.h2
-rw-r--r--arch/x86/include/asm/e820/types.h8
-rw-r--r--arch/x86/include/asm/efi.h17
-rw-r--r--arch/x86/include/asm/emulate_prefix.h14
-rw-r--r--arch/x86/include/asm/fixmap.h2
-rw-r--r--arch/x86/include/asm/ftrace.h13
-rw-r--r--arch/x86/include/asm/hyperv-tlfs.h5
-rw-r--r--arch/x86/include/asm/insn.h6
-rw-r--r--arch/x86/include/asm/io_bitmap.h29
-rw-r--r--arch/x86/include/asm/kexec.h10
-rw-r--r--arch/x86/include/asm/linkage.h4
-rw-r--r--arch/x86/include/asm/module.h2
-rw-r--r--arch/x86/include/asm/paravirt.h4
-rw-r--r--arch/x86/include/asm/paravirt_types.h2
-rw-r--r--arch/x86/include/asm/pci.h7
-rw-r--r--arch/x86/include/asm/pci_64.h28
-rw-r--r--arch/x86/include/asm/pgtable-3level.h46
-rw-r--r--arch/x86/include/asm/pgtable_32_types.h8
-rw-r--r--arch/x86/include/asm/processor.h123
-rw-r--r--arch/x86/include/asm/ptrace.h6
-rw-r--r--arch/x86/include/asm/purgatory.h10
-rw-r--r--arch/x86/include/asm/refcount.h126
-rw-r--r--arch/x86/include/asm/rio.h64
-rw-r--r--arch/x86/include/asm/sections.h1
-rw-r--r--arch/x86/include/asm/segment.h12
-rw-r--r--arch/x86/include/asm/switch_to.h10
-rw-r--r--arch/x86/include/asm/syscall_wrapper.h76
-rw-r--r--arch/x86/include/asm/tce.h35
-rw-r--r--arch/x86/include/asm/text-patching.h24
-rw-r--r--arch/x86/include/asm/thread_info.h14
-rw-r--r--arch/x86/include/asm/trace/hyperv.h15
-rw-r--r--arch/x86/include/asm/umip.h4
-rw-r--r--arch/x86/include/asm/unwind_hints.h8
-rw-r--r--arch/x86/include/asm/uv/bios.h2
-rw-r--r--arch/x86/include/asm/uv/uv.h16
-rw-r--r--arch/x86/include/asm/uv/uv_hub.h61
-rw-r--r--arch/x86/include/asm/xen/hypervisor.h2
-rw-r--r--arch/x86/include/asm/xen/interface.h11
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h41
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/acpi/wakeup_32.S9
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S10
-rw-r--r--arch/x86/kernel/alternative.c132
-rw-r--r--arch/x86/kernel/amd_gart_64.c12
-rw-r--r--arch/x86/kernel/apic/apic.c43
-rw-r--r--arch/x86/kernel/apic/io_apic.c25
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c184
-rw-r--r--arch/x86/kernel/cpu/bugs.c30
-rw-r--r--arch/x86/kernel/cpu/common.c193
-rw-r--r--arch/x86/kernel/cpu/intel.c8
-rw-r--r--arch/x86/kernel/cpu/mshyperv.c7
-rw-r--r--arch/x86/kernel/cpu/rdrand.c22
-rw-r--r--arch/x86/kernel/crash.c128
-rw-r--r--arch/x86/kernel/doublefault.c5
-rw-r--r--arch/x86/kernel/e820.c23
-rw-r--r--arch/x86/kernel/fpu/xstate.c22
-rw-r--r--arch/x86/kernel/ftrace.c14
-rw-r--r--arch/x86/kernel/ftrace_32.S23
-rw-r--r--arch/x86/kernel/ftrace_64.S89
-rw-r--r--arch/x86/kernel/head_32.S72
-rw-r--r--arch/x86/kernel/head_64.S113
-rw-r--r--arch/x86/kernel/ioport.c209
-rw-r--r--arch/x86/kernel/irqflags.S8
-rw-r--r--arch/x86/kernel/jailhouse.c136
-rw-r--r--arch/x86/kernel/jump_label.c9
-rw-r--r--arch/x86/kernel/kdebugfs.c21
-rw-r--r--arch/x86/kernel/kprobes/core.c4
-rw-r--r--arch/x86/kernel/kprobes/opt.c11
-rw-r--r--arch/x86/kernel/ksysfs.c31
-rw-r--r--arch/x86/kernel/machine_kexec_64.c47
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/pci-calgary_64.c1586
-rw-r--r--arch/x86/kernel/pci-dma.c6
-rw-r--r--arch/x86/kernel/process.c205
-rw-r--r--arch/x86/kernel/process_32.c77
-rw-r--r--arch/x86/kernel/process_64.c86
-rw-r--r--arch/x86/kernel/ptrace.c12
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S13
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S13
-rw-r--r--arch/x86/kernel/setup.c42
-rw-r--r--arch/x86/kernel/setup_percpu.c4
-rw-r--r--arch/x86/kernel/tboot.c15
-rw-r--r--arch/x86/kernel/tce_64.c177
-rw-r--r--arch/x86/kernel/traps.c5
-rw-r--r--arch/x86/kernel/tsc_sync.c8
-rw-r--r--arch/x86/kernel/umip.c18
-rw-r--r--arch/x86/kernel/uprobes.c2
-rw-r--r--arch/x86/kernel/verify_cpu.S4
-rw-r--r--arch/x86/kernel/vmlinux.lds.S16
-rw-r--r--arch/x86/kernel/x86_init.c24
-rw-r--r--arch/x86/kvm/pmu.c4
-rw-r--r--arch/x86/kvm/vmx/vmenter.S12
-rw-r--r--arch/x86/kvm/vmx/vmx.c8
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--arch/x86/lib/atomic64_386_32.S4
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S32
-rw-r--r--arch/x86/lib/checksum_32.S16
-rw-r--r--arch/x86/lib/clear_page_64.S12
-rw-r--r--arch/x86/lib/cmpxchg16b_emu.S4
-rw-r--r--arch/x86/lib/cmpxchg8b_emu.S4
-rw-r--r--arch/x86/lib/copy_page_64.S8
-rw-r--r--arch/x86/lib/copy_user_64.S21
-rw-r--r--arch/x86/lib/csum-copy_64.S4
-rw-r--r--arch/x86/lib/getuser.S22
-rw-r--r--arch/x86/lib/hweight.S8
-rw-r--r--arch/x86/lib/insn.c34
-rw-r--r--arch/x86/lib/iomap_copy_64.S4
-rw-r--r--arch/x86/lib/memcpy_64.S20
-rw-r--r--arch/x86/lib/memmove_64.S8
-rw-r--r--arch/x86/lib/memset_64.S16
-rw-r--r--arch/x86/lib/msr-reg.S8
-rw-r--r--arch/x86/lib/putuser.S19
-rw-r--r--arch/x86/lib/retpoline.S4
-rw-r--r--arch/x86/lib/x86-opcode-map.txt18
-rw-r--r--arch/x86/math-emu/div_Xsig.S4
-rw-r--r--arch/x86/math-emu/div_small.S4
-rw-r--r--arch/x86/math-emu/fpu_system.h6
-rw-r--r--arch/x86/math-emu/mul_Xsig.S12
-rw-r--r--arch/x86/math-emu/polynom_Xsig.S4
-rw-r--r--arch/x86/math-emu/reg_ld_str.c6
-rw-r--r--arch/x86/math-emu/reg_norm.S8
-rw-r--r--arch/x86/math-emu/reg_round.S4
-rw-r--r--arch/x86/math-emu/reg_u_add.S4
-rw-r--r--arch/x86/math-emu/reg_u_div.S4
-rw-r--r--arch/x86/math-emu/reg_u_mul.S4
-rw-r--r--arch/x86/math-emu/reg_u_sub.S4
-rw-r--r--arch/x86/math-emu/round_Xsig.S8
-rw-r--r--arch/x86/math-emu/shr_Xsig.S4
-rw-r--r--arch/x86/math-emu/wm_shrx.S8
-rw-r--r--arch/x86/math-emu/wm_sqrt.S4
-rw-r--r--arch/x86/mm/Makefile4
-rw-r--r--arch/x86/mm/cpu_entry_area.c12
-rw-r--r--arch/x86/mm/extable.c49
-rw-r--r--arch/x86/mm/init.c8
-rw-r--r--arch/x86/mm/init_64.c16
-rw-r--r--arch/x86/mm/ioremap.c11
-rw-r--r--arch/x86/mm/kmmio.c7
-rw-r--r--arch/x86/mm/maccess.c43
-rw-r--r--arch/x86/mm/mem_encrypt_boot.S8
-rw-r--r--arch/x86/mm/mmio-mod.c6
-rw-r--r--arch/x86/mm/numa.c2
-rw-r--r--arch/x86/mm/numa_emulation.c4
-rw-r--r--arch/x86/mm/pat.c8
-rw-r--r--arch/x86/mm/pat_internal.h20
-rw-r--r--arch/x86/mm/pat_interval.c185
-rw-r--r--arch/x86/mm/pat_rbtree.c268
-rw-r--r--arch/x86/mm/pgtable.c4
-rw-r--r--arch/x86/mm/pti.c2
-rw-r--r--arch/x86/mm/testmmiotrace.c6
-rw-r--r--arch/x86/net/bpf_jit_comp.c620
-rw-r--r--arch/x86/oprofile/op_x86_model.h6
-rw-r--r--arch/x86/platform/efi/efi.c54
-rw-r--r--arch/x86/platform/efi/efi_stub_32.S4
-rw-r--r--arch/x86/platform/efi/efi_stub_64.S4
-rw-r--r--arch/x86/platform/efi/efi_thunk_64.S16
-rw-r--r--arch/x86/platform/efi/quirks.c3
-rw-r--r--arch/x86/platform/olpc/olpc-xo15-sci.c2
-rw-r--r--arch/x86/platform/olpc/xo1-wakeup.S3
-rw-r--r--arch/x86/platform/pvh/head.S18
-rw-r--r--arch/x86/platform/sfi/sfi.c3
-rw-r--r--arch/x86/platform/uv/bios_uv.c9
-rw-r--r--arch/x86/power/hibernate_asm_32.S14
-rw-r--r--arch/x86/power/hibernate_asm_64.S14
-rw-r--r--arch/x86/purgatory/entry64.S24
-rw-r--r--arch/x86/purgatory/purgatory.c19
-rw-r--r--arch/x86/purgatory/setup-x86_64.S14
-rw-r--r--arch/x86/purgatory/stack.S7
-rw-r--r--arch/x86/realmode/init.c2
-rw-r--r--arch/x86/realmode/rm/header.S8
-rw-r--r--arch/x86/realmode/rm/realmode.lds.S1
-rw-r--r--arch/x86/realmode/rm/reboot.S13
-rw-r--r--arch/x86/realmode/rm/stack.S14
-rw-r--r--arch/x86/realmode/rm/trampoline_32.S16
-rw-r--r--arch/x86/realmode/rm/trampoline_64.S29
-rw-r--r--arch/x86/realmode/rm/trampoline_common.S2
-rw-r--r--arch/x86/realmode/rm/wakeup_asm.S17
-rw-r--r--arch/x86/realmode/rmpiggy.S10
-rw-r--r--arch/x86/tools/gen-insn-attr-x86.awk4
-rw-r--r--arch/x86/um/vdso/vdso.S6
-rw-r--r--arch/x86/xen/enlighten_pv.c10
-rw-r--r--arch/x86/xen/setup.c2
-rw-r--r--arch/x86/xen/xen-asm.S28
-rw-r--r--arch/x86/xen/xen-asm_32.S80
-rw-r--r--arch/x86/xen/xen-asm_64.S34
-rw-r--r--arch/x86/xen/xen-head.S8
-rw-r--r--arch/xtensa/kernel/vmlinux.lds.S8
544 files changed, 21512 insertions, 8521 deletions
diff --git a/arch/Kconfig b/arch/Kconfig
index 5f8a5d84dbbe..17c42bc36321 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -892,27 +892,6 @@ config STRICT_MODULE_RWX
config ARCH_HAS_PHYS_TO_DMA
bool
-config ARCH_HAS_REFCOUNT
- bool
- help
- An architecture selects this when it has implemented refcount_t
- using open coded assembly primitives that provide an optimized
- refcount_t implementation, possibly at the expense of some full
- refcount state checks of CONFIG_REFCOUNT_FULL=y.
-
- The refcount overflow check behavior, however, must be retained.
- Catching overflows is the primary security concern for protecting
- against bugs in reference counts.
-
-config REFCOUNT_FULL
- bool "Perform full reference count validation at the expense of speed"
- help
- Enabling this switches the refcounting infrastructure from a fast
- unchecked atomic_t implementation to a fully state checked
- implementation, which can be (slightly) slower but provides protections
- against various use-after-free conditions that can be used in
- security flaw exploits.
-
config HAVE_ARCH_COMPILER_H
bool
help
@@ -960,6 +939,14 @@ config RELR
config ARCH_HAS_MEM_ENCRYPT
bool
+config HAVE_SPARSE_SYSCALL_NR
+ bool
+ help
+ An architecture should select this if its syscall numbering is sparse
+ to save space. For example, MIPS architecture has a syscall array with
+ entries at 4000, 5000 and 6000 locations. This option turns on syscall
+ related optimizations for a given architecture.
+
source "kernel/gcov/Kconfig"
source "scripts/gcc-plugins/Kconfig"
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c
index 4341ccf5c0c4..e7a59d927d78 100644
--- a/arch/alpha/kernel/perf_event.c
+++ b/arch/alpha/kernel/perf_event.c
@@ -824,7 +824,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
if (unlikely(la_ptr >= alpha_pmu->num_pmcs)) {
/* This should never occur! */
irq_err_count++;
- pr_warning("PMI: silly index %ld\n", la_ptr);
+ pr_warn("PMI: silly index %ld\n", la_ptr);
wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
return;
}
@@ -847,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr,
if (unlikely(!event)) {
/* This should never occur! */
irq_err_count++;
- pr_warning("PMI: No event at index %d!\n", idx);
+ pr_warn("PMI: No event at index %d!\n", idx);
wrperfmon(PERFMON_CMD_ENABLE, cpuc->idx_mask);
return;
}
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S
index c4b5ceceab52..bc6f727278fd 100644
--- a/arch/alpha/kernel/vmlinux.lds.S
+++ b/arch/alpha/kernel/vmlinux.lds.S
@@ -1,4 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
+
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN 16
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/thread_info.h>
#include <asm/cache.h>
@@ -8,7 +12,7 @@
OUTPUT_FORMAT("elf64-alpha")
OUTPUT_ARCH(alpha)
ENTRY(__start)
-PHDRS { kernel PT_LOAD; note PT_NOTE; }
+PHDRS { text PT_LOAD; note PT_NOTE; }
jiffies = jiffies_64;
SECTIONS
{
@@ -27,17 +31,11 @@ SECTIONS
LOCK_TEXT
*(.fixup)
*(.gnu.warning)
- } :kernel
+ } :text
swapper_pg_dir = SWAPPER_PGD;
_etext = .; /* End of text section */
- NOTES :kernel :note
- .dummy : {
- *(.dummy)
- } :kernel
-
- RODATA
- EXCEPTION_TABLE(16)
+ RO_DATA(4096)
/* Will be freed after init */
__init_begin = ALIGN(PAGE_SIZE);
@@ -52,7 +50,7 @@ SECTIONS
_sdata = .; /* Start of rw data section */
_data = .;
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
.got : {
*(.got)
diff --git a/arch/arc/kernel/vmlinux.lds.S b/arch/arc/kernel/vmlinux.lds.S
index 6c693a9d29b6..54139a6f469b 100644
--- a/arch/arc/kernel/vmlinux.lds.S
+++ b/arch/arc/kernel/vmlinux.lds.S
@@ -95,13 +95,13 @@ SECTIONS
_etext = .;
_sdata = .;
- RO_DATA_SECTION(PAGE_SIZE)
+ RO_DATA(PAGE_SIZE)
/*
* 1. this is .data essentially
* 2. THREAD_SIZE for init.task, must be kernel-stk sz aligned
*/
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
_edata = .;
@@ -118,8 +118,6 @@ SECTIONS
/DISCARD/ : { *(.eh_frame) }
#endif
- NOTES
-
. = ALIGN(PAGE_SIZE);
_end = . ;
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 8a50efb559f3..0d3c5d7cceb7 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -117,7 +117,6 @@ config ARM
select OLD_SIGSUSPEND3
select PCI_SYSCALL if PCI
select PERF_USE_VMALLOC
- select REFCOUNT_FULL
select RTC_LIB
select SYS_SUPPORTS_APM_EMULATION
# Above selects are sorted alphabetically; please add new ones
diff --git a/arch/arm/boot/dts/am3517.dtsi b/arch/arm/boot/dts/am3517.dtsi
index bf3002009b00..76f819f4ba48 100644
--- a/arch/arm/boot/dts/am3517.dtsi
+++ b/arch/arm/boot/dts/am3517.dtsi
@@ -16,6 +16,37 @@
can = &hecc;
};
+ cpus {
+ cpu: cpu@0 {
+ /* Based on OMAP3630 variants OPP50 and OPP100 */
+ operating-points-v2 = <&cpu0_opp_table>;
+
+ clock-latency = <300000>; /* From legacy driver */
+ };
+ };
+
+ cpu0_opp_table: opp-table {
+ compatible = "operating-points-v2-ti-cpu";
+ syscon = <&scm_conf>;
+ /*
+ * AM3517 TRM only lists 600MHz @ 1.2V, but omap36xx
+ * appear to operate at 300MHz as well. Since AM3517 only
+ * lists one operating voltage, it will remain fixed at 1.2V
+ */
+ opp50-300000000 {
+ opp-hz = /bits/ 64 <300000000>;
+ opp-microvolt = <1200000>;
+ opp-supported-hw = <0xffffffff 0xffffffff>;
+ opp-suspend;
+ };
+
+ opp100-600000000 {
+ opp-hz = /bits/ 64 <600000000>;
+ opp-microvolt = <1200000>;
+ opp-supported-hw = <0xffffffff 0xffffffff>;
+ };
+ };
+
ocp@68000000 {
am35x_otg_hs: am35x_otg_hs@5c040000 {
compatible = "ti,omap3-musb";
diff --git a/arch/arm/boot/dts/am3517_mt_ventoux.dts b/arch/arm/boot/dts/am3517_mt_ventoux.dts
index e507e4ae0d88..e7d7124a34ba 100644
--- a/arch/arm/boot/dts/am3517_mt_ventoux.dts
+++ b/arch/arm/boot/dts/am3517_mt_ventoux.dts
@@ -8,7 +8,7 @@
/ {
model = "TeeJet Mt.Ventoux";
- compatible = "teejet,mt_ventoux", "ti,omap3";
+ compatible = "teejet,mt_ventoux", "ti,am3517", "ti,omap3";
memory@80000000 {
device_type = "memory";
diff --git a/arch/arm/boot/dts/am571x-idk.dts b/arch/arm/boot/dts/am571x-idk.dts
index 0aaacea1d887..820ce3b60bb6 100644
--- a/arch/arm/boot/dts/am571x-idk.dts
+++ b/arch/arm/boot/dts/am571x-idk.dts
@@ -186,3 +186,30 @@
pinctrl-1 = <&mmc2_pins_hs>;
pinctrl-2 = <&mmc2_pins_ddr_rev20 &mmc2_iodelay_ddr_conf>;
};
+
+&mac_sw {
+ pinctrl-names = "default", "sleep";
+ status = "okay";
+};
+
+&cpsw_port1 {
+ phy-handle = <&ethphy0_sw>;
+ phy-mode = "rgmii";
+ ti,dual-emac-pvid = <1>;
+};
+
+&cpsw_port2 {
+ phy-handle = <&ethphy1_sw>;
+ phy-mode = "rgmii";
+ ti,dual-emac-pvid = <2>;
+};
+
+&davinci_mdio_sw {
+ ethphy0_sw: ethernet-phy@0 {
+ reg = <0>;
+ };
+
+ ethphy1_sw: ethernet-phy@1 {
+ reg = <1>;
+ };
+};
diff --git a/arch/arm/boot/dts/am572x-idk.dts b/arch/arm/boot/dts/am572x-idk.dts
index ea1c119feaa5..c3d966904d64 100644
--- a/arch/arm/boot/dts/am572x-idk.dts
+++ b/arch/arm/boot/dts/am572x-idk.dts
@@ -27,3 +27,8 @@
pinctrl-1 = <&mmc2_pins_hs>;
pinctrl-2 = <&mmc2_pins_ddr_rev20>;
};
+
+&mac {
+ status = "okay";
+ dual_emac;
+};
diff --git a/arch/arm/boot/dts/am574x-idk.dts b/arch/arm/boot/dts/am574x-idk.dts
index 7935d70874ce..fa0088025b2c 100644
--- a/arch/arm/boot/dts/am574x-idk.dts
+++ b/arch/arm/boot/dts/am574x-idk.dts
@@ -35,3 +35,8 @@
pinctrl-1 = <&mmc2_pins_default>;
pinctrl-2 = <&mmc2_pins_default>;
};
+
+&mac {
+ status = "okay";
+ dual_emac;
+};
diff --git a/arch/arm/boot/dts/am57xx-idk-common.dtsi b/arch/arm/boot/dts/am57xx-idk-common.dtsi
index 423855a2a2d6..398721c7201c 100644
--- a/arch/arm/boot/dts/am57xx-idk-common.dtsi
+++ b/arch/arm/boot/dts/am57xx-idk-common.dtsi
@@ -363,11 +363,6 @@
ext-clk-src;
};
-&mac {
- status = "okay";
- dual_emac;
-};
-
&cpsw_emac0 {
phy-handle = <&ethphy0>;
phy-mode = "rgmii";
diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi
index 5cac2dd58241..37e048771b0f 100644
--- a/arch/arm/boot/dts/dra7-l4.dtsi
+++ b/arch/arm/boot/dts/dra7-l4.dtsi
@@ -3079,6 +3079,58 @@
phys = <&phy_gmii_sel 2>;
};
};
+
+ mac_sw: switch@0 {
+ compatible = "ti,dra7-cpsw-switch","ti,cpsw-switch";
+ reg = <0x0 0x4000>;
+ ranges = <0 0 0x4000>;
+ clocks = <&gmac_main_clk>;
+ clock-names = "fck";
+ #address-cells = <1>;
+ #size-cells = <1>;
+ syscon = <&scm_conf>;
+ status = "disabled";
+
+ interrupts = <GIC_SPI 334 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 335 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>,
+ <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "rx_thresh", "rx", "tx", "misc";
+
+ ethernet-ports {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpsw_port1: port@1 {
+ reg = <1>;
+ label = "port1";
+ mac-address = [ 00 00 00 00 00 00 ];
+ phys = <&phy_gmii_sel 1>;
+ };
+
+ cpsw_port2: port@2 {
+ reg = <2>;
+ label = "port2";
+ mac-address = [ 00 00 00 00 00 00 ];
+ phys = <&phy_gmii_sel 2>;
+ };
+ };
+
+ davinci_mdio_sw: mdio@1000 {
+ compatible = "ti,cpsw-mdio","ti,davinci_mdio";
+ clocks = <&gmac_main_clk>;
+ clock-names = "fck";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ bus_freq = <1000000>;
+ reg = <0x1000 0x100>;
+ };
+
+ cpts {
+ clocks = <&gmac_clkctrl DRA7_GMAC_GMAC_CLKCTRL 25>;
+ clock-names = "cpts";
+ };
+ };
};
};
};
diff --git a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
index f7a841a28865..2a0a98fe67f0 100644
--- a/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-som-lv-35xx-devkit.dts
@@ -9,5 +9,5 @@
/ {
model = "LogicPD Zoom OMAP35xx SOM-LV Development Kit";
- compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3";
+ compatible = "logicpd,dm3730-som-lv-devkit", "ti,omap3430", "ti,omap3";
};
diff --git a/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts
index 7675bc3fa868..57bae2aa910e 100644
--- a/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts
+++ b/arch/arm/boot/dts/logicpd-torpedo-35xx-devkit.dts
@@ -9,5 +9,5 @@
/ {
model = "LogicPD Zoom OMAP35xx Torpedo Development Kit";
- compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap3";
+ compatible = "logicpd,dm3730-torpedo-devkit", "ti,omap3430", "ti,omap3";
};
diff --git a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi
index d1eae47b83f6..08bae935605c 100644
--- a/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi
+++ b/arch/arm/boot/dts/motorola-cpcap-mapphone.dtsi
@@ -43,11 +43,13 @@
compatible = "motorola,mapphone-cpcap-charger";
interrupts-extended = <
&cpcap 13 0 &cpcap 12 0 &cpcap 29 0 &cpcap 28 0
- &cpcap 22 0 &cpcap 20 0 &cpcap 19 0 &cpcap 54 0
+ &cpcap 22 0 &cpcap 21 0 &cpcap 20 0 &cpcap 19 0
+ &cpcap 54 0
>;
interrupt-names =
"chrg_det", "rvrs_chrg", "chrg_se1b", "se0conn",
- "rvrs_mode", "chrgcurr1", "vbusvld", "battdetb";
+ "rvrs_mode", "chrgcurr2", "chrgcurr1", "vbusvld",
+ "battdetb";
mode-gpios = <&gpio3 29 GPIO_ACTIVE_LOW
&gpio3 23 GPIO_ACTIVE_LOW>;
io-channels = <&cpcap_adc 0 &cpcap_adc 1
diff --git a/arch/arm/boot/dts/omap3-beagle-xm.dts b/arch/arm/boot/dts/omap3-beagle-xm.dts
index 1aa99fc1487a..125ed933ca75 100644
--- a/arch/arm/boot/dts/omap3-beagle-xm.dts
+++ b/arch/arm/boot/dts/omap3-beagle-xm.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3 BeagleBoard xM";
- compatible = "ti,omap3-beagle-xm", "ti,omap36xx", "ti,omap3";
+ compatible = "ti,omap3-beagle-xm", "ti,omap3630", "ti,omap36xx", "ti,omap3";
cpus {
cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts
index e3df3c166902..4ed3f93f5841 100644
--- a/arch/arm/boot/dts/omap3-beagle.dts
+++ b/arch/arm/boot/dts/omap3-beagle.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3 BeagleBoard";
- compatible = "ti,omap3-beagle", "ti,omap3";
+ compatible = "ti,omap3-beagle", "ti,omap3430", "ti,omap3";
cpus {
cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-cm-t3530.dts b/arch/arm/boot/dts/omap3-cm-t3530.dts
index 76e52c78cbb4..32dbaeaed147 100644
--- a/arch/arm/boot/dts/omap3-cm-t3530.dts
+++ b/arch/arm/boot/dts/omap3-cm-t3530.dts
@@ -9,7 +9,7 @@
/ {
model = "CompuLab CM-T3530";
- compatible = "compulab,omap3-cm-t3530", "ti,omap34xx", "ti,omap3";
+ compatible = "compulab,omap3-cm-t3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
/* Regulator to trigger the reset signal of the Wifi module */
mmc2_sdio_reset: regulator-mmc2-sdio-reset {
diff --git a/arch/arm/boot/dts/omap3-cm-t3730.dts b/arch/arm/boot/dts/omap3-cm-t3730.dts
index 6e944dfa0f3d..683819bf0915 100644
--- a/arch/arm/boot/dts/omap3-cm-t3730.dts
+++ b/arch/arm/boot/dts/omap3-cm-t3730.dts
@@ -9,7 +9,7 @@
/ {
model = "CompuLab CM-T3730";
- compatible = "compulab,omap3-cm-t3730", "ti,omap36xx", "ti,omap3";
+ compatible = "compulab,omap3-cm-t3730", "ti,omap3630", "ti,omap36xx", "ti,omap3";
wl12xx_vmmc2: wl12xx_vmmc2 {
compatible = "regulator-fixed";
diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts b/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts
index a80fc60bc773..afed85078ad8 100644
--- a/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000-lcd43.dts
@@ -11,7 +11,7 @@
#include "omap3-devkit8000-lcd-common.dtsi"
/ {
model = "TimLL OMAP3 Devkit8000 with 4.3'' LCD panel";
- compatible = "timll,omap3-devkit8000", "ti,omap3";
+ compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3";
lcd0: display {
panel-timing {
diff --git a/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts b/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts
index 0753776071f8..07c51a105c0d 100644
--- a/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000-lcd70.dts
@@ -11,7 +11,7 @@
#include "omap3-devkit8000-lcd-common.dtsi"
/ {
model = "TimLL OMAP3 Devkit8000 with 7.0'' LCD panel";
- compatible = "timll,omap3-devkit8000", "ti,omap3";
+ compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3";
lcd0: display {
panel-timing {
diff --git a/arch/arm/boot/dts/omap3-devkit8000.dts b/arch/arm/boot/dts/omap3-devkit8000.dts
index faafc48d8f61..162d0726b008 100644
--- a/arch/arm/boot/dts/omap3-devkit8000.dts
+++ b/arch/arm/boot/dts/omap3-devkit8000.dts
@@ -7,7 +7,7 @@
#include "omap3-devkit8000-common.dtsi"
/ {
model = "TimLL OMAP3 Devkit8000";
- compatible = "timll,omap3-devkit8000", "ti,omap3";
+ compatible = "timll,omap3-devkit8000", "ti,omap3430", "ti,omap3";
aliases {
display1 = &dvi0;
diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi
index b6ef1a7ac8a4..409a758c99f1 100644
--- a/arch/arm/boot/dts/omap3-gta04.dtsi
+++ b/arch/arm/boot/dts/omap3-gta04.dtsi
@@ -11,7 +11,7 @@
/ {
model = "OMAP3 GTA04";
- compatible = "ti,omap3-gta04", "ti,omap36xx", "ti,omap3";
+ compatible = "ti,omap3-gta04", "ti,omap3630", "ti,omap36xx", "ti,omap3";
cpus {
cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-ha-lcd.dts b/arch/arm/boot/dts/omap3-ha-lcd.dts
index badb9b3c8897..c9ecbc45c8e2 100644
--- a/arch/arm/boot/dts/omap3-ha-lcd.dts
+++ b/arch/arm/boot/dts/omap3-ha-lcd.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3 HEAD acoustics LCD-baseboard with TAO3530 SOM";
- compatible = "headacoustics,omap3-ha-lcd", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3";
+ compatible = "headacoustics,omap3-ha-lcd", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
};
&omap3_pmx_core {
diff --git a/arch/arm/boot/dts/omap3-ha.dts b/arch/arm/boot/dts/omap3-ha.dts
index a5365252bfbe..35c4e15abeb7 100644
--- a/arch/arm/boot/dts/omap3-ha.dts
+++ b/arch/arm/boot/dts/omap3-ha.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3 HEAD acoustics baseboard with TAO3530 SOM";
- compatible = "headacoustics,omap3-ha", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3";
+ compatible = "headacoustics,omap3-ha", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
};
&omap3_pmx_core {
diff --git a/arch/arm/boot/dts/omap3-igep0020-rev-f.dts b/arch/arm/boot/dts/omap3-igep0020-rev-f.dts
index 03dcd05fb8a0..d134ce1cffc0 100644
--- a/arch/arm/boot/dts/omap3-igep0020-rev-f.dts
+++ b/arch/arm/boot/dts/omap3-igep0020-rev-f.dts
@@ -10,7 +10,7 @@
/ {
model = "IGEPv2 Rev. F (TI OMAP AM/DM37x)";
- compatible = "isee,omap3-igep0020-rev-f", "ti,omap36xx", "ti,omap3";
+ compatible = "isee,omap3-igep0020-rev-f", "ti,omap3630", "ti,omap36xx", "ti,omap3";
/* Regulator to trigger the WL_EN signal of the Wifi module */
lbep5clwmc_wlen: regulator-lbep5clwmc-wlen {
diff --git a/arch/arm/boot/dts/omap3-igep0020.dts b/arch/arm/boot/dts/omap3-igep0020.dts
index 6d0519e3dfd0..e341535a7162 100644
--- a/arch/arm/boot/dts/omap3-igep0020.dts
+++ b/arch/arm/boot/dts/omap3-igep0020.dts
@@ -10,7 +10,7 @@
/ {
model = "IGEPv2 Rev. C (TI OMAP AM/DM37x)";
- compatible = "isee,omap3-igep0020", "ti,omap36xx", "ti,omap3";
+ compatible = "isee,omap3-igep0020", "ti,omap3630", "ti,omap36xx", "ti,omap3";
vmmcsdio_fixed: fixedregulator-mmcsdio {
compatible = "regulator-fixed";
diff --git a/arch/arm/boot/dts/omap3-igep0030-rev-g.dts b/arch/arm/boot/dts/omap3-igep0030-rev-g.dts
index 060acd1e803a..9ca1d0f61964 100644
--- a/arch/arm/boot/dts/omap3-igep0030-rev-g.dts
+++ b/arch/arm/boot/dts/omap3-igep0030-rev-g.dts
@@ -10,7 +10,7 @@
/ {
model = "IGEP COM MODULE Rev. G (TI OMAP AM/DM37x)";
- compatible = "isee,omap3-igep0030-rev-g", "ti,omap36xx", "ti,omap3";
+ compatible = "isee,omap3-igep0030-rev-g", "ti,omap3630", "ti,omap36xx", "ti,omap3";
/* Regulator to trigger the WL_EN signal of the Wifi module */
lbep5clwmc_wlen: regulator-lbep5clwmc-wlen {
diff --git a/arch/arm/boot/dts/omap3-igep0030.dts b/arch/arm/boot/dts/omap3-igep0030.dts
index 25170bd3c573..32f31035daa2 100644
--- a/arch/arm/boot/dts/omap3-igep0030.dts
+++ b/arch/arm/boot/dts/omap3-igep0030.dts
@@ -10,7 +10,7 @@
/ {
model = "IGEP COM MODULE Rev. E (TI OMAP AM/DM37x)";
- compatible = "isee,omap3-igep0030", "ti,omap36xx", "ti,omap3";
+ compatible = "isee,omap3-igep0030", "ti,omap3630", "ti,omap36xx", "ti,omap3";
vmmcsdio_fixed: fixedregulator-mmcsdio {
compatible = "regulator-fixed";
diff --git a/arch/arm/boot/dts/omap3-ldp.dts b/arch/arm/boot/dts/omap3-ldp.dts
index 9a5fde2d9bce..ec9ba04ef43b 100644
--- a/arch/arm/boot/dts/omap3-ldp.dts
+++ b/arch/arm/boot/dts/omap3-ldp.dts
@@ -10,7 +10,7 @@
/ {
model = "TI OMAP3430 LDP (Zoom1 Labrador)";
- compatible = "ti,omap3-ldp", "ti,omap3";
+ compatible = "ti,omap3-ldp", "ti,omap3430", "ti,omap3";
memory@80000000 {
device_type = "memory";
diff --git a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
index c22833d4e568..73d477898ec2 100644
--- a/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
+++ b/arch/arm/boot/dts/omap3-lilly-a83x.dtsi
@@ -7,7 +7,7 @@
/ {
model = "INCOstartec LILLY-A83X module (DM3730)";
- compatible = "incostartec,omap3-lilly-a83x", "ti,omap36xx", "ti,omap3";
+ compatible = "incostartec,omap3-lilly-a83x", "ti,omap3630", "ti,omap36xx", "ti,omap3";
chosen {
bootargs = "console=ttyO0,115200n8 vt.global_cursor_default=0 consoleblank=0";
diff --git a/arch/arm/boot/dts/omap3-lilly-dbb056.dts b/arch/arm/boot/dts/omap3-lilly-dbb056.dts
index fec335400074..ecb4ef738e07 100644
--- a/arch/arm/boot/dts/omap3-lilly-dbb056.dts
+++ b/arch/arm/boot/dts/omap3-lilly-dbb056.dts
@@ -8,7 +8,7 @@
/ {
model = "INCOstartec LILLY-DBB056 (DM3730)";
- compatible = "incostartec,omap3-lilly-dbb056", "incostartec,omap3-lilly-a83x", "ti,omap36xx", "ti,omap3";
+ compatible = "incostartec,omap3-lilly-dbb056", "incostartec,omap3-lilly-a83x", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&twl {
diff --git a/arch/arm/boot/dts/omap3-n9.dts b/arch/arm/boot/dts/omap3-n9.dts
index 74c0ff2350d3..2495a696cec6 100644
--- a/arch/arm/boot/dts/omap3-n9.dts
+++ b/arch/arm/boot/dts/omap3-n9.dts
@@ -12,7 +12,7 @@
/ {
model = "Nokia N9";
- compatible = "nokia,omap3-n9", "ti,omap36xx", "ti,omap3";
+ compatible = "nokia,omap3-n9", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&i2c2 {
diff --git a/arch/arm/boot/dts/omap3-n900.dts b/arch/arm/boot/dts/omap3-n900.dts
index 84a5ade1e865..63659880eeb3 100644
--- a/arch/arm/boot/dts/omap3-n900.dts
+++ b/arch/arm/boot/dts/omap3-n900.dts
@@ -155,6 +155,12 @@
pwms = <&pwm9 0 26316 0>; /* 38000 Hz */
};
+ rom_rng: rng {
+ compatible = "nokia,n900-rom-rng";
+ clocks = <&rng_ick>;
+ clock-names = "ick";
+ };
+
/* controlled (enabled/disabled) directly by bcm2048 and wl1251 */
vctcxo: vctcxo {
compatible = "fixed-clock";
diff --git a/arch/arm/boot/dts/omap3-n950-n9.dtsi b/arch/arm/boot/dts/omap3-n950-n9.dtsi
index 6681d4519e97..a075b63f3087 100644
--- a/arch/arm/boot/dts/omap3-n950-n9.dtsi
+++ b/arch/arm/boot/dts/omap3-n950-n9.dtsi
@@ -11,13 +11,6 @@
cpus {
cpu@0 {
cpu0-supply = <&vcc>;
- operating-points = <
- /* kHz uV */
- 300000 1012500
- 600000 1200000
- 800000 1325000
- 1000000 1375000
- >;
};
};
diff --git a/arch/arm/boot/dts/omap3-n950.dts b/arch/arm/boot/dts/omap3-n950.dts
index 9886bf8b90ab..31d47a1fad84 100644
--- a/arch/arm/boot/dts/omap3-n950.dts
+++ b/arch/arm/boot/dts/omap3-n950.dts
@@ -12,7 +12,7 @@
/ {
model = "Nokia N950";
- compatible = "nokia,omap3-n950", "ti,omap36xx", "ti,omap3";
+ compatible = "nokia,omap3-n950", "ti,omap3630", "ti,omap36xx", "ti,omap3";
keys {
compatible = "gpio-keys";
diff --git a/arch/arm/boot/dts/omap3-overo-storm-alto35.dts b/arch/arm/boot/dts/omap3-overo-storm-alto35.dts
index 18338576c41d..7f04dfad8203 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-alto35.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-alto35.dts
@@ -14,5 +14,5 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Alto35";
- compatible = "gumstix,omap3-overo-alto35", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-alto35", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
diff --git a/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts b/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts
index f204c8af8281..bc5a04e03336 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-chestnut43.dts
@@ -14,7 +14,7 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Chestnut43";
- compatible = "gumstix,omap3-overo-chestnut43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-chestnut43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts b/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts
index c633f7cee68e..065c31cbf0e2 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-gallop43.dts
@@ -14,7 +14,7 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Gallop43";
- compatible = "gumstix,omap3-overo-gallop43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-gallop43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-palo35.dts b/arch/arm/boot/dts/omap3-overo-storm-palo35.dts
index fb88ebc9858c..e38c1c51392c 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-palo35.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-palo35.dts
@@ -14,7 +14,7 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Palo35";
- compatible = "gumstix,omap3-overo-palo35", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-palo35", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-palo43.dts b/arch/arm/boot/dts/omap3-overo-storm-palo43.dts
index 76cca00d97b6..e6dc23159c4d 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-palo43.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-palo43.dts
@@ -14,7 +14,7 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Palo43";
- compatible = "gumstix,omap3-overo-palo43", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-palo43", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-summit.dts b/arch/arm/boot/dts/omap3-overo-storm-summit.dts
index cc081a9e4c1e..587c08ce282d 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-summit.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-summit.dts
@@ -14,7 +14,7 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Summit";
- compatible = "gumstix,omap3-overo-summit", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-summit", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-overo-storm-tobi.dts b/arch/arm/boot/dts/omap3-overo-storm-tobi.dts
index 1de41c0826e0..f57de6010994 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-tobi.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-tobi.dts
@@ -14,6 +14,6 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on Tobi";
- compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-tobi", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
diff --git a/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts b/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts
index 9ed13118ed8e..281af6c113be 100644
--- a/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts
+++ b/arch/arm/boot/dts/omap3-overo-storm-tobiduo.dts
@@ -14,5 +14,5 @@
/ {
model = "OMAP36xx/AM37xx/DM37xx Gumstix Overo on TobiDuo";
- compatible = "gumstix,omap3-overo-tobiduo", "gumstix,omap3-overo", "ti,omap36xx", "ti,omap3";
+ compatible = "gumstix,omap3-overo-tobiduo", "gumstix,omap3-overo", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
diff --git a/arch/arm/boot/dts/omap3-pandora-1ghz.dts b/arch/arm/boot/dts/omap3-pandora-1ghz.dts
index 81b957f33c9f..ea509956d7ac 100644
--- a/arch/arm/boot/dts/omap3-pandora-1ghz.dts
+++ b/arch/arm/boot/dts/omap3-pandora-1ghz.dts
@@ -16,7 +16,7 @@
/ {
model = "Pandora Handheld Console 1GHz";
- compatible = "openpandora,omap3-pandora-1ghz", "ti,omap36xx", "ti,omap3";
+ compatible = "openpandora,omap3-pandora-1ghz", "ti,omap3630", "ti,omap36xx", "ti,omap3";
};
&omap3_pmx_core2 {
diff --git a/arch/arm/boot/dts/omap3-pandora-common.dtsi b/arch/arm/boot/dts/omap3-pandora-common.dtsi
index ec5891718ae6..150d5be42d27 100644
--- a/arch/arm/boot/dts/omap3-pandora-common.dtsi
+++ b/arch/arm/boot/dts/omap3-pandora-common.dtsi
@@ -226,6 +226,17 @@
gpio = <&gpio6 4 GPIO_ACTIVE_HIGH>; /* GPIO_164 */
};
+ /* wl1251 wifi+bt module */
+ wlan_en: fixed-regulator-wg7210_en {
+ compatible = "regulator-fixed";
+ regulator-name = "vwlan";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ startup-delay-us = <50000>;
+ enable-active-high;
+ gpio = <&gpio1 23 GPIO_ACTIVE_HIGH>;
+ };
+
/* wg7210 (wifi+bt module) 32k clock buffer */
wg7210_32k: fixed-regulator-wg7210_32k {
compatible = "regulator-fixed";
@@ -522,9 +533,30 @@
/*wp-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>;*/ /* GPIO_127 */
};
-/* mmc3 is probed using pdata-quirks to pass wl1251 card data */
&mmc3 {
- status = "disabled";
+ vmmc-supply = <&wlan_en>;
+
+ bus-width = <4>;
+ non-removable;
+ ti,non-removable;
+ cap-power-off-card;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&mmc3_pins>;
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ wlan: wifi@1 {
+ compatible = "ti,wl1251";
+
+ reg = <1>;
+
+ interrupt-parent = <&gpio1>;
+ interrupts = <21 IRQ_TYPE_LEVEL_HIGH>; /* GPIO_21 */
+
+ ti,wl1251-has-eeprom;
+ };
};
/* bluetooth*/
diff --git a/arch/arm/boot/dts/omap3-sbc-t3530.dts b/arch/arm/boot/dts/omap3-sbc-t3530.dts
index ae96002abb3b..24bf3fd86641 100644
--- a/arch/arm/boot/dts/omap3-sbc-t3530.dts
+++ b/arch/arm/boot/dts/omap3-sbc-t3530.dts
@@ -8,7 +8,7 @@
/ {
model = "CompuLab SBC-T3530 with CM-T3530";
- compatible = "compulab,omap3-sbc-t3530", "compulab,omap3-cm-t3530", "ti,omap34xx", "ti,omap3";
+ compatible = "compulab,omap3-sbc-t3530", "compulab,omap3-cm-t3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
aliases {
display0 = &dvi0;
diff --git a/arch/arm/boot/dts/omap3-sbc-t3730.dts b/arch/arm/boot/dts/omap3-sbc-t3730.dts
index 7de6df16fc17..eb3893b9535e 100644
--- a/arch/arm/boot/dts/omap3-sbc-t3730.dts
+++ b/arch/arm/boot/dts/omap3-sbc-t3730.dts
@@ -8,7 +8,7 @@
/ {
model = "CompuLab SBC-T3730 with CM-T3730";
- compatible = "compulab,omap3-sbc-t3730", "compulab,omap3-cm-t3730", "ti,omap36xx", "ti,omap3";
+ compatible = "compulab,omap3-sbc-t3730", "compulab,omap3-cm-t3730", "ti,omap3630", "ti,omap36xx", "ti,omap3";
aliases {
display0 = &dvi0;
diff --git a/arch/arm/boot/dts/omap3-sniper.dts b/arch/arm/boot/dts/omap3-sniper.dts
index 40a87330e8c3..b6879cdc5c13 100644
--- a/arch/arm/boot/dts/omap3-sniper.dts
+++ b/arch/arm/boot/dts/omap3-sniper.dts
@@ -9,7 +9,7 @@
/ {
model = "LG Optimus Black";
- compatible = "lg,omap3-sniper", "ti,omap36xx", "ti,omap3";
+ compatible = "lg,omap3-sniper", "ti,omap3630", "ti,omap36xx", "ti,omap3";
cpus {
cpu@0 {
diff --git a/arch/arm/boot/dts/omap3-thunder.dts b/arch/arm/boot/dts/omap3-thunder.dts
index 6276e7079b36..64221e3b3477 100644
--- a/arch/arm/boot/dts/omap3-thunder.dts
+++ b/arch/arm/boot/dts/omap3-thunder.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3 Thunder baseboard with TAO3530 SOM";
- compatible = "technexion,omap3-thunder", "technexion,omap3-tao3530", "ti,omap34xx", "ti,omap3";
+ compatible = "technexion,omap3-thunder", "technexion,omap3-tao3530", "ti,omap3430", "ti,omap34xx", "ti,omap3";
};
&omap3_pmx_core {
diff --git a/arch/arm/boot/dts/omap3-zoom3.dts b/arch/arm/boot/dts/omap3-zoom3.dts
index db3a2fe84e99..d240e39f2151 100644
--- a/arch/arm/boot/dts/omap3-zoom3.dts
+++ b/arch/arm/boot/dts/omap3-zoom3.dts
@@ -9,7 +9,7 @@
/ {
model = "TI Zoom3";
- compatible = "ti,omap3-zoom3", "ti,omap36xx", "ti,omap3";
+ compatible = "ti,omap3-zoom3", "ti,omap3630", "ti,omap36xx", "ti,omap3";
cpus {
cpu@0 {
diff --git a/arch/arm/boot/dts/omap3430-sdp.dts b/arch/arm/boot/dts/omap3430-sdp.dts
index 0abd61108a53..7bfde8aac7ae 100644
--- a/arch/arm/boot/dts/omap3430-sdp.dts
+++ b/arch/arm/boot/dts/omap3430-sdp.dts
@@ -8,7 +8,7 @@
/ {
model = "TI OMAP3430 SDP";
- compatible = "ti,omap3430-sdp", "ti,omap3";
+ compatible = "ti,omap3430-sdp", "ti,omap3430", "ti,omap3";
memory@80000000 {
device_type = "memory";
diff --git a/arch/arm/boot/dts/omap34xx.dtsi b/arch/arm/boot/dts/omap34xx.dtsi
index 7b09cbee8bb8..c4dd9801840d 100644
--- a/arch/arm/boot/dts/omap34xx.dtsi
+++ b/arch/arm/boot/dts/omap34xx.dtsi
@@ -16,19 +16,67 @@
/ {
cpus {
cpu: cpu@0 {
- /* OMAP343x/OMAP35xx variants OPP1-5 */
- operating-points = <
- /* kHz uV */
- 125000 975000
- 250000 1075000
- 500000 1200000
- 550000 1270000
- 600000 1350000
- >;
+ /* OMAP343x/OMAP35xx variants OPP1-6 */
+ operating-points-v2 = <&cpu0_opp_table>;
+
clock-latency = <300000>; /* From legacy driver */
};
};
+ /* see Documentation/devicetree/bindings/opp/opp.txt */
+ cpu0_opp_table: opp-table {
+ compatible = "operating-points-v2-ti-cpu";
+ syscon = <&scm_conf>;
+
+ opp1-125000000 {
+ opp-hz = /bits/ 64 <125000000>;
+ /*
+ * we currently only select the max voltage from table
+ * Table 3-3 of the omap3530 Data sheet (SPRS507F).
+ * Format is: <target min max>
+ */
+ opp-microvolt = <975000 975000 975000>;
+ /*
+ * first value is silicon revision bit mask
+ * second one 720MHz Device Identification bit mask
+ */
+ opp-supported-hw = <0xffffffff 3>;
+ };
+
+ opp2-250000000 {
+ opp-hz = /bits/ 64 <250000000>;
+ opp-microvolt = <1075000 1075000 1075000>;
+ opp-supported-hw = <0xffffffff 3>;
+ opp-suspend;
+ };
+
+ opp3-500000000 {
+ opp-hz = /bits/ 64 <500000000>;
+ opp-microvolt = <1200000 1200000 1200000>;
+ opp-supported-hw = <0xffffffff 3>;
+ };
+
+ opp4-550000000 {
+ opp-hz = /bits/ 64 <550000000>;
+ opp-microvolt = <1275000 1275000 1275000>;
+ opp-supported-hw = <0xffffffff 3>;
+ };
+
+ opp5-600000000 {
+ opp-hz = /bits/ 64 <600000000>;
+ opp-microvolt = <1350000 1350000 1350000>;
+ opp-supported-hw = <0xffffffff 3>;
+ };
+
+ opp6-720000000 {
+ opp-hz = /bits/ 64 <720000000>;
+ opp-microvolt = <1350000 1350000 1350000>;
+ /* only high-speed grade omap3530 devices */
+ opp-supported-hw = <0xffffffff 2>;
+ turbo-mode;
+ };
+ };
+
ocp@68000000 {
omap3_pmx_core2: pinmux@480025d8 {
compatible = "ti,omap3-padconf", "pinctrl-single";
diff --git a/arch/arm/boot/dts/omap36xx.dtsi b/arch/arm/boot/dts/omap36xx.dtsi
index 1e552f08f120..c618cb257d00 100644
--- a/arch/arm/boot/dts/omap36xx.dtsi
+++ b/arch/arm/boot/dts/omap36xx.dtsi
@@ -19,16 +19,65 @@
};
cpus {
- /* OMAP3630/OMAP37xx 'standard device' variants OPP50 to OPP130 */
+ /* OMAP3630/OMAP37xx variants OPP50 to OPP130 and OPP1G */
cpu: cpu@0 {
- operating-points = <
- /* kHz uV */
- 300000 1012500
- 600000 1200000
- 800000 1325000
- >;
- clock-latency = <300000>; /* From legacy driver */
+ operating-points-v2 = <&cpu0_opp_table>;
+
+ vbb-supply = <&abb_mpu_iva>;
+ clock-latency = <300000>; /* From omap-cpufreq driver */
+ };
+ };
+
+ /* see Documentation/devicetree/bindings/opp/opp.txt */
+ cpu0_opp_table: opp-table {
+ compatible = "operating-points-v2-ti-cpu";
+ syscon = <&scm_conf>;
+
+ opp50-300000000 {
+ opp-hz = /bits/ 64 <300000000>;
+ /*
+ * we currently only select the max voltage from table
+ * Table 4-19 of the DM3730 Data sheet (SPRS685B)
+ * Format is: cpu0-supply: <target min max>
+ * vbb-supply: <target min max>
+ */
+ opp-microvolt = <1012500 1012500 1012500>,
+ <1012500 1012500 1012500>;
+ /*
+ * first value is silicon revision bit mask
+ * second one is "speed binned" bit mask
+ */
+ opp-supported-hw = <0xffffffff 3>;
+ opp-suspend;
+ };
+
+ opp100-600000000 {
+ opp-hz = /bits/ 64 <600000000>;
+ opp-microvolt = <1200000 1200000 1200000>,
+ <1200000 1200000 1200000>;
+ opp-supported-hw = <0xffffffff 3>;
+ };
+
+ opp130-800000000 {
+ opp-hz = /bits/ 64 <800000000>;
+ opp-microvolt = <1325000 1325000 1325000>,
+ <1325000 1325000 1325000>;
+ opp-supported-hw = <0xffffffff 3>;
};
+
+ opp1g-1000000000 {
+ opp-hz = /bits/ 64 <1000000000>;
+ opp-microvolt = <1375000 1375000 1375000>,
+ <1375000 1375000 1375000>;
+ /* only on am/dm37x with speed-binned bit set */
+ opp-supported-hw = <0xffffffff 2>;
+ turbo-mode;
+ };
+ };
+
+ opp_supply_mpu_iva: opp_supply {
+ compatible = "ti,omap-opp-supply";
+ ti,absolute-max-voltage-uv = <1375000>;
};
ocp@68000000 {
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index 40d7f1a4fc45..89cce8d4bc6b 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -554,3 +554,4 @@ CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_SCHEDSTATS=y
# CONFIG_DEBUG_BUGVERBOSE is not set
+CONFIG_TI_CPSW_SWITCHDEV=y
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 043b0b18bf7e..2674de6ada1f 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -30,7 +30,7 @@ config CRYPTO_SHA1_ARM_NEON
config CRYPTO_SHA1_ARM_CE
tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
select CRYPTO_SHA1_ARM
select CRYPTO_HASH
help
@@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE
config CRYPTO_SHA2_ARM_CE
tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
select CRYPTO_SHA256_ARM
select CRYPTO_HASH
help
@@ -81,7 +81,7 @@ config CRYPTO_AES_ARM
config CRYPTO_AES_ARM_BS
tristate "Bit sliced AES using NEON instructions"
depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
+ select CRYPTO_SKCIPHER
select CRYPTO_LIB_AES
select CRYPTO_SIMD
help
@@ -96,8 +96,8 @@ config CRYPTO_AES_ARM_BS
config CRYPTO_AES_ARM_CE
tristate "Accelerated AES using ARMv8 Crypto Extensions"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
+ select CRYPTO_SKCIPHER
select CRYPTO_LIB_AES
select CRYPTO_SIMD
help
@@ -106,7 +106,7 @@ config CRYPTO_AES_ARM_CE
config CRYPTO_GHASH_ARM_CE
tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions"
- depends on KERNEL_MODE_NEON
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
select CRYPTO_HASH
select CRYPTO_CRYPTD
select CRYPTO_GF128MUL
@@ -118,23 +118,35 @@ config CRYPTO_GHASH_ARM_CE
config CRYPTO_CRCT10DIF_ARM_CE
tristate "CRCT10DIF digest algorithm using PMULL instructions"
- depends on KERNEL_MODE_NEON && CRC_T10DIF
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
+ depends on CRC_T10DIF
select CRYPTO_HASH
config CRYPTO_CRC32_ARM_CE
tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions"
- depends on KERNEL_MODE_NEON && CRC32
+ depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800)
+ depends on CRC32
select CRYPTO_HASH
config CRYPTO_CHACHA20_NEON
- tristate "NEON accelerated ChaCha stream cipher algorithms"
- depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
- select CRYPTO_CHACHA20
+ tristate "NEON and scalar accelerated ChaCha stream cipher algorithms"
+ select CRYPTO_SKCIPHER
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_POLY1305_ARM
+ tristate "Accelerated scalar and SIMD Poly1305 hash implementations"
+ select CRYPTO_HASH
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
config CRYPTO_NHPOLY1305_NEON
tristate "NEON accelerated NHPoly1305 hash function (for Adiantum)"
depends on KERNEL_MODE_NEON
select CRYPTO_NHPOLY1305
+config CRYPTO_CURVE25519_NEON
+ tristate "NEON accelerated Curve25519 scalar multiplication library"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_LIB_CURVE25519_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+
endif
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 4180f3a13512..b745c17d356f 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -10,34 +10,16 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
+obj-$(CONFIG_CRYPTO_CURVE25519_NEON) += curve25519-neon.o
-ce-obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
-ce-obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
-crc-obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
-
-ifneq ($(crc-obj-y)$(crc-obj-m),)
-ifeq ($(call as-instr,.arch armv8-a\n.arch_extension crc,y,n),y)
-ce-obj-y += $(crc-obj-y)
-ce-obj-m += $(crc-obj-m)
-else
-$(warning These CRC Extensions modules need binutils 2.23 or higher)
-$(warning $(crc-obj-y) $(crc-obj-m))
-endif
-endif
-
-ifneq ($(ce-obj-y)$(ce-obj-m),)
-ifeq ($(call as-instr,.fpu crypto-neon-fp-armv8,y,n),y)
-obj-y += $(ce-obj-y)
-obj-m += $(ce-obj-m)
-else
-$(warning These ARMv8 Crypto Extensions modules need binutils 2.23 or higher)
-$(warning $(ce-obj-y) $(ce-obj-m))
-endif
-endif
+obj-$(CONFIG_CRYPTO_AES_ARM_CE) += aes-arm-ce.o
+obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) += sha1-arm-ce.o
+obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) += sha2-arm-ce.o
+obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) += ghash-arm-ce.o
+obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) += crct10dif-arm-ce.o
+obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) += crc32-arm-ce.o
aes-arm-y := aes-cipher-core.o aes-cipher-glue.o
aes-arm-bs-y := aes-neonbs-core.o aes-neonbs-glue.o
@@ -53,13 +35,19 @@ aes-arm-ce-y := aes-ce-core.o aes-ce-glue.o
ghash-arm-ce-y := ghash-ce-core.o ghash-ce-glue.o
crct10dif-arm-ce-y := crct10dif-ce-core.o crct10dif-ce-glue.o
crc32-arm-ce-y:= crc32-ce-core.o crc32-ce-glue.o
-chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+chacha-neon-y := chacha-scalar-core.o chacha-glue.o
+chacha-neon-$(CONFIG_KERNEL_MODE_NEON) += chacha-neon-core.o
+poly1305-arm-y := poly1305-core.o poly1305-glue.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
+curve25519-neon-y := curve25519-core.o curve25519-glue.o
ifdef REGENERATE_ARM_CRYPTO
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)
+$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl
+ $(call cmd,perl)
+
$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
$(call cmd,perl)
@@ -67,4 +55,9 @@ $(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
$(call cmd,perl)
endif
-clean-files += sha256-core.S sha512-core.S
+clean-files += poly1305-core.S sha256-core.S sha512-core.S
+
+# massage the perlasm code a bit so we only get the NEON routine if we need it
+poly1305-aflags-$(CONFIG_CPU_V7) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=5
+poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) := -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_ARCH__=7
+AFLAGS_poly1305-core.o += $(poly1305-aflags-y)
diff --git a/arch/arm/crypto/chacha-glue.c b/arch/arm/crypto/chacha-glue.c
new file mode 100644
index 000000000000..3f0c057aa050
--- /dev/null
+++ b/arch/arm/crypto/chacha-glue.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2016-2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2015 Martin Willi
+ */
+
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cputype.h>
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+
+asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+ int nrounds);
+asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
+ int nrounds);
+asmlinkage void hchacha_block_arm(const u32 *state, u32 *out, int nrounds);
+asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+
+asmlinkage void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ const u32 *state, int nrounds);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(use_neon);
+
+static inline bool neon_usable(void)
+{
+ return static_branch_likely(&use_neon) && crypto_simd_usable();
+}
+
+static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds)
+{
+ u8 buf[CHACHA_BLOCK_SIZE];
+
+ while (bytes >= CHACHA_BLOCK_SIZE * 4) {
+ chacha_4block_xor_neon(state, dst, src, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE * 4;
+ src += CHACHA_BLOCK_SIZE * 4;
+ dst += CHACHA_BLOCK_SIZE * 4;
+ state[12] += 4;
+ }
+ while (bytes >= CHACHA_BLOCK_SIZE) {
+ chacha_block_xor_neon(state, dst, src, nrounds);
+ bytes -= CHACHA_BLOCK_SIZE;
+ src += CHACHA_BLOCK_SIZE;
+ dst += CHACHA_BLOCK_SIZE;
+ state[12]++;
+ }
+ if (bytes) {
+ memcpy(buf, src, bytes);
+ chacha_block_xor_neon(state, buf, buf, nrounds);
+ memcpy(dst, buf, bytes);
+ }
+}
+
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable()) {
+ hchacha_block_arm(state, stream, nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, stream, nrounds);
+ kernel_neon_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ if (!IS_ENABLED(CONFIG_KERNEL_MODE_NEON) || !neon_usable() ||
+ bytes <= CHACHA_BLOCK_SIZE) {
+ chacha_doarm(dst, src, bytes, state, nrounds);
+ state[12] += DIV_ROUND_UP(bytes, CHACHA_BLOCK_SIZE);
+ return;
+ }
+
+ kernel_neon_begin();
+ chacha_doneon(state, dst, src, bytes, nrounds);
+ kernel_neon_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static int chacha_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx, const u8 *iv,
+ bool neon)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ chacha_init_generic(state, ctx->key, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+ if (!neon) {
+ chacha_doarm(walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, state, ctx->nrounds);
+ state[12] += DIV_ROUND_UP(nbytes, CHACHA_BLOCK_SIZE);
+ } else {
+ kernel_neon_begin();
+ chacha_doneon(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes, ctx->nrounds);
+ kernel_neon_end();
+ }
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+static int do_chacha(struct skcipher_request *req, bool neon)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_stream_xor(req, ctx, req->iv, neon);
+}
+
+static int chacha_arm(struct skcipher_request *req)
+{
+ return do_chacha(req, false);
+}
+
+static int chacha_neon(struct skcipher_request *req)
+{
+ return do_chacha(req, neon_usable());
+}
+
+static int do_xchacha(struct skcipher_request *req, bool neon)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ chacha_init_generic(state, ctx->key, req->iv);
+
+ if (!neon) {
+ hchacha_block_arm(state, subctx.key, ctx->nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, subctx.key, ctx->nrounds);
+ kernel_neon_end();
+ }
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ return chacha_stream_xor(req, &subctx, real_iv, neon);
+}
+
+static int xchacha_arm(struct skcipher_request *req)
+{
+ return do_xchacha(req, false);
+}
+
+static int xchacha_neon(struct skcipher_request *req)
+{
+ return do_xchacha(req, neon_usable());
+}
+
+static struct skcipher_alg arm_algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-arm",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_arm,
+ .decrypt = chacha_arm,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-arm",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_arm,
+ .decrypt = xchacha_arm,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-arm",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_arm,
+ .decrypt = xchacha_arm,
+ },
+};
+
+static struct skcipher_alg neon_algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_neon,
+ .decrypt = chacha_neon,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_neon,
+ .decrypt = xchacha_neon,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-neon",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .walksize = 4 * CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_neon,
+ .decrypt = xchacha_neon,
+ }
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+ int err;
+
+ err = crypto_register_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ if (err)
+ return err;
+
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) {
+ int i;
+
+ switch (read_cpuid_part()) {
+ case ARM_CPU_PART_CORTEX_A7:
+ case ARM_CPU_PART_CORTEX_A5:
+ /*
+ * The Cortex-A7 and Cortex-A5 do not perform well with
+ * the NEON implementation but do incredibly with the
+ * scalar one and use less power.
+ */
+ for (i = 0; i < ARRAY_SIZE(neon_algs); i++)
+ neon_algs[i].base.cra_priority = 0;
+ break;
+ default:
+ static_branch_enable(&use_neon);
+ }
+
+ err = crypto_register_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+ if (err)
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ }
+ return err;
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+ crypto_unregister_skciphers(arm_algs, ARRAY_SIZE(arm_algs));
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON))
+ crypto_unregister_skciphers(neon_algs, ARRAY_SIZE(neon_algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (scalar and NEON accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-arm");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-arm");
+#ifdef CONFIG_KERNEL_MODE_NEON
+MODULE_ALIAS_CRYPTO("chacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha20-neon");
+MODULE_ALIAS_CRYPTO("xchacha12-neon");
+#endif
diff --git a/arch/arm/crypto/chacha-neon-glue.c b/arch/arm/crypto/chacha-neon-glue.c
deleted file mode 100644
index a8e9b534c8da..000000000000
--- a/arch/arm/crypto/chacha-neon-glue.c
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
- * including ChaCha20 (RFC7539)
- *
- * Copyright (C) 2016 Linaro, Ltd. <ard.biesheuvel@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on:
- * ChaCha20 256-bit cipher algorithm, RFC7539, SIMD glue code
- *
- * Copyright (C) 2015 Martin Willi
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/chacha.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <asm/hwcap.h>
-#include <asm/neon.h>
-#include <asm/simd.h>
-
-asmlinkage void chacha_block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
- int nrounds);
-asmlinkage void chacha_4block_xor_neon(const u32 *state, u8 *dst, const u8 *src,
- int nrounds);
-asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
-
-static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
- unsigned int bytes, int nrounds)
-{
- u8 buf[CHACHA_BLOCK_SIZE];
-
- while (bytes >= CHACHA_BLOCK_SIZE * 4) {
- chacha_4block_xor_neon(state, dst, src, nrounds);
- bytes -= CHACHA_BLOCK_SIZE * 4;
- src += CHACHA_BLOCK_SIZE * 4;
- dst += CHACHA_BLOCK_SIZE * 4;
- state[12] += 4;
- }
- while (bytes >= CHACHA_BLOCK_SIZE) {
- chacha_block_xor_neon(state, dst, src, nrounds);
- bytes -= CHACHA_BLOCK_SIZE;
- src += CHACHA_BLOCK_SIZE;
- dst += CHACHA_BLOCK_SIZE;
- state[12]++;
- }
- if (bytes) {
- memcpy(buf, src, bytes);
- chacha_block_xor_neon(state, buf, buf, nrounds);
- memcpy(dst, buf, bytes);
- }
-}
-
-static int chacha_neon_stream_xor(struct skcipher_request *req,
- const struct chacha_ctx *ctx, const u8 *iv)
-{
- struct skcipher_walk walk;
- u32 state[16];
- int err;
-
- err = skcipher_walk_virt(&walk, req, false);
-
- crypto_chacha_init(state, ctx, iv);
-
- while (walk.nbytes > 0) {
- unsigned int nbytes = walk.nbytes;
-
- if (nbytes < walk.total)
- nbytes = round_down(nbytes, walk.stride);
-
- kernel_neon_begin();
- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, ctx->nrounds);
- kernel_neon_end();
- err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
- }
-
- return err;
-}
-
-static int chacha_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
-
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_chacha_crypt(req);
-
- return chacha_neon_stream_xor(req, ctx, req->iv);
-}
-
-static int xchacha_neon(struct skcipher_request *req)
-{
- struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
- struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct chacha_ctx subctx;
- u32 state[16];
- u8 real_iv[16];
-
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_xchacha_crypt(req);
-
- crypto_chacha_init(state, ctx, req->iv);
-
- kernel_neon_begin();
- hchacha_block_neon(state, subctx.key, ctx->nrounds);
- kernel_neon_end();
- subctx.nrounds = ctx->nrounds;
-
- memcpy(&real_iv[0], req->iv + 24, 8);
- memcpy(&real_iv[8], req->iv + 16, 8);
- return chacha_neon_stream_xor(req, &subctx, real_iv);
-}
-
-static struct skcipher_alg algs[] = {
- {
- .base.cra_name = "chacha20",
- .base.cra_driver_name = "chacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = CHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = chacha_neon,
- .decrypt = chacha_neon,
- }, {
- .base.cra_name = "xchacha20",
- .base.cra_driver_name = "xchacha20-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
- .encrypt = xchacha_neon,
- .decrypt = xchacha_neon,
- }, {
- .base.cra_name = "xchacha12",
- .base.cra_driver_name = "xchacha12-neon",
- .base.cra_priority = 300,
- .base.cra_blocksize = 1,
- .base.cra_ctxsize = sizeof(struct chacha_ctx),
- .base.cra_module = THIS_MODULE,
-
- .min_keysize = CHACHA_KEY_SIZE,
- .max_keysize = CHACHA_KEY_SIZE,
- .ivsize = XCHACHA_IV_SIZE,
- .chunksize = CHACHA_BLOCK_SIZE,
- .walksize = 4 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha12_setkey,
- .encrypt = xchacha_neon,
- .decrypt = xchacha_neon,
- }
-};
-
-static int __init chacha_simd_mod_init(void)
-{
- if (!(elf_hwcap & HWCAP_NEON))
- return -ENODEV;
-
- return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-static void __exit chacha_simd_mod_fini(void)
-{
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
-}
-
-module_init(chacha_simd_mod_init);
-module_exit(chacha_simd_mod_fini);
-
-MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (NEON accelerated)");
-MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
-MODULE_LICENSE("GPL v2");
-MODULE_ALIAS_CRYPTO("chacha20");
-MODULE_ALIAS_CRYPTO("chacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha20");
-MODULE_ALIAS_CRYPTO("xchacha20-neon");
-MODULE_ALIAS_CRYPTO("xchacha12");
-MODULE_ALIAS_CRYPTO("xchacha12-neon");
diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
new file mode 100644
index 000000000000..2985b80a45b5
--- /dev/null
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -0,0 +1,460 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Google, Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/*
+ * Design notes:
+ *
+ * 16 registers would be needed to hold the state matrix, but only 14 are
+ * available because 'sp' and 'pc' cannot be used. So we spill the elements
+ * (x8, x9) to the stack and swap them out with (x10, x11). This adds one
+ * 'ldrd' and one 'strd' instruction per round.
+ *
+ * All rotates are performed using the implicit rotate operand accepted by the
+ * 'add' and 'eor' instructions. This is faster than using explicit rotate
+ * instructions. To make this work, we allow the values in the second and last
+ * rows of the ChaCha state matrix (rows 'b' and 'd') to temporarily have the
+ * wrong rotation amount. The rotation amount is then fixed up just in time
+ * when the values are used. 'brot' is the number of bits the values in row 'b'
+ * need to be rotated right to arrive at the correct values, and 'drot'
+ * similarly for row 'd'. (brot, drot) start out as (0, 0) but we make it such
+ * that they end up as (25, 24) after every round.
+ */
+
+ // ChaCha state registers
+ X0 .req r0
+ X1 .req r1
+ X2 .req r2
+ X3 .req r3
+ X4 .req r4
+ X5 .req r5
+ X6 .req r6
+ X7 .req r7
+ X8_X10 .req r8 // shared by x8 and x10
+ X9_X11 .req r9 // shared by x9 and x11
+ X12 .req r10
+ X13 .req r11
+ X14 .req r12
+ X15 .req r14
+
+.macro __rev out, in, t0, t1, t2
+.if __LINUX_ARM_ARCH__ >= 6
+ rev \out, \in
+.else
+ lsl \t0, \in, #24
+ and \t1, \in, #0xff00
+ and \t2, \in, #0xff0000
+ orr \out, \t0, \in, lsr #24
+ orr \out, \out, \t1, lsl #8
+ orr \out, \out, \t2, lsr #8
+.endif
+.endm
+
+.macro _le32_bswap x, t0, t1, t2
+#ifdef __ARMEB__
+ __rev \x, \x, \t0, \t1, \t2
+#endif
+.endm
+
+.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
+ _le32_bswap \a, \t0, \t1, \t2
+ _le32_bswap \b, \t0, \t1, \t2
+ _le32_bswap \c, \t0, \t1, \t2
+ _le32_bswap \d, \t0, \t1, \t2
+.endm
+
+.macro __ldrd a, b, src, offset
+#if __LINUX_ARM_ARCH__ >= 6
+ ldrd \a, \b, [\src, #\offset]
+#else
+ ldr \a, [\src, #\offset]
+ ldr \b, [\src, #\offset + 4]
+#endif
+.endm
+
+.macro __strd a, b, dst, offset
+#if __LINUX_ARM_ARCH__ >= 6
+ strd \a, \b, [\dst, #\offset]
+#else
+ str \a, [\dst, #\offset]
+ str \b, [\dst, #\offset + 4]
+#endif
+.endm
+
+.macro _halfround a1, b1, c1, d1, a2, b2, c2, d2
+
+ // a += b; d ^= a; d = rol(d, 16);
+ add \a1, \a1, \b1, ror #brot
+ add \a2, \a2, \b2, ror #brot
+ eor \d1, \a1, \d1, ror #drot
+ eor \d2, \a2, \d2, ror #drot
+ // drot == 32 - 16 == 16
+
+ // c += d; b ^= c; b = rol(b, 12);
+ add \c1, \c1, \d1, ror #16
+ add \c2, \c2, \d2, ror #16
+ eor \b1, \c1, \b1, ror #brot
+ eor \b2, \c2, \b2, ror #brot
+ // brot == 32 - 12 == 20
+
+ // a += b; d ^= a; d = rol(d, 8);
+ add \a1, \a1, \b1, ror #20
+ add \a2, \a2, \b2, ror #20
+ eor \d1, \a1, \d1, ror #16
+ eor \d2, \a2, \d2, ror #16
+ // drot == 32 - 8 == 24
+
+ // c += d; b ^= c; b = rol(b, 7);
+ add \c1, \c1, \d1, ror #24
+ add \c2, \c2, \d2, ror #24
+ eor \b1, \c1, \b1, ror #20
+ eor \b2, \c2, \b2, ror #20
+ // brot == 32 - 7 == 25
+.endm
+
+.macro _doubleround
+
+ // column round
+
+ // quarterrounds: (x0, x4, x8, x12) and (x1, x5, x9, x13)
+ _halfround X0, X4, X8_X10, X12, X1, X5, X9_X11, X13
+
+ // save (x8, x9); restore (x10, x11)
+ __strd X8_X10, X9_X11, sp, 0
+ __ldrd X8_X10, X9_X11, sp, 8
+
+ // quarterrounds: (x2, x6, x10, x14) and (x3, x7, x11, x15)
+ _halfround X2, X6, X8_X10, X14, X3, X7, X9_X11, X15
+
+ .set brot, 25
+ .set drot, 24
+
+ // diagonal round
+
+ // quarterrounds: (x0, x5, x10, x15) and (x1, x6, x11, x12)
+ _halfround X0, X5, X8_X10, X15, X1, X6, X9_X11, X12
+
+ // save (x10, x11); restore (x8, x9)
+ __strd X8_X10, X9_X11, sp, 8
+ __ldrd X8_X10, X9_X11, sp, 0
+
+ // quarterrounds: (x2, x7, x8, x13) and (x3, x4, x9, x14)
+ _halfround X2, X7, X8_X10, X13, X3, X4, X9_X11, X14
+.endm
+
+.macro _chacha_permute nrounds
+ .set brot, 0
+ .set drot, 0
+ .rept \nrounds / 2
+ _doubleround
+ .endr
+.endm
+
+.macro _chacha nrounds
+
+.Lnext_block\@:
+ // Stack: unused0-unused1 x10-x11 x0-x15 OUT IN LEN
+ // Registers contain x0-x9,x12-x15.
+
+ // Do the core ChaCha permutation to update x0-x15.
+ _chacha_permute \nrounds
+
+ add sp, #8
+ // Stack: x10-x11 orig_x0-orig_x15 OUT IN LEN
+ // Registers contain x0-x9,x12-x15.
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+ // Free up some registers (r8-r12,r14) by pushing (x8-x9,x12-x15).
+ push {X8_X10, X9_X11, X12, X13, X14, X15}
+
+ // Load (OUT, IN, LEN).
+ ldr r14, [sp, #96]
+ ldr r12, [sp, #100]
+ ldr r11, [sp, #104]
+
+ orr r10, r14, r12
+
+ // Use slow path if fewer than 64 bytes remain.
+ cmp r11, #64
+ blt .Lxor_slowpath\@
+
+ // Use slow path if IN and/or OUT isn't 4-byte aligned. Needed even on
+ // ARMv6+, since ldmia and stmia (used below) still require alignment.
+ tst r10, #3
+ bne .Lxor_slowpath\@
+
+ // Fast path: XOR 64 bytes of aligned data.
+
+ // Stack: x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
+ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is OUT.
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+ // x0-x3
+ __ldrd r8, r9, sp, 32
+ __ldrd r10, r11, sp, 40
+ add X0, X0, r8
+ add X1, X1, r9
+ add X2, X2, r10
+ add X3, X3, r11
+ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ ldmia r12!, {r8-r11}
+ eor X0, X0, r8
+ eor X1, X1, r9
+ eor X2, X2, r10
+ eor X3, X3, r11
+ stmia r14!, {X0-X3}
+
+ // x4-x7
+ __ldrd r8, r9, sp, 48
+ __ldrd r10, r11, sp, 56
+ add X4, r8, X4, ror #brot
+ add X5, r9, X5, ror #brot
+ ldmia r12!, {X0-X3}
+ add X6, r10, X6, ror #brot
+ add X7, r11, X7, ror #brot
+ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ eor X4, X4, X0
+ eor X5, X5, X1
+ eor X6, X6, X2
+ eor X7, X7, X3
+ stmia r14!, {X4-X7}
+
+ // x8-x15
+ pop {r0-r7} // (x8-x9,x12-x15,x10-x11)
+ __ldrd r8, r9, sp, 32
+ __ldrd r10, r11, sp, 40
+ add r0, r0, r8 // x8
+ add r1, r1, r9 // x9
+ add r6, r6, r10 // x10
+ add r7, r7, r11 // x11
+ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ ldmia r12!, {r8-r11}
+ eor r0, r0, r8 // x8
+ eor r1, r1, r9 // x9
+ eor r6, r6, r10 // x10
+ eor r7, r7, r11 // x11
+ stmia r14!, {r0,r1,r6,r7}
+ ldmia r12!, {r0,r1,r6,r7}
+ __ldrd r8, r9, sp, 48
+ __ldrd r10, r11, sp, 56
+ add r2, r8, r2, ror #drot // x12
+ add r3, r9, r3, ror #drot // x13
+ add r4, r10, r4, ror #drot // x14
+ add r5, r11, r5, ror #drot // x15
+ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ ldr r9, [sp, #72] // load LEN
+ eor r2, r2, r0 // x12
+ eor r3, r3, r1 // x13
+ eor r4, r4, r6 // x14
+ eor r5, r5, r7 // x15
+ subs r9, #64 // decrement and check LEN
+ stmia r14!, {r2-r5}
+
+ beq .Ldone\@
+
+.Lprepare_for_next_block\@:
+
+ // Stack: x0-x15 OUT IN LEN
+
+ // Increment block counter (x12)
+ add r8, #1
+
+ // Store updated (OUT, IN, LEN)
+ str r14, [sp, #64]
+ str r12, [sp, #68]
+ str r9, [sp, #72]
+
+ mov r14, sp
+
+ // Store updated block counter (x12)
+ str r8, [sp, #48]
+
+ sub sp, #16
+
+ // Reload state and do next block
+ ldmia r14!, {r0-r11} // load x0-x11
+ __strd r10, r11, sp, 8 // store x10-x11 before state
+ ldmia r14, {r10-r12,r14} // load x12-x15
+ b .Lnext_block\@
+
+.Lxor_slowpath\@:
+ // Slow path: < 64 bytes remaining, or unaligned input or output buffer.
+ // We handle it by storing the 64 bytes of keystream to the stack, then
+ // XOR-ing the needed portion with the data.
+
+ // Allocate keystream buffer
+ sub sp, #64
+ mov r14, sp
+
+ // Stack: ks0-ks15 x8-x9 x12-x15 x10-x11 orig_x0-orig_x15 OUT IN LEN
+ // Registers: r0-r7 are x0-x7; r8-r11 are free; r12 is IN; r14 is &ks0.
+ // x4-x7 are rotated by 'brot'; x12-x15 are rotated by 'drot'.
+
+ // Save keystream for x0-x3
+ __ldrd r8, r9, sp, 96
+ __ldrd r10, r11, sp, 104
+ add X0, X0, r8
+ add X1, X1, r9
+ add X2, X2, r10
+ add X3, X3, r11
+ _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ stmia r14!, {X0-X3}
+
+ // Save keystream for x4-x7
+ __ldrd r8, r9, sp, 112
+ __ldrd r10, r11, sp, 120
+ add X4, r8, X4, ror #brot
+ add X5, r9, X5, ror #brot
+ add X6, r10, X6, ror #brot
+ add X7, r11, X7, ror #brot
+ _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ add r8, sp, #64
+ stmia r14!, {X4-X7}
+
+ // Save keystream for x8-x15
+ ldm r8, {r0-r7} // (x8-x9,x12-x15,x10-x11)
+ __ldrd r8, r9, sp, 128
+ __ldrd r10, r11, sp, 136
+ add r0, r0, r8 // x8
+ add r1, r1, r9 // x9
+ add r6, r6, r10 // x10
+ add r7, r7, r11 // x11
+ _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ stmia r14!, {r0,r1,r6,r7}
+ __ldrd r8, r9, sp, 144
+ __ldrd r10, r11, sp, 152
+ add r2, r8, r2, ror #drot // x12
+ add r3, r9, r3, ror #drot // x13
+ add r4, r10, r4, ror #drot // x14
+ add r5, r11, r5, ror #drot // x15
+ _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ stmia r14, {r2-r5}
+
+ // Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
+ // Registers: r8 is block counter, r12 is IN.
+
+ ldr r9, [sp, #168] // LEN
+ ldr r14, [sp, #160] // OUT
+ cmp r9, #64
+ mov r0, sp
+ movle r1, r9
+ movgt r1, #64
+ // r1 is number of bytes to XOR, in range [1, 64]
+
+.if __LINUX_ARM_ARCH__ < 6
+ orr r2, r12, r14
+ tst r2, #3 // IN or OUT misaligned?
+ bne .Lxor_next_byte\@
+.endif
+
+ // XOR a word at a time
+.rept 16
+ subs r1, #4
+ blt .Lxor_words_done\@
+ ldr r2, [r12], #4
+ ldr r3, [r0], #4
+ eor r2, r2, r3
+ str r2, [r14], #4
+.endr
+ b .Lxor_slowpath_done\@
+.Lxor_words_done\@:
+ ands r1, r1, #3
+ beq .Lxor_slowpath_done\@
+
+ // XOR a byte at a time
+.Lxor_next_byte\@:
+ ldrb r2, [r12], #1
+ ldrb r3, [r0], #1
+ eor r2, r2, r3
+ strb r2, [r14], #1
+ subs r1, #1
+ bne .Lxor_next_byte\@
+
+.Lxor_slowpath_done\@:
+ subs r9, #64
+ add sp, #96
+ bgt .Lprepare_for_next_block\@
+
+.Ldone\@:
+.endm // _chacha
+
+/*
+ * void chacha_doarm(u8 *dst, const u8 *src, unsigned int bytes,
+ * const u32 *state, int nrounds);
+ */
+ENTRY(chacha_doarm)
+ cmp r2, #0 // len == 0?
+ reteq lr
+
+ ldr ip, [sp]
+ cmp ip, #12
+
+ push {r0-r2,r4-r11,lr}
+
+ // Push state x0-x15 onto stack.
+ // Also store an extra copy of x10-x11 just before the state.
+
+ add X12, r3, #48
+ ldm X12, {X12,X13,X14,X15}
+ push {X12,X13,X14,X15}
+ sub sp, sp, #64
+
+ __ldrd X8_X10, X9_X11, r3, 40
+ __strd X8_X10, X9_X11, sp, 8
+ __strd X8_X10, X9_X11, sp, 56
+ ldm r3, {X0-X9_X11}
+ __strd X0, X1, sp, 16
+ __strd X2, X3, sp, 24
+ __strd X4, X5, sp, 32
+ __strd X6, X7, sp, 40
+ __strd X8_X10, X9_X11, sp, 48
+
+ beq 1f
+ _chacha 20
+
+0: add sp, #76
+ pop {r4-r11, pc}
+
+1: _chacha 12
+ b 0b
+ENDPROC(chacha_doarm)
+
+/*
+ * void hchacha_block_arm(const u32 state[16], u32 out[8], int nrounds);
+ */
+ENTRY(hchacha_block_arm)
+ push {r1,r4-r11,lr}
+
+ cmp r2, #12 // ChaCha12 ?
+
+ mov r14, r0
+ ldmia r14!, {r0-r11} // load x0-x11
+ push {r10-r11} // store x10-x11 to stack
+ ldm r14, {r10-r12,r14} // load x12-x15
+ sub sp, #8
+
+ beq 1f
+ _chacha_permute 20
+
+ // Skip over (unused0-unused1, x10-x11)
+0: add sp, #16
+
+ // Fix up rotations of x12-x15
+ ror X12, X12, #drot
+ ror X13, X13, #drot
+ pop {r4} // load 'out'
+ ror X14, X14, #drot
+ ror X15, X15, #drot
+
+ // Store (x0-x3,x12-x15) to 'out'
+ stm r4, {X0,X1,X2,X3,X12,X13,X14,X15}
+
+ pop {r4-r11,pc}
+
+1: _chacha_permute 12
+ b 0b
+ENDPROC(hchacha_block_arm)
diff --git a/arch/arm/crypto/crct10dif-ce-core.S b/arch/arm/crypto/crct10dif-ce-core.S
index 86be258a803f..46c02c518a30 100644
--- a/arch/arm/crypto/crct10dif-ce-core.S
+++ b/arch/arm/crypto/crct10dif-ce-core.S
@@ -72,7 +72,7 @@
#endif
.text
- .arch armv7-a
+ .arch armv8-a
.fpu crypto-neon-fp-armv8
init_crc .req r0
diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S
new file mode 100644
index 000000000000..be18af52e7dc
--- /dev/null
+++ b/arch/arm/crypto/curve25519-core.S
@@ -0,0 +1,2062 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
+ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
+ * manually reworked for use in kernel space.
+ */
+
+#include <linux/linkage.h>
+
+.text
+.fpu neon
+.arch armv7-a
+.align 4
+
+ENTRY(curve25519_neon)
+ push {r4-r11, lr}
+ mov ip, sp
+ sub r3, sp, #704
+ and r3, r3, #0xfffffff0
+ mov sp, r3
+ movw r4, #0
+ movw r5, #254
+ vmov.i32 q0, #1
+ vshr.u64 q1, q0, #7
+ vshr.u64 q0, q0, #8
+ vmov.i32 d4, #19
+ vmov.i32 d5, #38
+ add r6, sp, #480
+ vst1.8 {d2-d3}, [r6, : 128]!
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vst1.8 {d4-d5}, [r6, : 128]
+ add r6, r3, #0
+ vmov.i32 q2, #0
+ vst1.8 {d4-d5}, [r6, : 128]!
+ vst1.8 {d4-d5}, [r6, : 128]!
+ vst1.8 d4, [r6, : 64]
+ add r6, r3, #0
+ movw r7, #960
+ sub r7, r7, #2
+ neg r7, r7
+ sub r7, r7, r7, LSL #7
+ str r7, [r6]
+ add r6, sp, #672
+ vld1.8 {d4-d5}, [r1]!
+ vld1.8 {d6-d7}, [r1]
+ vst1.8 {d4-d5}, [r6, : 128]!
+ vst1.8 {d6-d7}, [r6, : 128]
+ sub r1, r6, #16
+ ldrb r6, [r1]
+ and r6, r6, #248
+ strb r6, [r1]
+ ldrb r6, [r1, #31]
+ and r6, r6, #127
+ orr r6, r6, #64
+ strb r6, [r1, #31]
+ vmov.i64 q2, #0xffffffff
+ vshr.u64 q3, q2, #7
+ vshr.u64 q2, q2, #6
+ vld1.8 {d8}, [r2]
+ vld1.8 {d10}, [r2]
+ add r2, r2, #6
+ vld1.8 {d12}, [r2]
+ vld1.8 {d14}, [r2]
+ add r2, r2, #6
+ vld1.8 {d16}, [r2]
+ add r2, r2, #4
+ vld1.8 {d18}, [r2]
+ vld1.8 {d20}, [r2]
+ add r2, r2, #6
+ vld1.8 {d22}, [r2]
+ add r2, r2, #2
+ vld1.8 {d24}, [r2]
+ vld1.8 {d26}, [r2]
+ vshr.u64 q5, q5, #26
+ vshr.u64 q6, q6, #3
+ vshr.u64 q7, q7, #29
+ vshr.u64 q8, q8, #6
+ vshr.u64 q10, q10, #25
+ vshr.u64 q11, q11, #3
+ vshr.u64 q12, q12, #12
+ vshr.u64 q13, q13, #38
+ vand q4, q4, q2
+ vand q6, q6, q2
+ vand q8, q8, q2
+ vand q10, q10, q2
+ vand q2, q12, q2
+ vand q5, q5, q3
+ vand q7, q7, q3
+ vand q9, q9, q3
+ vand q11, q11, q3
+ vand q3, q13, q3
+ add r2, r3, #48
+ vadd.i64 q12, q4, q1
+ vadd.i64 q13, q10, q1
+ vshr.s64 q12, q12, #26
+ vshr.s64 q13, q13, #26
+ vadd.i64 q5, q5, q12
+ vshl.i64 q12, q12, #26
+ vadd.i64 q14, q5, q0
+ vadd.i64 q11, q11, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q11, q0
+ vsub.i64 q4, q4, q12
+ vshr.s64 q12, q14, #25
+ vsub.i64 q10, q10, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q12
+ vshl.i64 q12, q12, #25
+ vadd.i64 q14, q6, q1
+ vadd.i64 q2, q2, q13
+ vsub.i64 q5, q5, q12
+ vshr.s64 q12, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q1
+ vadd.i64 q7, q7, q12
+ vshl.i64 q12, q12, #26
+ vadd.i64 q15, q7, q0
+ vsub.i64 q11, q11, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q12
+ vshr.s64 q12, q15, #25
+ vadd.i64 q3, q3, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q3, q0
+ vadd.i64 q8, q8, q12
+ vshl.i64 q12, q12, #25
+ vadd.i64 q15, q8, q1
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q7, q12
+ vshr.s64 q12, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q9, q9, q12
+ vtrn.32 d12, d14
+ vshl.i64 q12, q12, #26
+ vtrn.32 d13, d15
+ vadd.i64 q0, q9, q0
+ vadd.i64 q4, q4, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q6, q13, #4
+ vsub.i64 q7, q8, q12
+ vshr.s64 q0, q0, #25
+ vadd.i64 q4, q4, q6
+ vadd.i64 q6, q10, q0
+ vshl.i64 q0, q0, #25
+ vadd.i64 q8, q6, q1
+ vadd.i64 q4, q4, q13
+ vshl.i64 q10, q13, #25
+ vadd.i64 q1, q4, q1
+ vsub.i64 q0, q9, q0
+ vshr.s64 q8, q8, #26
+ vsub.i64 q3, q3, q10
+ vtrn.32 d14, d0
+ vshr.s64 q1, q1, #26
+ vtrn.32 d15, d1
+ vadd.i64 q0, q11, q8
+ vst1.8 d14, [r2, : 64]
+ vshl.i64 q7, q8, #26
+ vadd.i64 q5, q5, q1
+ vtrn.32 d4, d6
+ vshl.i64 q1, q1, #26
+ vtrn.32 d5, d7
+ vsub.i64 q3, q6, q7
+ add r2, r2, #16
+ vsub.i64 q1, q4, q1
+ vst1.8 d4, [r2, : 64]
+ vtrn.32 d6, d0
+ vtrn.32 d7, d1
+ sub r2, r2, #8
+ vtrn.32 d2, d10
+ vtrn.32 d3, d11
+ vst1.8 d6, [r2, : 64]
+ sub r2, r2, #24
+ vst1.8 d2, [r2, : 64]
+ add r2, r3, #96
+ vmov.i32 q0, #0
+ vmov.i64 d2, #0xff
+ vmov.i64 d3, #0
+ vshr.u32 q1, q1, #7
+ vst1.8 {d2-d3}, [r2, : 128]!
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 d0, [r2, : 64]
+ add r2, r3, #144
+ vmov.i32 q0, #0
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 d0, [r2, : 64]
+ add r2, r3, #240
+ vmov.i32 q0, #0
+ vmov.i64 d2, #0xff
+ vmov.i64 d3, #0
+ vshr.u32 q1, q1, #7
+ vst1.8 {d2-d3}, [r2, : 128]!
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 d0, [r2, : 64]
+ add r2, r3, #48
+ add r6, r3, #192
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4}, [r2, : 64]
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vst1.8 {d2-d3}, [r6, : 128]!
+ vst1.8 d4, [r6, : 64]
+.Lmainloop:
+ mov r2, r5, LSR #3
+ and r6, r5, #7
+ ldrb r2, [r1, r2]
+ mov r2, r2, LSR r6
+ and r2, r2, #1
+ str r5, [sp, #456]
+ eor r4, r4, r2
+ str r2, [sp, #460]
+ neg r2, r4
+ add r4, r3, #96
+ add r5, r3, #192
+ add r6, r3, #144
+ vld1.8 {d8-d9}, [r4, : 128]!
+ add r7, r3, #240
+ vld1.8 {d10-d11}, [r5, : 128]!
+ veor q6, q4, q5
+ vld1.8 {d14-d15}, [r6, : 128]!
+ vdup.i32 q8, r2
+ vld1.8 {d18-d19}, [r7, : 128]!
+ veor q10, q7, q9
+ vld1.8 {d22-d23}, [r4, : 128]!
+ vand q6, q6, q8
+ vld1.8 {d24-d25}, [r5, : 128]!
+ vand q10, q10, q8
+ vld1.8 {d26-d27}, [r6, : 128]!
+ veor q4, q4, q6
+ vld1.8 {d28-d29}, [r7, : 128]!
+ veor q5, q5, q6
+ vld1.8 {d0}, [r4, : 64]
+ veor q6, q7, q10
+ vld1.8 {d2}, [r5, : 64]
+ veor q7, q9, q10
+ vld1.8 {d4}, [r6, : 64]
+ veor q9, q11, q12
+ vld1.8 {d6}, [r7, : 64]
+ veor q10, q0, q1
+ sub r2, r4, #32
+ vand q9, q9, q8
+ sub r4, r5, #32
+ vand q10, q10, q8
+ sub r5, r6, #32
+ veor q11, q11, q9
+ sub r6, r7, #32
+ veor q0, q0, q10
+ veor q9, q12, q9
+ veor q1, q1, q10
+ veor q10, q13, q14
+ veor q12, q2, q3
+ vand q10, q10, q8
+ vand q8, q12, q8
+ veor q12, q13, q10
+ veor q2, q2, q8
+ veor q10, q14, q10
+ veor q3, q3, q8
+ vadd.i32 q8, q4, q6
+ vsub.i32 q4, q4, q6
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vadd.i32 q6, q11, q12
+ vst1.8 {d8-d9}, [r5, : 128]!
+ vsub.i32 q4, q11, q12
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vadd.i32 q6, q0, q2
+ vst1.8 {d8-d9}, [r5, : 128]!
+ vsub.i32 q0, q0, q2
+ vst1.8 d12, [r2, : 64]
+ vadd.i32 q2, q5, q7
+ vst1.8 d0, [r5, : 64]
+ vsub.i32 q0, q5, q7
+ vst1.8 {d4-d5}, [r4, : 128]!
+ vadd.i32 q2, q9, q10
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vsub.i32 q0, q9, q10
+ vst1.8 {d4-d5}, [r4, : 128]!
+ vadd.i32 q2, q1, q3
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vsub.i32 q0, q1, q3
+ vst1.8 d4, [r4, : 64]
+ vst1.8 d0, [r6, : 64]
+ add r2, sp, #512
+ add r4, r3, #96
+ add r5, r3, #144
+ vld1.8 {d0-d1}, [r2, : 128]
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4-d5}, [r5, : 128]!
+ vzip.i32 q1, q2
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vld1.8 {d8-d9}, [r5, : 128]!
+ vshl.i32 q5, q1, #1
+ vzip.i32 q3, q4
+ vshl.i32 q6, q2, #1
+ vld1.8 {d14}, [r4, : 64]
+ vshl.i32 q8, q3, #1
+ vld1.8 {d15}, [r5, : 64]
+ vshl.i32 q9, q4, #1
+ vmul.i32 d21, d7, d1
+ vtrn.32 d14, d15
+ vmul.i32 q11, q4, q0
+ vmul.i32 q0, q7, q0
+ vmull.s32 q12, d2, d2
+ vmlal.s32 q12, d11, d1
+ vmlal.s32 q12, d12, d0
+ vmlal.s32 q12, d13, d23
+ vmlal.s32 q12, d16, d22
+ vmlal.s32 q12, d7, d21
+ vmull.s32 q10, d2, d11
+ vmlal.s32 q10, d4, d1
+ vmlal.s32 q10, d13, d0
+ vmlal.s32 q10, d6, d23
+ vmlal.s32 q10, d17, d22
+ vmull.s32 q13, d10, d4
+ vmlal.s32 q13, d11, d3
+ vmlal.s32 q13, d13, d1
+ vmlal.s32 q13, d16, d0
+ vmlal.s32 q13, d17, d23
+ vmlal.s32 q13, d8, d22
+ vmull.s32 q1, d10, d5
+ vmlal.s32 q1, d11, d4
+ vmlal.s32 q1, d6, d1
+ vmlal.s32 q1, d17, d0
+ vmlal.s32 q1, d8, d23
+ vmull.s32 q14, d10, d6
+ vmlal.s32 q14, d11, d13
+ vmlal.s32 q14, d4, d4
+ vmlal.s32 q14, d17, d1
+ vmlal.s32 q14, d18, d0
+ vmlal.s32 q14, d9, d23
+ vmull.s32 q11, d10, d7
+ vmlal.s32 q11, d11, d6
+ vmlal.s32 q11, d12, d5
+ vmlal.s32 q11, d8, d1
+ vmlal.s32 q11, d19, d0
+ vmull.s32 q15, d10, d8
+ vmlal.s32 q15, d11, d17
+ vmlal.s32 q15, d12, d6
+ vmlal.s32 q15, d13, d5
+ vmlal.s32 q15, d19, d1
+ vmlal.s32 q15, d14, d0
+ vmull.s32 q2, d10, d9
+ vmlal.s32 q2, d11, d8
+ vmlal.s32 q2, d12, d7
+ vmlal.s32 q2, d13, d6
+ vmlal.s32 q2, d14, d1
+ vmull.s32 q0, d15, d1
+ vmlal.s32 q0, d10, d14
+ vmlal.s32 q0, d11, d19
+ vmlal.s32 q0, d12, d8
+ vmlal.s32 q0, d13, d17
+ vmlal.s32 q0, d6, d6
+ add r2, sp, #480
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vmull.s32 q3, d16, d7
+ vmlal.s32 q3, d10, d15
+ vmlal.s32 q3, d11, d14
+ vmlal.s32 q3, d12, d9
+ vmlal.s32 q3, d13, d8
+ vld1.8 {d8-d9}, [r2, : 128]
+ vadd.i64 q5, q12, q9
+ vadd.i64 q6, q15, q9
+ vshr.s64 q5, q5, #26
+ vshr.s64 q6, q6, #26
+ vadd.i64 q7, q10, q5
+ vshl.i64 q5, q5, #26
+ vadd.i64 q8, q7, q4
+ vadd.i64 q2, q2, q6
+ vshl.i64 q6, q6, #26
+ vadd.i64 q10, q2, q4
+ vsub.i64 q5, q12, q5
+ vshr.s64 q8, q8, #25
+ vsub.i64 q6, q15, q6
+ vshr.s64 q10, q10, #25
+ vadd.i64 q12, q13, q8
+ vshl.i64 q8, q8, #25
+ vadd.i64 q13, q12, q9
+ vadd.i64 q0, q0, q10
+ vsub.i64 q7, q7, q8
+ vshr.s64 q8, q13, #26
+ vshl.i64 q10, q10, #25
+ vadd.i64 q13, q0, q9
+ vadd.i64 q1, q1, q8
+ vshl.i64 q8, q8, #26
+ vadd.i64 q15, q1, q4
+ vsub.i64 q2, q2, q10
+ vshr.s64 q10, q13, #26
+ vsub.i64 q8, q12, q8
+ vshr.s64 q12, q15, #25
+ vadd.i64 q3, q3, q10
+ vshl.i64 q10, q10, #26
+ vadd.i64 q13, q3, q4
+ vadd.i64 q14, q14, q12
+ add r2, r3, #288
+ vshl.i64 q12, q12, #25
+ add r4, r3, #336
+ vadd.i64 q15, q14, q9
+ add r2, r2, #8
+ vsub.i64 q0, q0, q10
+ add r4, r4, #8
+ vshr.s64 q10, q13, #25
+ vsub.i64 q1, q1, q12
+ vshr.s64 q12, q15, #26
+ vadd.i64 q13, q10, q10
+ vadd.i64 q11, q11, q12
+ vtrn.32 d16, d2
+ vshl.i64 q12, q12, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q11, q4
+ vadd.i64 q4, q5, q13
+ vst1.8 d16, [r2, : 64]!
+ vshl.i64 q5, q10, #4
+ vst1.8 d17, [r4, : 64]!
+ vsub.i64 q8, q14, q12
+ vshr.s64 q1, q1, #25
+ vadd.i64 q4, q4, q5
+ vadd.i64 q5, q6, q1
+ vshl.i64 q1, q1, #25
+ vadd.i64 q6, q5, q9
+ vadd.i64 q4, q4, q10
+ vshl.i64 q10, q10, #25
+ vadd.i64 q9, q4, q9
+ vsub.i64 q1, q11, q1
+ vshr.s64 q6, q6, #26
+ vsub.i64 q3, q3, q10
+ vtrn.32 d16, d2
+ vshr.s64 q9, q9, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q2, q6
+ vst1.8 d16, [r2, : 64]
+ vshl.i64 q2, q6, #26
+ vst1.8 d17, [r4, : 64]
+ vadd.i64 q6, q7, q9
+ vtrn.32 d0, d6
+ vshl.i64 q7, q9, #26
+ vtrn.32 d1, d7
+ vsub.i64 q2, q5, q2
+ add r2, r2, #16
+ vsub.i64 q3, q4, q7
+ vst1.8 d0, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d1, [r4, : 64]
+ vtrn.32 d4, d2
+ vtrn.32 d5, d3
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d6, d12
+ vtrn.32 d7, d13
+ vst1.8 d4, [r2, : 64]
+ vst1.8 d5, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d6, [r2, : 64]
+ vst1.8 d7, [r4, : 64]
+ add r2, r3, #240
+ add r4, r3, #96
+ vld1.8 {d0-d1}, [r4, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4}, [r4, : 64]
+ add r4, r3, #144
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vtrn.32 q0, q3
+ vld1.8 {d8-d9}, [r4, : 128]!
+ vshl.i32 q5, q0, #4
+ vtrn.32 q1, q4
+ vshl.i32 q6, q3, #4
+ vadd.i32 q5, q5, q0
+ vadd.i32 q6, q6, q3
+ vshl.i32 q7, q1, #4
+ vld1.8 {d5}, [r4, : 64]
+ vshl.i32 q8, q4, #4
+ vtrn.32 d4, d5
+ vadd.i32 q7, q7, q1
+ vadd.i32 q8, q8, q4
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vshl.i32 q10, q2, #4
+ vld1.8 {d22-d23}, [r2, : 128]!
+ vadd.i32 q10, q10, q2
+ vld1.8 {d24}, [r2, : 64]
+ vadd.i32 q5, q5, q0
+ add r2, r3, #192
+ vld1.8 {d26-d27}, [r2, : 128]!
+ vadd.i32 q6, q6, q3
+ vld1.8 {d28-d29}, [r2, : 128]!
+ vadd.i32 q8, q8, q4
+ vld1.8 {d25}, [r2, : 64]
+ vadd.i32 q10, q10, q2
+ vtrn.32 q9, q13
+ vadd.i32 q7, q7, q1
+ vadd.i32 q5, q5, q0
+ vtrn.32 q11, q14
+ vadd.i32 q6, q6, q3
+ add r2, sp, #528
+ vadd.i32 q10, q10, q2
+ vtrn.32 d24, d25
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q6, q13, #1
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vshl.i32 q10, q14, #1
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q15, q12, #1
+ vadd.i32 q8, q8, q4
+ vext.32 d10, d31, d30, #0
+ vadd.i32 q7, q7, q1
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vmull.s32 q8, d18, d5
+ vmlal.s32 q8, d26, d4
+ vmlal.s32 q8, d19, d9
+ vmlal.s32 q8, d27, d3
+ vmlal.s32 q8, d22, d8
+ vmlal.s32 q8, d28, d2
+ vmlal.s32 q8, d23, d7
+ vmlal.s32 q8, d29, d1
+ vmlal.s32 q8, d24, d6
+ vmlal.s32 q8, d25, d0
+ vst1.8 {d14-d15}, [r2, : 128]!
+ vmull.s32 q2, d18, d4
+ vmlal.s32 q2, d12, d9
+ vmlal.s32 q2, d13, d8
+ vmlal.s32 q2, d19, d3
+ vmlal.s32 q2, d22, d2
+ vmlal.s32 q2, d23, d1
+ vmlal.s32 q2, d24, d0
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vmull.s32 q7, d18, d9
+ vmlal.s32 q7, d26, d3
+ vmlal.s32 q7, d19, d8
+ vmlal.s32 q7, d27, d2
+ vmlal.s32 q7, d22, d7
+ vmlal.s32 q7, d28, d1
+ vmlal.s32 q7, d23, d6
+ vmlal.s32 q7, d29, d0
+ vst1.8 {d10-d11}, [r2, : 128]!
+ vmull.s32 q5, d18, d3
+ vmlal.s32 q5, d19, d2
+ vmlal.s32 q5, d22, d1
+ vmlal.s32 q5, d23, d0
+ vmlal.s32 q5, d12, d8
+ vst1.8 {d16-d17}, [r2, : 128]
+ vmull.s32 q4, d18, d8
+ vmlal.s32 q4, d26, d2
+ vmlal.s32 q4, d19, d7
+ vmlal.s32 q4, d27, d1
+ vmlal.s32 q4, d22, d6
+ vmlal.s32 q4, d28, d0
+ vmull.s32 q8, d18, d7
+ vmlal.s32 q8, d26, d1
+ vmlal.s32 q8, d19, d6
+ vmlal.s32 q8, d27, d0
+ add r2, sp, #544
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q7, d24, d21
+ vmlal.s32 q7, d25, d20
+ vmlal.s32 q4, d23, d21
+ vmlal.s32 q4, d29, d20
+ vmlal.s32 q8, d22, d21
+ vmlal.s32 q8, d28, d20
+ vmlal.s32 q5, d24, d20
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q7, d18, d6
+ vmlal.s32 q7, d26, d0
+ add r2, sp, #624
+ vld1.8 {d30-d31}, [r2, : 128]
+ vmlal.s32 q2, d30, d21
+ vmlal.s32 q7, d19, d21
+ vmlal.s32 q7, d27, d20
+ add r2, sp, #592
+ vld1.8 {d26-d27}, [r2, : 128]
+ vmlal.s32 q4, d25, d27
+ vmlal.s32 q8, d29, d27
+ vmlal.s32 q8, d25, d26
+ vmlal.s32 q7, d28, d27
+ vmlal.s32 q7, d29, d26
+ add r2, sp, #576
+ vld1.8 {d28-d29}, [r2, : 128]
+ vmlal.s32 q4, d24, d29
+ vmlal.s32 q8, d23, d29
+ vmlal.s32 q8, d24, d28
+ vmlal.s32 q7, d22, d29
+ vmlal.s32 q7, d23, d28
+ vst1.8 {d8-d9}, [r2, : 128]
+ add r2, sp, #528
+ vld1.8 {d8-d9}, [r2, : 128]
+ vmlal.s32 q7, d24, d9
+ vmlal.s32 q7, d25, d31
+ vmull.s32 q1, d18, d2
+ vmlal.s32 q1, d19, d1
+ vmlal.s32 q1, d22, d0
+ vmlal.s32 q1, d24, d27
+ vmlal.s32 q1, d23, d20
+ vmlal.s32 q1, d12, d7
+ vmlal.s32 q1, d13, d6
+ vmull.s32 q6, d18, d1
+ vmlal.s32 q6, d19, d0
+ vmlal.s32 q6, d23, d27
+ vmlal.s32 q6, d22, d20
+ vmlal.s32 q6, d24, d26
+ vmull.s32 q0, d18, d0
+ vmlal.s32 q0, d22, d27
+ vmlal.s32 q0, d23, d26
+ vmlal.s32 q0, d24, d31
+ vmlal.s32 q0, d19, d20
+ add r2, sp, #608
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q2, d18, d7
+ vmlal.s32 q5, d18, d6
+ vmlal.s32 q1, d18, d21
+ vmlal.s32 q0, d18, d28
+ vmlal.s32 q6, d18, d29
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d19, d9
+ vmlal.s32 q6, d19, d28
+ add r2, sp, #560
+ vld1.8 {d18-d19}, [r2, : 128]
+ add r2, sp, #480
+ vld1.8 {d22-d23}, [r2, : 128]
+ vmlal.s32 q5, d19, d7
+ vmlal.s32 q0, d18, d21
+ vmlal.s32 q0, d19, d29
+ vmlal.s32 q6, d18, d6
+ add r2, sp, #496
+ vld1.8 {d6-d7}, [r2, : 128]
+ vmlal.s32 q6, d19, d21
+ add r2, sp, #544
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q0, d30, d8
+ add r2, sp, #640
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q5, d30, d29
+ add r2, sp, #576
+ vld1.8 {d24-d25}, [r2, : 128]
+ vmlal.s32 q1, d30, d28
+ vadd.i64 q13, q0, q11
+ vadd.i64 q14, q5, q11
+ vmlal.s32 q6, d30, d9
+ vshr.s64 q4, q13, #26
+ vshr.s64 q13, q14, #26
+ vadd.i64 q7, q7, q4
+ vshl.i64 q4, q4, #26
+ vadd.i64 q14, q7, q3
+ vadd.i64 q9, q9, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q9, q3
+ vsub.i64 q0, q0, q4
+ vshr.s64 q4, q14, #25
+ vsub.i64 q5, q5, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q4
+ vshl.i64 q4, q4, #25
+ vadd.i64 q14, q6, q11
+ vadd.i64 q2, q2, q13
+ vsub.i64 q4, q7, q4
+ vshr.s64 q7, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q11
+ vadd.i64 q8, q8, q7
+ vshl.i64 q7, q7, #26
+ vadd.i64 q15, q8, q3
+ vsub.i64 q9, q9, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q7
+ vshr.s64 q7, q15, #25
+ vadd.i64 q10, q10, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q10, q3
+ vadd.i64 q1, q1, q7
+ add r2, r3, #144
+ vshl.i64 q7, q7, #25
+ add r4, r3, #96
+ vadd.i64 q15, q1, q11
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ add r4, r4, #8
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q8, q7
+ vshr.s64 q8, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q12, q12, q8
+ vtrn.32 d12, d14
+ vshl.i64 q8, q8, #26
+ vtrn.32 d13, d15
+ vadd.i64 q3, q12, q3
+ vadd.i64 q0, q0, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q7, q13, #4
+ vst1.8 d13, [r4, : 64]!
+ vsub.i64 q1, q1, q8
+ vshr.s64 q3, q3, #25
+ vadd.i64 q0, q0, q7
+ vadd.i64 q5, q5, q3
+ vshl.i64 q3, q3, #25
+ vadd.i64 q6, q5, q11
+ vadd.i64 q0, q0, q13
+ vshl.i64 q7, q13, #25
+ vadd.i64 q8, q0, q11
+ vsub.i64 q3, q12, q3
+ vshr.s64 q6, q6, #26
+ vsub.i64 q7, q10, q7
+ vtrn.32 d2, d6
+ vshr.s64 q8, q8, #26
+ vtrn.32 d3, d7
+ vadd.i64 q3, q9, q6
+ vst1.8 d2, [r2, : 64]
+ vshl.i64 q6, q6, #26
+ vst1.8 d3, [r4, : 64]
+ vadd.i64 q1, q4, q8
+ vtrn.32 d4, d14
+ vshl.i64 q4, q8, #26
+ vtrn.32 d5, d15
+ vsub.i64 q5, q5, q6
+ add r2, r2, #16
+ vsub.i64 q0, q0, q4
+ vst1.8 d4, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d5, [r4, : 64]
+ vtrn.32 d10, d6
+ vtrn.32 d11, d7
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d0, d2
+ vtrn.32 d1, d3
+ vst1.8 d10, [r2, : 64]
+ vst1.8 d11, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d0, [r2, : 64]
+ vst1.8 d1, [r4, : 64]
+ add r2, r3, #288
+ add r4, r3, #336
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vsub.i32 q0, q0, q1
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4-d5}, [r4, : 128]!
+ vsub.i32 q1, q1, q2
+ add r5, r3, #240
+ vld1.8 {d4}, [r2, : 64]
+ vld1.8 {d6}, [r4, : 64]
+ vsub.i32 q2, q2, q3
+ vst1.8 {d0-d1}, [r5, : 128]!
+ vst1.8 {d2-d3}, [r5, : 128]!
+ vst1.8 d4, [r5, : 64]
+ add r2, r3, #144
+ add r4, r3, #96
+ add r5, r3, #144
+ add r6, r3, #192
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vsub.i32 q2, q0, q1
+ vadd.i32 q0, q0, q1
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vsub.i32 q4, q1, q3
+ vadd.i32 q1, q1, q3
+ vld1.8 {d6}, [r2, : 64]
+ vld1.8 {d10}, [r4, : 64]
+ vsub.i32 q6, q3, q5
+ vadd.i32 q3, q3, q5
+ vst1.8 {d4-d5}, [r5, : 128]!
+ vst1.8 {d0-d1}, [r6, : 128]!
+ vst1.8 {d8-d9}, [r5, : 128]!
+ vst1.8 {d2-d3}, [r6, : 128]!
+ vst1.8 d12, [r5, : 64]
+ vst1.8 d6, [r6, : 64]
+ add r2, r3, #0
+ add r4, r3, #240
+ vld1.8 {d0-d1}, [r4, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4}, [r4, : 64]
+ add r4, r3, #336
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vtrn.32 q0, q3
+ vld1.8 {d8-d9}, [r4, : 128]!
+ vshl.i32 q5, q0, #4
+ vtrn.32 q1, q4
+ vshl.i32 q6, q3, #4
+ vadd.i32 q5, q5, q0
+ vadd.i32 q6, q6, q3
+ vshl.i32 q7, q1, #4
+ vld1.8 {d5}, [r4, : 64]
+ vshl.i32 q8, q4, #4
+ vtrn.32 d4, d5
+ vadd.i32 q7, q7, q1
+ vadd.i32 q8, q8, q4
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vshl.i32 q10, q2, #4
+ vld1.8 {d22-d23}, [r2, : 128]!
+ vadd.i32 q10, q10, q2
+ vld1.8 {d24}, [r2, : 64]
+ vadd.i32 q5, q5, q0
+ add r2, r3, #288
+ vld1.8 {d26-d27}, [r2, : 128]!
+ vadd.i32 q6, q6, q3
+ vld1.8 {d28-d29}, [r2, : 128]!
+ vadd.i32 q8, q8, q4
+ vld1.8 {d25}, [r2, : 64]
+ vadd.i32 q10, q10, q2
+ vtrn.32 q9, q13
+ vadd.i32 q7, q7, q1
+ vadd.i32 q5, q5, q0
+ vtrn.32 q11, q14
+ vadd.i32 q6, q6, q3
+ add r2, sp, #528
+ vadd.i32 q10, q10, q2
+ vtrn.32 d24, d25
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q6, q13, #1
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vshl.i32 q10, q14, #1
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q15, q12, #1
+ vadd.i32 q8, q8, q4
+ vext.32 d10, d31, d30, #0
+ vadd.i32 q7, q7, q1
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vmull.s32 q8, d18, d5
+ vmlal.s32 q8, d26, d4
+ vmlal.s32 q8, d19, d9
+ vmlal.s32 q8, d27, d3
+ vmlal.s32 q8, d22, d8
+ vmlal.s32 q8, d28, d2
+ vmlal.s32 q8, d23, d7
+ vmlal.s32 q8, d29, d1
+ vmlal.s32 q8, d24, d6
+ vmlal.s32 q8, d25, d0
+ vst1.8 {d14-d15}, [r2, : 128]!
+ vmull.s32 q2, d18, d4
+ vmlal.s32 q2, d12, d9
+ vmlal.s32 q2, d13, d8
+ vmlal.s32 q2, d19, d3
+ vmlal.s32 q2, d22, d2
+ vmlal.s32 q2, d23, d1
+ vmlal.s32 q2, d24, d0
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vmull.s32 q7, d18, d9
+ vmlal.s32 q7, d26, d3
+ vmlal.s32 q7, d19, d8
+ vmlal.s32 q7, d27, d2
+ vmlal.s32 q7, d22, d7
+ vmlal.s32 q7, d28, d1
+ vmlal.s32 q7, d23, d6
+ vmlal.s32 q7, d29, d0
+ vst1.8 {d10-d11}, [r2, : 128]!
+ vmull.s32 q5, d18, d3
+ vmlal.s32 q5, d19, d2
+ vmlal.s32 q5, d22, d1
+ vmlal.s32 q5, d23, d0
+ vmlal.s32 q5, d12, d8
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vmull.s32 q4, d18, d8
+ vmlal.s32 q4, d26, d2
+ vmlal.s32 q4, d19, d7
+ vmlal.s32 q4, d27, d1
+ vmlal.s32 q4, d22, d6
+ vmlal.s32 q4, d28, d0
+ vmull.s32 q8, d18, d7
+ vmlal.s32 q8, d26, d1
+ vmlal.s32 q8, d19, d6
+ vmlal.s32 q8, d27, d0
+ add r2, sp, #544
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q7, d24, d21
+ vmlal.s32 q7, d25, d20
+ vmlal.s32 q4, d23, d21
+ vmlal.s32 q4, d29, d20
+ vmlal.s32 q8, d22, d21
+ vmlal.s32 q8, d28, d20
+ vmlal.s32 q5, d24, d20
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q7, d18, d6
+ vmlal.s32 q7, d26, d0
+ add r2, sp, #624
+ vld1.8 {d30-d31}, [r2, : 128]
+ vmlal.s32 q2, d30, d21
+ vmlal.s32 q7, d19, d21
+ vmlal.s32 q7, d27, d20
+ add r2, sp, #592
+ vld1.8 {d26-d27}, [r2, : 128]
+ vmlal.s32 q4, d25, d27
+ vmlal.s32 q8, d29, d27
+ vmlal.s32 q8, d25, d26
+ vmlal.s32 q7, d28, d27
+ vmlal.s32 q7, d29, d26
+ add r2, sp, #576
+ vld1.8 {d28-d29}, [r2, : 128]
+ vmlal.s32 q4, d24, d29
+ vmlal.s32 q8, d23, d29
+ vmlal.s32 q8, d24, d28
+ vmlal.s32 q7, d22, d29
+ vmlal.s32 q7, d23, d28
+ vst1.8 {d8-d9}, [r2, : 128]
+ add r2, sp, #528
+ vld1.8 {d8-d9}, [r2, : 128]
+ vmlal.s32 q7, d24, d9
+ vmlal.s32 q7, d25, d31
+ vmull.s32 q1, d18, d2
+ vmlal.s32 q1, d19, d1
+ vmlal.s32 q1, d22, d0
+ vmlal.s32 q1, d24, d27
+ vmlal.s32 q1, d23, d20
+ vmlal.s32 q1, d12, d7
+ vmlal.s32 q1, d13, d6
+ vmull.s32 q6, d18, d1
+ vmlal.s32 q6, d19, d0
+ vmlal.s32 q6, d23, d27
+ vmlal.s32 q6, d22, d20
+ vmlal.s32 q6, d24, d26
+ vmull.s32 q0, d18, d0
+ vmlal.s32 q0, d22, d27
+ vmlal.s32 q0, d23, d26
+ vmlal.s32 q0, d24, d31
+ vmlal.s32 q0, d19, d20
+ add r2, sp, #608
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q2, d18, d7
+ vmlal.s32 q5, d18, d6
+ vmlal.s32 q1, d18, d21
+ vmlal.s32 q0, d18, d28
+ vmlal.s32 q6, d18, d29
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d19, d9
+ vmlal.s32 q6, d19, d28
+ add r2, sp, #560
+ vld1.8 {d18-d19}, [r2, : 128]
+ add r2, sp, #480
+ vld1.8 {d22-d23}, [r2, : 128]
+ vmlal.s32 q5, d19, d7
+ vmlal.s32 q0, d18, d21
+ vmlal.s32 q0, d19, d29
+ vmlal.s32 q6, d18, d6
+ add r2, sp, #496
+ vld1.8 {d6-d7}, [r2, : 128]
+ vmlal.s32 q6, d19, d21
+ add r2, sp, #544
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q0, d30, d8
+ add r2, sp, #640
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q5, d30, d29
+ add r2, sp, #576
+ vld1.8 {d24-d25}, [r2, : 128]
+ vmlal.s32 q1, d30, d28
+ vadd.i64 q13, q0, q11
+ vadd.i64 q14, q5, q11
+ vmlal.s32 q6, d30, d9
+ vshr.s64 q4, q13, #26
+ vshr.s64 q13, q14, #26
+ vadd.i64 q7, q7, q4
+ vshl.i64 q4, q4, #26
+ vadd.i64 q14, q7, q3
+ vadd.i64 q9, q9, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q9, q3
+ vsub.i64 q0, q0, q4
+ vshr.s64 q4, q14, #25
+ vsub.i64 q5, q5, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q4
+ vshl.i64 q4, q4, #25
+ vadd.i64 q14, q6, q11
+ vadd.i64 q2, q2, q13
+ vsub.i64 q4, q7, q4
+ vshr.s64 q7, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q11
+ vadd.i64 q8, q8, q7
+ vshl.i64 q7, q7, #26
+ vadd.i64 q15, q8, q3
+ vsub.i64 q9, q9, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q7
+ vshr.s64 q7, q15, #25
+ vadd.i64 q10, q10, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q10, q3
+ vadd.i64 q1, q1, q7
+ add r2, r3, #288
+ vshl.i64 q7, q7, #25
+ add r4, r3, #96
+ vadd.i64 q15, q1, q11
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ add r4, r4, #8
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q8, q7
+ vshr.s64 q8, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q12, q12, q8
+ vtrn.32 d12, d14
+ vshl.i64 q8, q8, #26
+ vtrn.32 d13, d15
+ vadd.i64 q3, q12, q3
+ vadd.i64 q0, q0, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q7, q13, #4
+ vst1.8 d13, [r4, : 64]!
+ vsub.i64 q1, q1, q8
+ vshr.s64 q3, q3, #25
+ vadd.i64 q0, q0, q7
+ vadd.i64 q5, q5, q3
+ vshl.i64 q3, q3, #25
+ vadd.i64 q6, q5, q11
+ vadd.i64 q0, q0, q13
+ vshl.i64 q7, q13, #25
+ vadd.i64 q8, q0, q11
+ vsub.i64 q3, q12, q3
+ vshr.s64 q6, q6, #26
+ vsub.i64 q7, q10, q7
+ vtrn.32 d2, d6
+ vshr.s64 q8, q8, #26
+ vtrn.32 d3, d7
+ vadd.i64 q3, q9, q6
+ vst1.8 d2, [r2, : 64]
+ vshl.i64 q6, q6, #26
+ vst1.8 d3, [r4, : 64]
+ vadd.i64 q1, q4, q8
+ vtrn.32 d4, d14
+ vshl.i64 q4, q8, #26
+ vtrn.32 d5, d15
+ vsub.i64 q5, q5, q6
+ add r2, r2, #16
+ vsub.i64 q0, q0, q4
+ vst1.8 d4, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d5, [r4, : 64]
+ vtrn.32 d10, d6
+ vtrn.32 d11, d7
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d0, d2
+ vtrn.32 d1, d3
+ vst1.8 d10, [r2, : 64]
+ vst1.8 d11, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d0, [r2, : 64]
+ vst1.8 d1, [r4, : 64]
+ add r2, sp, #512
+ add r4, r3, #144
+ add r5, r3, #192
+ vld1.8 {d0-d1}, [r2, : 128]
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4-d5}, [r5, : 128]!
+ vzip.i32 q1, q2
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vld1.8 {d8-d9}, [r5, : 128]!
+ vshl.i32 q5, q1, #1
+ vzip.i32 q3, q4
+ vshl.i32 q6, q2, #1
+ vld1.8 {d14}, [r4, : 64]
+ vshl.i32 q8, q3, #1
+ vld1.8 {d15}, [r5, : 64]
+ vshl.i32 q9, q4, #1
+ vmul.i32 d21, d7, d1
+ vtrn.32 d14, d15
+ vmul.i32 q11, q4, q0
+ vmul.i32 q0, q7, q0
+ vmull.s32 q12, d2, d2
+ vmlal.s32 q12, d11, d1
+ vmlal.s32 q12, d12, d0
+ vmlal.s32 q12, d13, d23
+ vmlal.s32 q12, d16, d22
+ vmlal.s32 q12, d7, d21
+ vmull.s32 q10, d2, d11
+ vmlal.s32 q10, d4, d1
+ vmlal.s32 q10, d13, d0
+ vmlal.s32 q10, d6, d23
+ vmlal.s32 q10, d17, d22
+ vmull.s32 q13, d10, d4
+ vmlal.s32 q13, d11, d3
+ vmlal.s32 q13, d13, d1
+ vmlal.s32 q13, d16, d0
+ vmlal.s32 q13, d17, d23
+ vmlal.s32 q13, d8, d22
+ vmull.s32 q1, d10, d5
+ vmlal.s32 q1, d11, d4
+ vmlal.s32 q1, d6, d1
+ vmlal.s32 q1, d17, d0
+ vmlal.s32 q1, d8, d23
+ vmull.s32 q14, d10, d6
+ vmlal.s32 q14, d11, d13
+ vmlal.s32 q14, d4, d4
+ vmlal.s32 q14, d17, d1
+ vmlal.s32 q14, d18, d0
+ vmlal.s32 q14, d9, d23
+ vmull.s32 q11, d10, d7
+ vmlal.s32 q11, d11, d6
+ vmlal.s32 q11, d12, d5
+ vmlal.s32 q11, d8, d1
+ vmlal.s32 q11, d19, d0
+ vmull.s32 q15, d10, d8
+ vmlal.s32 q15, d11, d17
+ vmlal.s32 q15, d12, d6
+ vmlal.s32 q15, d13, d5
+ vmlal.s32 q15, d19, d1
+ vmlal.s32 q15, d14, d0
+ vmull.s32 q2, d10, d9
+ vmlal.s32 q2, d11, d8
+ vmlal.s32 q2, d12, d7
+ vmlal.s32 q2, d13, d6
+ vmlal.s32 q2, d14, d1
+ vmull.s32 q0, d15, d1
+ vmlal.s32 q0, d10, d14
+ vmlal.s32 q0, d11, d19
+ vmlal.s32 q0, d12, d8
+ vmlal.s32 q0, d13, d17
+ vmlal.s32 q0, d6, d6
+ add r2, sp, #480
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vmull.s32 q3, d16, d7
+ vmlal.s32 q3, d10, d15
+ vmlal.s32 q3, d11, d14
+ vmlal.s32 q3, d12, d9
+ vmlal.s32 q3, d13, d8
+ vld1.8 {d8-d9}, [r2, : 128]
+ vadd.i64 q5, q12, q9
+ vadd.i64 q6, q15, q9
+ vshr.s64 q5, q5, #26
+ vshr.s64 q6, q6, #26
+ vadd.i64 q7, q10, q5
+ vshl.i64 q5, q5, #26
+ vadd.i64 q8, q7, q4
+ vadd.i64 q2, q2, q6
+ vshl.i64 q6, q6, #26
+ vadd.i64 q10, q2, q4
+ vsub.i64 q5, q12, q5
+ vshr.s64 q8, q8, #25
+ vsub.i64 q6, q15, q6
+ vshr.s64 q10, q10, #25
+ vadd.i64 q12, q13, q8
+ vshl.i64 q8, q8, #25
+ vadd.i64 q13, q12, q9
+ vadd.i64 q0, q0, q10
+ vsub.i64 q7, q7, q8
+ vshr.s64 q8, q13, #26
+ vshl.i64 q10, q10, #25
+ vadd.i64 q13, q0, q9
+ vadd.i64 q1, q1, q8
+ vshl.i64 q8, q8, #26
+ vadd.i64 q15, q1, q4
+ vsub.i64 q2, q2, q10
+ vshr.s64 q10, q13, #26
+ vsub.i64 q8, q12, q8
+ vshr.s64 q12, q15, #25
+ vadd.i64 q3, q3, q10
+ vshl.i64 q10, q10, #26
+ vadd.i64 q13, q3, q4
+ vadd.i64 q14, q14, q12
+ add r2, r3, #144
+ vshl.i64 q12, q12, #25
+ add r4, r3, #192
+ vadd.i64 q15, q14, q9
+ add r2, r2, #8
+ vsub.i64 q0, q0, q10
+ add r4, r4, #8
+ vshr.s64 q10, q13, #25
+ vsub.i64 q1, q1, q12
+ vshr.s64 q12, q15, #26
+ vadd.i64 q13, q10, q10
+ vadd.i64 q11, q11, q12
+ vtrn.32 d16, d2
+ vshl.i64 q12, q12, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q11, q4
+ vadd.i64 q4, q5, q13
+ vst1.8 d16, [r2, : 64]!
+ vshl.i64 q5, q10, #4
+ vst1.8 d17, [r4, : 64]!
+ vsub.i64 q8, q14, q12
+ vshr.s64 q1, q1, #25
+ vadd.i64 q4, q4, q5
+ vadd.i64 q5, q6, q1
+ vshl.i64 q1, q1, #25
+ vadd.i64 q6, q5, q9
+ vadd.i64 q4, q4, q10
+ vshl.i64 q10, q10, #25
+ vadd.i64 q9, q4, q9
+ vsub.i64 q1, q11, q1
+ vshr.s64 q6, q6, #26
+ vsub.i64 q3, q3, q10
+ vtrn.32 d16, d2
+ vshr.s64 q9, q9, #26
+ vtrn.32 d17, d3
+ vadd.i64 q1, q2, q6
+ vst1.8 d16, [r2, : 64]
+ vshl.i64 q2, q6, #26
+ vst1.8 d17, [r4, : 64]
+ vadd.i64 q6, q7, q9
+ vtrn.32 d0, d6
+ vshl.i64 q7, q9, #26
+ vtrn.32 d1, d7
+ vsub.i64 q2, q5, q2
+ add r2, r2, #16
+ vsub.i64 q3, q4, q7
+ vst1.8 d0, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d1, [r4, : 64]
+ vtrn.32 d4, d2
+ vtrn.32 d5, d3
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d6, d12
+ vtrn.32 d7, d13
+ vst1.8 d4, [r2, : 64]
+ vst1.8 d5, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d6, [r2, : 64]
+ vst1.8 d7, [r4, : 64]
+ add r2, r3, #336
+ add r4, r3, #288
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vadd.i32 q0, q0, q1
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4-d5}, [r4, : 128]!
+ vadd.i32 q1, q1, q2
+ add r5, r3, #288
+ vld1.8 {d4}, [r2, : 64]
+ vld1.8 {d6}, [r4, : 64]
+ vadd.i32 q2, q2, q3
+ vst1.8 {d0-d1}, [r5, : 128]!
+ vst1.8 {d2-d3}, [r5, : 128]!
+ vst1.8 d4, [r5, : 64]
+ add r2, r3, #48
+ add r4, r3, #144
+ vld1.8 {d0-d1}, [r4, : 128]!
+ vld1.8 {d2-d3}, [r4, : 128]!
+ vld1.8 {d4}, [r4, : 64]
+ add r4, r3, #288
+ vld1.8 {d6-d7}, [r4, : 128]!
+ vtrn.32 q0, q3
+ vld1.8 {d8-d9}, [r4, : 128]!
+ vshl.i32 q5, q0, #4
+ vtrn.32 q1, q4
+ vshl.i32 q6, q3, #4
+ vadd.i32 q5, q5, q0
+ vadd.i32 q6, q6, q3
+ vshl.i32 q7, q1, #4
+ vld1.8 {d5}, [r4, : 64]
+ vshl.i32 q8, q4, #4
+ vtrn.32 d4, d5
+ vadd.i32 q7, q7, q1
+ vadd.i32 q8, q8, q4
+ vld1.8 {d18-d19}, [r2, : 128]!
+ vshl.i32 q10, q2, #4
+ vld1.8 {d22-d23}, [r2, : 128]!
+ vadd.i32 q10, q10, q2
+ vld1.8 {d24}, [r2, : 64]
+ vadd.i32 q5, q5, q0
+ add r2, r3, #240
+ vld1.8 {d26-d27}, [r2, : 128]!
+ vadd.i32 q6, q6, q3
+ vld1.8 {d28-d29}, [r2, : 128]!
+ vadd.i32 q8, q8, q4
+ vld1.8 {d25}, [r2, : 64]
+ vadd.i32 q10, q10, q2
+ vtrn.32 q9, q13
+ vadd.i32 q7, q7, q1
+ vadd.i32 q5, q5, q0
+ vtrn.32 q11, q14
+ vadd.i32 q6, q6, q3
+ add r2, sp, #528
+ vadd.i32 q10, q10, q2
+ vtrn.32 d24, d25
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q6, q13, #1
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vshl.i32 q10, q14, #1
+ vst1.8 {d12-d13}, [r2, : 128]!
+ vshl.i32 q15, q12, #1
+ vadd.i32 q8, q8, q4
+ vext.32 d10, d31, d30, #0
+ vadd.i32 q7, q7, q1
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vmull.s32 q8, d18, d5
+ vmlal.s32 q8, d26, d4
+ vmlal.s32 q8, d19, d9
+ vmlal.s32 q8, d27, d3
+ vmlal.s32 q8, d22, d8
+ vmlal.s32 q8, d28, d2
+ vmlal.s32 q8, d23, d7
+ vmlal.s32 q8, d29, d1
+ vmlal.s32 q8, d24, d6
+ vmlal.s32 q8, d25, d0
+ vst1.8 {d14-d15}, [r2, : 128]!
+ vmull.s32 q2, d18, d4
+ vmlal.s32 q2, d12, d9
+ vmlal.s32 q2, d13, d8
+ vmlal.s32 q2, d19, d3
+ vmlal.s32 q2, d22, d2
+ vmlal.s32 q2, d23, d1
+ vmlal.s32 q2, d24, d0
+ vst1.8 {d20-d21}, [r2, : 128]!
+ vmull.s32 q7, d18, d9
+ vmlal.s32 q7, d26, d3
+ vmlal.s32 q7, d19, d8
+ vmlal.s32 q7, d27, d2
+ vmlal.s32 q7, d22, d7
+ vmlal.s32 q7, d28, d1
+ vmlal.s32 q7, d23, d6
+ vmlal.s32 q7, d29, d0
+ vst1.8 {d10-d11}, [r2, : 128]!
+ vmull.s32 q5, d18, d3
+ vmlal.s32 q5, d19, d2
+ vmlal.s32 q5, d22, d1
+ vmlal.s32 q5, d23, d0
+ vmlal.s32 q5, d12, d8
+ vst1.8 {d16-d17}, [r2, : 128]!
+ vmull.s32 q4, d18, d8
+ vmlal.s32 q4, d26, d2
+ vmlal.s32 q4, d19, d7
+ vmlal.s32 q4, d27, d1
+ vmlal.s32 q4, d22, d6
+ vmlal.s32 q4, d28, d0
+ vmull.s32 q8, d18, d7
+ vmlal.s32 q8, d26, d1
+ vmlal.s32 q8, d19, d6
+ vmlal.s32 q8, d27, d0
+ add r2, sp, #544
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q7, d24, d21
+ vmlal.s32 q7, d25, d20
+ vmlal.s32 q4, d23, d21
+ vmlal.s32 q4, d29, d20
+ vmlal.s32 q8, d22, d21
+ vmlal.s32 q8, d28, d20
+ vmlal.s32 q5, d24, d20
+ vst1.8 {d14-d15}, [r2, : 128]
+ vmull.s32 q7, d18, d6
+ vmlal.s32 q7, d26, d0
+ add r2, sp, #624
+ vld1.8 {d30-d31}, [r2, : 128]
+ vmlal.s32 q2, d30, d21
+ vmlal.s32 q7, d19, d21
+ vmlal.s32 q7, d27, d20
+ add r2, sp, #592
+ vld1.8 {d26-d27}, [r2, : 128]
+ vmlal.s32 q4, d25, d27
+ vmlal.s32 q8, d29, d27
+ vmlal.s32 q8, d25, d26
+ vmlal.s32 q7, d28, d27
+ vmlal.s32 q7, d29, d26
+ add r2, sp, #576
+ vld1.8 {d28-d29}, [r2, : 128]
+ vmlal.s32 q4, d24, d29
+ vmlal.s32 q8, d23, d29
+ vmlal.s32 q8, d24, d28
+ vmlal.s32 q7, d22, d29
+ vmlal.s32 q7, d23, d28
+ vst1.8 {d8-d9}, [r2, : 128]
+ add r2, sp, #528
+ vld1.8 {d8-d9}, [r2, : 128]
+ vmlal.s32 q7, d24, d9
+ vmlal.s32 q7, d25, d31
+ vmull.s32 q1, d18, d2
+ vmlal.s32 q1, d19, d1
+ vmlal.s32 q1, d22, d0
+ vmlal.s32 q1, d24, d27
+ vmlal.s32 q1, d23, d20
+ vmlal.s32 q1, d12, d7
+ vmlal.s32 q1, d13, d6
+ vmull.s32 q6, d18, d1
+ vmlal.s32 q6, d19, d0
+ vmlal.s32 q6, d23, d27
+ vmlal.s32 q6, d22, d20
+ vmlal.s32 q6, d24, d26
+ vmull.s32 q0, d18, d0
+ vmlal.s32 q0, d22, d27
+ vmlal.s32 q0, d23, d26
+ vmlal.s32 q0, d24, d31
+ vmlal.s32 q0, d19, d20
+ add r2, sp, #608
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q2, d18, d7
+ vmlal.s32 q5, d18, d6
+ vmlal.s32 q1, d18, d21
+ vmlal.s32 q0, d18, d28
+ vmlal.s32 q6, d18, d29
+ vmlal.s32 q2, d19, d6
+ vmlal.s32 q5, d19, d21
+ vmlal.s32 q1, d19, d29
+ vmlal.s32 q0, d19, d9
+ vmlal.s32 q6, d19, d28
+ add r2, sp, #560
+ vld1.8 {d18-d19}, [r2, : 128]
+ add r2, sp, #480
+ vld1.8 {d22-d23}, [r2, : 128]
+ vmlal.s32 q5, d19, d7
+ vmlal.s32 q0, d18, d21
+ vmlal.s32 q0, d19, d29
+ vmlal.s32 q6, d18, d6
+ add r2, sp, #496
+ vld1.8 {d6-d7}, [r2, : 128]
+ vmlal.s32 q6, d19, d21
+ add r2, sp, #544
+ vld1.8 {d18-d19}, [r2, : 128]
+ vmlal.s32 q0, d30, d8
+ add r2, sp, #640
+ vld1.8 {d20-d21}, [r2, : 128]
+ vmlal.s32 q5, d30, d29
+ add r2, sp, #576
+ vld1.8 {d24-d25}, [r2, : 128]
+ vmlal.s32 q1, d30, d28
+ vadd.i64 q13, q0, q11
+ vadd.i64 q14, q5, q11
+ vmlal.s32 q6, d30, d9
+ vshr.s64 q4, q13, #26
+ vshr.s64 q13, q14, #26
+ vadd.i64 q7, q7, q4
+ vshl.i64 q4, q4, #26
+ vadd.i64 q14, q7, q3
+ vadd.i64 q9, q9, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q15, q9, q3
+ vsub.i64 q0, q0, q4
+ vshr.s64 q4, q14, #25
+ vsub.i64 q5, q5, q13
+ vshr.s64 q13, q15, #25
+ vadd.i64 q6, q6, q4
+ vshl.i64 q4, q4, #25
+ vadd.i64 q14, q6, q11
+ vadd.i64 q2, q2, q13
+ vsub.i64 q4, q7, q4
+ vshr.s64 q7, q14, #26
+ vshl.i64 q13, q13, #25
+ vadd.i64 q14, q2, q11
+ vadd.i64 q8, q8, q7
+ vshl.i64 q7, q7, #26
+ vadd.i64 q15, q8, q3
+ vsub.i64 q9, q9, q13
+ vshr.s64 q13, q14, #26
+ vsub.i64 q6, q6, q7
+ vshr.s64 q7, q15, #25
+ vadd.i64 q10, q10, q13
+ vshl.i64 q13, q13, #26
+ vadd.i64 q14, q10, q3
+ vadd.i64 q1, q1, q7
+ add r2, r3, #240
+ vshl.i64 q7, q7, #25
+ add r4, r3, #144
+ vadd.i64 q15, q1, q11
+ add r2, r2, #8
+ vsub.i64 q2, q2, q13
+ add r4, r4, #8
+ vshr.s64 q13, q14, #25
+ vsub.i64 q7, q8, q7
+ vshr.s64 q8, q15, #26
+ vadd.i64 q14, q13, q13
+ vadd.i64 q12, q12, q8
+ vtrn.32 d12, d14
+ vshl.i64 q8, q8, #26
+ vtrn.32 d13, d15
+ vadd.i64 q3, q12, q3
+ vadd.i64 q0, q0, q14
+ vst1.8 d12, [r2, : 64]!
+ vshl.i64 q7, q13, #4
+ vst1.8 d13, [r4, : 64]!
+ vsub.i64 q1, q1, q8
+ vshr.s64 q3, q3, #25
+ vadd.i64 q0, q0, q7
+ vadd.i64 q5, q5, q3
+ vshl.i64 q3, q3, #25
+ vadd.i64 q6, q5, q11
+ vadd.i64 q0, q0, q13
+ vshl.i64 q7, q13, #25
+ vadd.i64 q8, q0, q11
+ vsub.i64 q3, q12, q3
+ vshr.s64 q6, q6, #26
+ vsub.i64 q7, q10, q7
+ vtrn.32 d2, d6
+ vshr.s64 q8, q8, #26
+ vtrn.32 d3, d7
+ vadd.i64 q3, q9, q6
+ vst1.8 d2, [r2, : 64]
+ vshl.i64 q6, q6, #26
+ vst1.8 d3, [r4, : 64]
+ vadd.i64 q1, q4, q8
+ vtrn.32 d4, d14
+ vshl.i64 q4, q8, #26
+ vtrn.32 d5, d15
+ vsub.i64 q5, q5, q6
+ add r2, r2, #16
+ vsub.i64 q0, q0, q4
+ vst1.8 d4, [r2, : 64]
+ add r4, r4, #16
+ vst1.8 d5, [r4, : 64]
+ vtrn.32 d10, d6
+ vtrn.32 d11, d7
+ sub r2, r2, #8
+ sub r4, r4, #8
+ vtrn.32 d0, d2
+ vtrn.32 d1, d3
+ vst1.8 d10, [r2, : 64]
+ vst1.8 d11, [r4, : 64]
+ sub r2, r2, #24
+ sub r4, r4, #24
+ vst1.8 d0, [r2, : 64]
+ vst1.8 d1, [r4, : 64]
+ ldr r2, [sp, #456]
+ ldr r4, [sp, #460]
+ subs r5, r2, #1
+ bge .Lmainloop
+ add r1, r3, #144
+ add r2, r3, #336
+ vld1.8 {d0-d1}, [r1, : 128]!
+ vld1.8 {d2-d3}, [r1, : 128]!
+ vld1.8 {d4}, [r1, : 64]
+ vst1.8 {d0-d1}, [r2, : 128]!
+ vst1.8 {d2-d3}, [r2, : 128]!
+ vst1.8 d4, [r2, : 64]
+ movw r1, #0
+.Linvertloop:
+ add r2, r3, #144
+ movw r4, #0
+ movw r5, #2
+ cmp r1, #1
+ moveq r5, #1
+ addeq r2, r3, #336
+ addeq r4, r3, #48
+ cmp r1, #2
+ moveq r5, #1
+ addeq r2, r3, #48
+ cmp r1, #3
+ moveq r5, #5
+ addeq r4, r3, #336
+ cmp r1, #4
+ moveq r5, #10
+ cmp r1, #5
+ moveq r5, #20
+ cmp r1, #6
+ moveq r5, #10
+ addeq r2, r3, #336
+ addeq r4, r3, #336
+ cmp r1, #7
+ moveq r5, #50
+ cmp r1, #8
+ moveq r5, #100
+ cmp r1, #9
+ moveq r5, #50
+ addeq r2, r3, #336
+ cmp r1, #10
+ moveq r5, #5
+ addeq r2, r3, #48
+ cmp r1, #11
+ moveq r5, #0
+ addeq r2, r3, #96
+ add r6, r3, #144
+ add r7, r3, #288
+ vld1.8 {d0-d1}, [r6, : 128]!
+ vld1.8 {d2-d3}, [r6, : 128]!
+ vld1.8 {d4}, [r6, : 64]
+ vst1.8 {d0-d1}, [r7, : 128]!
+ vst1.8 {d2-d3}, [r7, : 128]!
+ vst1.8 d4, [r7, : 64]
+ cmp r5, #0
+ beq .Lskipsquaringloop
+.Lsquaringloop:
+ add r6, r3, #288
+ add r7, r3, #288
+ add r8, r3, #288
+ vmov.i32 q0, #19
+ vmov.i32 q1, #0
+ vmov.i32 q2, #1
+ vzip.i32 q1, q2
+ vld1.8 {d4-d5}, [r7, : 128]!
+ vld1.8 {d6-d7}, [r7, : 128]!
+ vld1.8 {d9}, [r7, : 64]
+ vld1.8 {d10-d11}, [r6, : 128]!
+ add r7, sp, #384
+ vld1.8 {d12-d13}, [r6, : 128]!
+ vmul.i32 q7, q2, q0
+ vld1.8 {d8}, [r6, : 64]
+ vext.32 d17, d11, d10, #1
+ vmul.i32 q9, q3, q0
+ vext.32 d16, d10, d8, #1
+ vshl.u32 q10, q5, q1
+ vext.32 d22, d14, d4, #1
+ vext.32 d24, d18, d6, #1
+ vshl.u32 q13, q6, q1
+ vshl.u32 d28, d8, d2
+ vrev64.i32 d22, d22
+ vmul.i32 d1, d9, d1
+ vrev64.i32 d24, d24
+ vext.32 d29, d8, d13, #1
+ vext.32 d0, d1, d9, #1
+ vrev64.i32 d0, d0
+ vext.32 d2, d9, d1, #1
+ vext.32 d23, d15, d5, #1
+ vmull.s32 q4, d20, d4
+ vrev64.i32 d23, d23
+ vmlal.s32 q4, d21, d1
+ vrev64.i32 d2, d2
+ vmlal.s32 q4, d26, d19
+ vext.32 d3, d5, d15, #1
+ vmlal.s32 q4, d27, d18
+ vrev64.i32 d3, d3
+ vmlal.s32 q4, d28, d15
+ vext.32 d14, d12, d11, #1
+ vmull.s32 q5, d16, d23
+ vext.32 d15, d13, d12, #1
+ vmlal.s32 q5, d17, d4
+ vst1.8 d8, [r7, : 64]!
+ vmlal.s32 q5, d14, d1
+ vext.32 d12, d9, d8, #0
+ vmlal.s32 q5, d15, d19
+ vmov.i64 d13, #0
+ vmlal.s32 q5, d29, d18
+ vext.32 d25, d19, d7, #1
+ vmlal.s32 q6, d20, d5
+ vrev64.i32 d25, d25
+ vmlal.s32 q6, d21, d4
+ vst1.8 d11, [r7, : 64]!
+ vmlal.s32 q6, d26, d1
+ vext.32 d9, d10, d10, #0
+ vmlal.s32 q6, d27, d19
+ vmov.i64 d8, #0
+ vmlal.s32 q6, d28, d18
+ vmlal.s32 q4, d16, d24
+ vmlal.s32 q4, d17, d5
+ vmlal.s32 q4, d14, d4
+ vst1.8 d12, [r7, : 64]!
+ vmlal.s32 q4, d15, d1
+ vext.32 d10, d13, d12, #0
+ vmlal.s32 q4, d29, d19
+ vmov.i64 d11, #0
+ vmlal.s32 q5, d20, d6
+ vmlal.s32 q5, d21, d5
+ vmlal.s32 q5, d26, d4
+ vext.32 d13, d8, d8, #0
+ vmlal.s32 q5, d27, d1
+ vmov.i64 d12, #0
+ vmlal.s32 q5, d28, d19
+ vst1.8 d9, [r7, : 64]!
+ vmlal.s32 q6, d16, d25
+ vmlal.s32 q6, d17, d6
+ vst1.8 d10, [r7, : 64]
+ vmlal.s32 q6, d14, d5
+ vext.32 d8, d11, d10, #0
+ vmlal.s32 q6, d15, d4
+ vmov.i64 d9, #0
+ vmlal.s32 q6, d29, d1
+ vmlal.s32 q4, d20, d7
+ vmlal.s32 q4, d21, d6
+ vmlal.s32 q4, d26, d5
+ vext.32 d11, d12, d12, #0
+ vmlal.s32 q4, d27, d4
+ vmov.i64 d10, #0
+ vmlal.s32 q4, d28, d1
+ vmlal.s32 q5, d16, d0
+ sub r6, r7, #32
+ vmlal.s32 q5, d17, d7
+ vmlal.s32 q5, d14, d6
+ vext.32 d30, d9, d8, #0
+ vmlal.s32 q5, d15, d5
+ vld1.8 {d31}, [r6, : 64]!
+ vmlal.s32 q5, d29, d4
+ vmlal.s32 q15, d20, d0
+ vext.32 d0, d6, d18, #1
+ vmlal.s32 q15, d21, d25
+ vrev64.i32 d0, d0
+ vmlal.s32 q15, d26, d24
+ vext.32 d1, d7, d19, #1
+ vext.32 d7, d10, d10, #0
+ vmlal.s32 q15, d27, d23
+ vrev64.i32 d1, d1
+ vld1.8 {d6}, [r6, : 64]
+ vmlal.s32 q15, d28, d22
+ vmlal.s32 q3, d16, d4
+ add r6, r6, #24
+ vmlal.s32 q3, d17, d2
+ vext.32 d4, d31, d30, #0
+ vmov d17, d11
+ vmlal.s32 q3, d14, d1
+ vext.32 d11, d13, d13, #0
+ vext.32 d13, d30, d30, #0
+ vmlal.s32 q3, d15, d0
+ vext.32 d1, d8, d8, #0
+ vmlal.s32 q3, d29, d3
+ vld1.8 {d5}, [r6, : 64]
+ sub r6, r6, #16
+ vext.32 d10, d6, d6, #0
+ vmov.i32 q1, #0xffffffff
+ vshl.i64 q4, q1, #25
+ add r7, sp, #480
+ vld1.8 {d14-d15}, [r7, : 128]
+ vadd.i64 q9, q2, q7
+ vshl.i64 q1, q1, #26
+ vshr.s64 q10, q9, #26
+ vld1.8 {d0}, [r6, : 64]!
+ vadd.i64 q5, q5, q10
+ vand q9, q9, q1
+ vld1.8 {d16}, [r6, : 64]!
+ add r6, sp, #496
+ vld1.8 {d20-d21}, [r6, : 128]
+ vadd.i64 q11, q5, q10
+ vsub.i64 q2, q2, q9
+ vshr.s64 q9, q11, #25
+ vext.32 d12, d5, d4, #0
+ vand q11, q11, q4
+ vadd.i64 q0, q0, q9
+ vmov d19, d7
+ vadd.i64 q3, q0, q7
+ vsub.i64 q5, q5, q11
+ vshr.s64 q11, q3, #26
+ vext.32 d18, d11, d10, #0
+ vand q3, q3, q1
+ vadd.i64 q8, q8, q11
+ vadd.i64 q11, q8, q10
+ vsub.i64 q0, q0, q3
+ vshr.s64 q3, q11, #25
+ vand q11, q11, q4
+ vadd.i64 q3, q6, q3
+ vadd.i64 q6, q3, q7
+ vsub.i64 q8, q8, q11
+ vshr.s64 q11, q6, #26
+ vand q6, q6, q1
+ vadd.i64 q9, q9, q11
+ vadd.i64 d25, d19, d21
+ vsub.i64 q3, q3, q6
+ vshr.s64 d23, d25, #25
+ vand q4, q12, q4
+ vadd.i64 d21, d23, d23
+ vshl.i64 d25, d23, #4
+ vadd.i64 d21, d21, d23
+ vadd.i64 d25, d25, d21
+ vadd.i64 d4, d4, d25
+ vzip.i32 q0, q8
+ vadd.i64 d12, d4, d14
+ add r6, r8, #8
+ vst1.8 d0, [r6, : 64]
+ vsub.i64 d19, d19, d9
+ add r6, r6, #16
+ vst1.8 d16, [r6, : 64]
+ vshr.s64 d22, d12, #26
+ vand q0, q6, q1
+ vadd.i64 d10, d10, d22
+ vzip.i32 q3, q9
+ vsub.i64 d4, d4, d0
+ sub r6, r6, #8
+ vst1.8 d6, [r6, : 64]
+ add r6, r6, #16
+ vst1.8 d18, [r6, : 64]
+ vzip.i32 q2, q5
+ sub r6, r6, #32
+ vst1.8 d4, [r6, : 64]
+ subs r5, r5, #1
+ bhi .Lsquaringloop
+.Lskipsquaringloop:
+ mov r2, r2
+ add r5, r3, #288
+ add r6, r3, #144
+ vmov.i32 q0, #19
+ vmov.i32 q1, #0
+ vmov.i32 q2, #1
+ vzip.i32 q1, q2
+ vld1.8 {d4-d5}, [r5, : 128]!
+ vld1.8 {d6-d7}, [r5, : 128]!
+ vld1.8 {d9}, [r5, : 64]
+ vld1.8 {d10-d11}, [r2, : 128]!
+ add r5, sp, #384
+ vld1.8 {d12-d13}, [r2, : 128]!
+ vmul.i32 q7, q2, q0
+ vld1.8 {d8}, [r2, : 64]
+ vext.32 d17, d11, d10, #1
+ vmul.i32 q9, q3, q0
+ vext.32 d16, d10, d8, #1
+ vshl.u32 q10, q5, q1
+ vext.32 d22, d14, d4, #1
+ vext.32 d24, d18, d6, #1
+ vshl.u32 q13, q6, q1
+ vshl.u32 d28, d8, d2
+ vrev64.i32 d22, d22
+ vmul.i32 d1, d9, d1
+ vrev64.i32 d24, d24
+ vext.32 d29, d8, d13, #1
+ vext.32 d0, d1, d9, #1
+ vrev64.i32 d0, d0
+ vext.32 d2, d9, d1, #1
+ vext.32 d23, d15, d5, #1
+ vmull.s32 q4, d20, d4
+ vrev64.i32 d23, d23
+ vmlal.s32 q4, d21, d1
+ vrev64.i32 d2, d2
+ vmlal.s32 q4, d26, d19
+ vext.32 d3, d5, d15, #1
+ vmlal.s32 q4, d27, d18
+ vrev64.i32 d3, d3
+ vmlal.s32 q4, d28, d15
+ vext.32 d14, d12, d11, #1
+ vmull.s32 q5, d16, d23
+ vext.32 d15, d13, d12, #1
+ vmlal.s32 q5, d17, d4
+ vst1.8 d8, [r5, : 64]!
+ vmlal.s32 q5, d14, d1
+ vext.32 d12, d9, d8, #0
+ vmlal.s32 q5, d15, d19
+ vmov.i64 d13, #0
+ vmlal.s32 q5, d29, d18
+ vext.32 d25, d19, d7, #1
+ vmlal.s32 q6, d20, d5
+ vrev64.i32 d25, d25
+ vmlal.s32 q6, d21, d4
+ vst1.8 d11, [r5, : 64]!
+ vmlal.s32 q6, d26, d1
+ vext.32 d9, d10, d10, #0
+ vmlal.s32 q6, d27, d19
+ vmov.i64 d8, #0
+ vmlal.s32 q6, d28, d18
+ vmlal.s32 q4, d16, d24
+ vmlal.s32 q4, d17, d5
+ vmlal.s32 q4, d14, d4
+ vst1.8 d12, [r5, : 64]!
+ vmlal.s32 q4, d15, d1
+ vext.32 d10, d13, d12, #0
+ vmlal.s32 q4, d29, d19
+ vmov.i64 d11, #0
+ vmlal.s32 q5, d20, d6
+ vmlal.s32 q5, d21, d5
+ vmlal.s32 q5, d26, d4
+ vext.32 d13, d8, d8, #0
+ vmlal.s32 q5, d27, d1
+ vmov.i64 d12, #0
+ vmlal.s32 q5, d28, d19
+ vst1.8 d9, [r5, : 64]!
+ vmlal.s32 q6, d16, d25
+ vmlal.s32 q6, d17, d6
+ vst1.8 d10, [r5, : 64]
+ vmlal.s32 q6, d14, d5
+ vext.32 d8, d11, d10, #0
+ vmlal.s32 q6, d15, d4
+ vmov.i64 d9, #0
+ vmlal.s32 q6, d29, d1
+ vmlal.s32 q4, d20, d7
+ vmlal.s32 q4, d21, d6
+ vmlal.s32 q4, d26, d5
+ vext.32 d11, d12, d12, #0
+ vmlal.s32 q4, d27, d4
+ vmov.i64 d10, #0
+ vmlal.s32 q4, d28, d1
+ vmlal.s32 q5, d16, d0
+ sub r2, r5, #32
+ vmlal.s32 q5, d17, d7
+ vmlal.s32 q5, d14, d6
+ vext.32 d30, d9, d8, #0
+ vmlal.s32 q5, d15, d5
+ vld1.8 {d31}, [r2, : 64]!
+ vmlal.s32 q5, d29, d4
+ vmlal.s32 q15, d20, d0
+ vext.32 d0, d6, d18, #1
+ vmlal.s32 q15, d21, d25
+ vrev64.i32 d0, d0
+ vmlal.s32 q15, d26, d24
+ vext.32 d1, d7, d19, #1
+ vext.32 d7, d10, d10, #0
+ vmlal.s32 q15, d27, d23
+ vrev64.i32 d1, d1
+ vld1.8 {d6}, [r2, : 64]
+ vmlal.s32 q15, d28, d22
+ vmlal.s32 q3, d16, d4
+ add r2, r2, #24
+ vmlal.s32 q3, d17, d2
+ vext.32 d4, d31, d30, #0
+ vmov d17, d11
+ vmlal.s32 q3, d14, d1
+ vext.32 d11, d13, d13, #0
+ vext.32 d13, d30, d30, #0
+ vmlal.s32 q3, d15, d0
+ vext.32 d1, d8, d8, #0
+ vmlal.s32 q3, d29, d3
+ vld1.8 {d5}, [r2, : 64]
+ sub r2, r2, #16
+ vext.32 d10, d6, d6, #0
+ vmov.i32 q1, #0xffffffff
+ vshl.i64 q4, q1, #25
+ add r5, sp, #480
+ vld1.8 {d14-d15}, [r5, : 128]
+ vadd.i64 q9, q2, q7
+ vshl.i64 q1, q1, #26
+ vshr.s64 q10, q9, #26
+ vld1.8 {d0}, [r2, : 64]!
+ vadd.i64 q5, q5, q10
+ vand q9, q9, q1
+ vld1.8 {d16}, [r2, : 64]!
+ add r2, sp, #496
+ vld1.8 {d20-d21}, [r2, : 128]
+ vadd.i64 q11, q5, q10
+ vsub.i64 q2, q2, q9
+ vshr.s64 q9, q11, #25
+ vext.32 d12, d5, d4, #0
+ vand q11, q11, q4
+ vadd.i64 q0, q0, q9
+ vmov d19, d7
+ vadd.i64 q3, q0, q7
+ vsub.i64 q5, q5, q11
+ vshr.s64 q11, q3, #26
+ vext.32 d18, d11, d10, #0
+ vand q3, q3, q1
+ vadd.i64 q8, q8, q11
+ vadd.i64 q11, q8, q10
+ vsub.i64 q0, q0, q3
+ vshr.s64 q3, q11, #25
+ vand q11, q11, q4
+ vadd.i64 q3, q6, q3
+ vadd.i64 q6, q3, q7
+ vsub.i64 q8, q8, q11
+ vshr.s64 q11, q6, #26
+ vand q6, q6, q1
+ vadd.i64 q9, q9, q11
+ vadd.i64 d25, d19, d21
+ vsub.i64 q3, q3, q6
+ vshr.s64 d23, d25, #25
+ vand q4, q12, q4
+ vadd.i64 d21, d23, d23
+ vshl.i64 d25, d23, #4
+ vadd.i64 d21, d21, d23
+ vadd.i64 d25, d25, d21
+ vadd.i64 d4, d4, d25
+ vzip.i32 q0, q8
+ vadd.i64 d12, d4, d14
+ add r2, r6, #8
+ vst1.8 d0, [r2, : 64]
+ vsub.i64 d19, d19, d9
+ add r2, r2, #16
+ vst1.8 d16, [r2, : 64]
+ vshr.s64 d22, d12, #26
+ vand q0, q6, q1
+ vadd.i64 d10, d10, d22
+ vzip.i32 q3, q9
+ vsub.i64 d4, d4, d0
+ sub r2, r2, #8
+ vst1.8 d6, [r2, : 64]
+ add r2, r2, #16
+ vst1.8 d18, [r2, : 64]
+ vzip.i32 q2, q5
+ sub r2, r2, #32
+ vst1.8 d4, [r2, : 64]
+ cmp r4, #0
+ beq .Lskippostcopy
+ add r2, r3, #144
+ mov r4, r4
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4}, [r2, : 64]
+ vst1.8 {d0-d1}, [r4, : 128]!
+ vst1.8 {d2-d3}, [r4, : 128]!
+ vst1.8 d4, [r4, : 64]
+.Lskippostcopy:
+ cmp r1, #1
+ bne .Lskipfinalcopy
+ add r2, r3, #288
+ add r4, r3, #144
+ vld1.8 {d0-d1}, [r2, : 128]!
+ vld1.8 {d2-d3}, [r2, : 128]!
+ vld1.8 {d4}, [r2, : 64]
+ vst1.8 {d0-d1}, [r4, : 128]!
+ vst1.8 {d2-d3}, [r4, : 128]!
+ vst1.8 d4, [r4, : 64]
+.Lskipfinalcopy:
+ add r1, r1, #1
+ cmp r1, #12
+ blo .Linvertloop
+ add r1, r3, #144
+ ldr r2, [r1], #4
+ ldr r3, [r1], #4
+ ldr r4, [r1], #4
+ ldr r5, [r1], #4
+ ldr r6, [r1], #4
+ ldr r7, [r1], #4
+ ldr r8, [r1], #4
+ ldr r9, [r1], #4
+ ldr r10, [r1], #4
+ ldr r1, [r1]
+ add r11, r1, r1, LSL #4
+ add r11, r11, r1, LSL #1
+ add r11, r11, #16777216
+ mov r11, r11, ASR #25
+ add r11, r11, r2
+ mov r11, r11, ASR #26
+ add r11, r11, r3
+ mov r11, r11, ASR #25
+ add r11, r11, r4
+ mov r11, r11, ASR #26
+ add r11, r11, r5
+ mov r11, r11, ASR #25
+ add r11, r11, r6
+ mov r11, r11, ASR #26
+ add r11, r11, r7
+ mov r11, r11, ASR #25
+ add r11, r11, r8
+ mov r11, r11, ASR #26
+ add r11, r11, r9
+ mov r11, r11, ASR #25
+ add r11, r11, r10
+ mov r11, r11, ASR #26
+ add r11, r11, r1
+ mov r11, r11, ASR #25
+ add r2, r2, r11
+ add r2, r2, r11, LSL #1
+ add r2, r2, r11, LSL #4
+ mov r11, r2, ASR #26
+ add r3, r3, r11
+ sub r2, r2, r11, LSL #26
+ mov r11, r3, ASR #25
+ add r4, r4, r11
+ sub r3, r3, r11, LSL #25
+ mov r11, r4, ASR #26
+ add r5, r5, r11
+ sub r4, r4, r11, LSL #26
+ mov r11, r5, ASR #25
+ add r6, r6, r11
+ sub r5, r5, r11, LSL #25
+ mov r11, r6, ASR #26
+ add r7, r7, r11
+ sub r6, r6, r11, LSL #26
+ mov r11, r7, ASR #25
+ add r8, r8, r11
+ sub r7, r7, r11, LSL #25
+ mov r11, r8, ASR #26
+ add r9, r9, r11
+ sub r8, r8, r11, LSL #26
+ mov r11, r9, ASR #25
+ add r10, r10, r11
+ sub r9, r9, r11, LSL #25
+ mov r11, r10, ASR #26
+ add r1, r1, r11
+ sub r10, r10, r11, LSL #26
+ mov r11, r1, ASR #25
+ sub r1, r1, r11, LSL #25
+ add r2, r2, r3, LSL #26
+ mov r3, r3, LSR #6
+ add r3, r3, r4, LSL #19
+ mov r4, r4, LSR #13
+ add r4, r4, r5, LSL #13
+ mov r5, r5, LSR #19
+ add r5, r5, r6, LSL #6
+ add r6, r7, r8, LSL #25
+ mov r7, r8, LSR #7
+ add r7, r7, r9, LSL #19
+ mov r8, r9, LSR #13
+ add r8, r8, r10, LSL #12
+ mov r9, r10, LSR #20
+ add r1, r9, r1, LSL #6
+ str r2, [r0]
+ str r3, [r0, #4]
+ str r4, [r0, #8]
+ str r5, [r0, #12]
+ str r6, [r0, #16]
+ str r7, [r0, #20]
+ str r8, [r0, #24]
+ str r1, [r0, #28]
+ movw r0, #0
+ mov sp, ip
+ pop {r4-r11, pc}
+ENDPROC(curve25519_neon)
diff --git a/arch/arm/crypto/curve25519-glue.c b/arch/arm/crypto/curve25519-glue.c
new file mode 100644
index 000000000000..2e9e12d2f642
--- /dev/null
+++ b/arch/arm/crypto/curve25519-glue.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ *
+ * Based on public domain code from Daniel J. Bernstein and Peter Schwabe. This
+ * began from SUPERCOP's curve25519/neon2/scalarmult.s, but has subsequently been
+ * manually reworked for use in kernel space.
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <crypto/internal/kpp.h>
+#include <crypto/internal/simd.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/jump_label.h>
+#include <crypto/curve25519.h>
+
+asmlinkage void curve25519_neon(u8 mypublic[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE],
+ const u8 basepoint[CURVE25519_KEY_SIZE]);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void curve25519_arch(u8 out[CURVE25519_KEY_SIZE],
+ const u8 scalar[CURVE25519_KEY_SIZE],
+ const u8 point[CURVE25519_KEY_SIZE])
+{
+ if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
+ kernel_neon_begin();
+ curve25519_neon(out, scalar, point);
+ kernel_neon_end();
+ } else {
+ curve25519_generic(out, scalar, point);
+ }
+}
+EXPORT_SYMBOL(curve25519_arch);
+
+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
+ unsigned int len)
+{
+ u8 *secret = kpp_tfm_ctx(tfm);
+
+ if (!len)
+ curve25519_generate_secret(secret);
+ else if (len == CURVE25519_KEY_SIZE &&
+ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
+ memcpy(secret, buf, CURVE25519_KEY_SIZE);
+ else
+ return -EINVAL;
+ return 0;
+}
+
+static int curve25519_compute_value(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ const u8 *secret = kpp_tfm_ctx(tfm);
+ u8 public_key[CURVE25519_KEY_SIZE];
+ u8 buf[CURVE25519_KEY_SIZE];
+ int copied, nbytes;
+ u8 const *bp;
+
+ if (req->src) {
+ copied = sg_copy_to_buffer(req->src,
+ sg_nents_for_len(req->src,
+ CURVE25519_KEY_SIZE),
+ public_key, CURVE25519_KEY_SIZE);
+ if (copied != CURVE25519_KEY_SIZE)
+ return -EINVAL;
+ bp = public_key;
+ } else {
+ bp = curve25519_base_point;
+ }
+
+ curve25519_arch(buf, secret, bp);
+
+ /* might want less than we've got */
+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+ nbytes),
+ buf, nbytes);
+ if (copied != nbytes)
+ return -EINVAL;
+ return 0;
+}
+
+static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
+{
+ return CURVE25519_KEY_SIZE;
+}
+
+static struct kpp_alg curve25519_alg = {
+ .base.cra_name = "curve25519",
+ .base.cra_driver_name = "curve25519-neon",
+ .base.cra_priority = 200,
+ .base.cra_module = THIS_MODULE,
+ .base.cra_ctxsize = CURVE25519_KEY_SIZE,
+
+ .set_secret = curve25519_set_secret,
+ .generate_public_key = curve25519_compute_value,
+ .compute_shared_secret = curve25519_compute_value,
+ .max_size = curve25519_max_size,
+};
+
+static int __init mod_init(void)
+{
+ if (elf_hwcap & HWCAP_NEON) {
+ static_branch_enable(&have_neon);
+ return crypto_register_kpp(&curve25519_alg);
+ }
+ return 0;
+}
+
+static void __exit mod_exit(void)
+{
+ if (elf_hwcap & HWCAP_NEON)
+ crypto_unregister_kpp(&curve25519_alg);
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_ALIAS_CRYPTO("curve25519");
+MODULE_ALIAS_CRYPTO("curve25519-neon");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/crypto/ghash-ce-core.S b/arch/arm/crypto/ghash-ce-core.S
index c47fe81abcb0..534c9647726d 100644
--- a/arch/arm/crypto/ghash-ce-core.S
+++ b/arch/arm/crypto/ghash-ce-core.S
@@ -88,6 +88,7 @@
T3_H .req d17
.text
+ .arch armv8-a
.fpu crypto-neon-fp-armv8
.macro __pmull_p64, rd, rn, rm, b1, b2, b3, b4
diff --git a/arch/arm/crypto/poly1305-armv4.pl b/arch/arm/crypto/poly1305-armv4.pl
new file mode 100644
index 000000000000..6d79498d3115
--- /dev/null
+++ b/arch/arm/crypto/poly1305-armv4.pl
@@ -0,0 +1,1236 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL
+# project.
+# ====================================================================
+#
+# IALU(*)/gcc-4.4 NEON
+#
+# ARM11xx(ARMv6) 7.78/+100% -
+# Cortex-A5 6.35/+130% 3.00
+# Cortex-A8 6.25/+115% 2.36
+# Cortex-A9 5.10/+95% 2.55
+# Cortex-A15 3.85/+85% 1.25(**)
+# Snapdragon S4 5.70/+100% 1.48(**)
+#
+# (*) this is for -march=armv6, i.e. with bunch of ldrb loading data;
+# (**) these are trade-off results, they can be improved by ~8% but at
+# the cost of 15/12% regression on Cortex-A5/A7, it's even possible
+# to improve Cortex-A9 result, but then A5/A7 loose more than 20%;
+
+$flavour = shift;
+if ($flavour=~/\w[\w\-]*\.\w+$/) { $output=$flavour; undef $flavour; }
+else { while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {} }
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
+
+($ctx,$inp,$len,$padbit)=map("r$_",(0..3));
+
+$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
+# define poly1305_init poly1305_init_arm
+# define poly1305_blocks poly1305_blocks_arm
+# define poly1305_emit poly1305_emit_arm
+.globl poly1305_blocks_neon
+#endif
+
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+#else
+.code 32
+#endif
+
+.text
+
+.globl poly1305_emit
+.globl poly1305_blocks
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+.Lpoly1305_init:
+ stmdb sp!,{r4-r11}
+
+ eor r3,r3,r3
+ cmp $inp,#0
+ str r3,[$ctx,#0] @ zero hash value
+ str r3,[$ctx,#4]
+ str r3,[$ctx,#8]
+ str r3,[$ctx,#12]
+ str r3,[$ctx,#16]
+ str r3,[$ctx,#36] @ clear is_base2_26
+ add $ctx,$ctx,#20
+
+#ifdef __thumb2__
+ it eq
+#endif
+ moveq r0,#0
+ beq .Lno_key
+
+#if __ARM_MAX_ARCH__>=7
+ mov r3,#-1
+ str r3,[$ctx,#28] @ impossible key power value
+# ifndef __KERNEL__
+ adr r11,.Lpoly1305_init
+ ldr r12,.LOPENSSL_armcap
+# endif
+#endif
+ ldrb r4,[$inp,#0]
+ mov r10,#0x0fffffff
+ ldrb r5,[$inp,#1]
+ and r3,r10,#-4 @ 0x0ffffffc
+ ldrb r6,[$inp,#2]
+ ldrb r7,[$inp,#3]
+ orr r4,r4,r5,lsl#8
+ ldrb r5,[$inp,#4]
+ orr r4,r4,r6,lsl#16
+ ldrb r6,[$inp,#5]
+ orr r4,r4,r7,lsl#24
+ ldrb r7,[$inp,#6]
+ and r4,r4,r10
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+# if !defined(_WIN32)
+ ldr r12,[r11,r12] @ OPENSSL_armcap_P
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
+ ldr r12,[r12]
+# endif
+#endif
+ ldrb r8,[$inp,#7]
+ orr r5,r5,r6,lsl#8
+ ldrb r6,[$inp,#8]
+ orr r5,r5,r7,lsl#16
+ ldrb r7,[$inp,#9]
+ orr r5,r5,r8,lsl#24
+ ldrb r8,[$inp,#10]
+ and r5,r5,r3
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ tst r12,#ARMV7_NEON @ check for NEON
+# ifdef __thumb2__
+ adr r9,.Lpoly1305_blocks_neon
+ adr r11,.Lpoly1305_blocks
+ it ne
+ movne r11,r9
+ adr r12,.Lpoly1305_emit
+ orr r11,r11,#1 @ thumb-ify addresses
+ orr r12,r12,#1
+# else
+ add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
+ ite eq
+ addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
+ addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
+# endif
+#endif
+ ldrb r9,[$inp,#11]
+ orr r6,r6,r7,lsl#8
+ ldrb r7,[$inp,#12]
+ orr r6,r6,r8,lsl#16
+ ldrb r8,[$inp,#13]
+ orr r6,r6,r9,lsl#24
+ ldrb r9,[$inp,#14]
+ and r6,r6,r3
+
+ ldrb r10,[$inp,#15]
+ orr r7,r7,r8,lsl#8
+ str r4,[$ctx,#0]
+ orr r7,r7,r9,lsl#16
+ str r5,[$ctx,#4]
+ orr r7,r7,r10,lsl#24
+ str r6,[$ctx,#8]
+ and r7,r7,r3
+ str r7,[$ctx,#12]
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ stmia r2,{r11,r12} @ fill functions table
+ mov r0,#1
+#else
+ mov r0,#0
+#endif
+.Lno_key:
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ ret @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_init,.-poly1305_init
+___
+{
+my ($h0,$h1,$h2,$h3,$h4,$r0,$r1,$r2,$r3)=map("r$_",(4..12));
+my ($s1,$s2,$s3)=($r1,$r2,$r3);
+
+$code.=<<___;
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ stmdb sp!,{r3-r11,lr}
+
+ ands $len,$len,#-16
+ beq .Lno_data
+
+ add $len,$len,$inp @ end pointer
+ sub sp,sp,#32
+
+#if __ARM_ARCH__<7
+ ldmia $ctx,{$h0-$r3} @ load context
+ add $ctx,$ctx,#20
+ str $len,[sp,#16] @ offload stuff
+ str $ctx,[sp,#12]
+#else
+ ldr lr,[$ctx,#36] @ is_base2_26
+ ldmia $ctx!,{$h0-$h4} @ load hash value
+ str $len,[sp,#16] @ offload stuff
+ str $ctx,[sp,#12]
+
+ adds $r0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32
+ mov $r1,$h1,lsr#6
+ adcs $r1,$r1,$h2,lsl#20
+ mov $r2,$h2,lsr#12
+ adcs $r2,$r2,$h3,lsl#14
+ mov $r3,$h3,lsr#18
+ adcs $r3,$r3,$h4,lsl#8
+ mov $len,#0
+ teq lr,#0
+ str $len,[$ctx,#16] @ clear is_base2_26
+ adc $len,$len,$h4,lsr#24
+
+ itttt ne
+ movne $h0,$r0 @ choose between radixes
+ movne $h1,$r1
+ movne $h2,$r2
+ movne $h3,$r3
+ ldmia $ctx,{$r0-$r3} @ load key
+ it ne
+ movne $h4,$len
+#endif
+
+ mov lr,$inp
+ cmp $padbit,#0
+ str $r1,[sp,#20]
+ str $r2,[sp,#24]
+ str $r3,[sp,#28]
+ b .Loop
+
+.align 4
+.Loop:
+#if __ARM_ARCH__<7
+ ldrb r0,[lr],#16 @ load input
+# ifdef __thumb2__
+ it hi
+# endif
+ addhi $h4,$h4,#1 @ 1<<128
+ ldrb r1,[lr,#-15]
+ ldrb r2,[lr,#-14]
+ ldrb r3,[lr,#-13]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-12]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-11]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-10]
+ adds $h0,$h0,r3 @ accumulate input
+
+ ldrb r3,[lr,#-9]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-8]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-7]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-6]
+ adcs $h1,$h1,r3
+
+ ldrb r3,[lr,#-5]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-4]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-3]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-2]
+ adcs $h2,$h2,r3
+
+ ldrb r3,[lr,#-1]
+ orr r1,r0,r1,lsl#8
+ str lr,[sp,#8] @ offload input pointer
+ orr r2,r1,r2,lsl#16
+ add $s1,$r1,$r1,lsr#2
+ orr r3,r2,r3,lsl#24
+#else
+ ldr r0,[lr],#16 @ load input
+ it hi
+ addhi $h4,$h4,#1 @ padbit
+ ldr r1,[lr,#-12]
+ ldr r2,[lr,#-8]
+ ldr r3,[lr,#-4]
+# ifdef __ARMEB__
+ rev r0,r0
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+# endif
+ adds $h0,$h0,r0 @ accumulate input
+ str lr,[sp,#8] @ offload input pointer
+ adcs $h1,$h1,r1
+ add $s1,$r1,$r1,lsr#2
+ adcs $h2,$h2,r2
+#endif
+ add $s2,$r2,$r2,lsr#2
+ adcs $h3,$h3,r3
+ add $s3,$r3,$r3,lsr#2
+
+ umull r2,r3,$h1,$r0
+ adc $h4,$h4,#0
+ umull r0,r1,$h0,$r0
+ umlal r2,r3,$h4,$s1
+ umlal r0,r1,$h3,$s1
+ ldr $r1,[sp,#20] @ reload $r1
+ umlal r2,r3,$h2,$s3
+ umlal r0,r1,$h1,$s3
+ umlal r2,r3,$h3,$s2
+ umlal r0,r1,$h2,$s2
+ umlal r2,r3,$h0,$r1
+ str r0,[sp,#0] @ future $h0
+ mul r0,$s2,$h4
+ ldr $r2,[sp,#24] @ reload $r2
+ adds r2,r2,r1 @ d1+=d0>>32
+ eor r1,r1,r1
+ adc lr,r3,#0 @ future $h2
+ str r2,[sp,#4] @ future $h1
+
+ mul r2,$s3,$h4
+ eor r3,r3,r3
+ umlal r0,r1,$h3,$s3
+ ldr $r3,[sp,#28] @ reload $r3
+ umlal r2,r3,$h3,$r0
+ umlal r0,r1,$h2,$r0
+ umlal r2,r3,$h2,$r1
+ umlal r0,r1,$h1,$r1
+ umlal r2,r3,$h1,$r2
+ umlal r0,r1,$h0,$r2
+ umlal r2,r3,$h0,$r3
+ ldr $h0,[sp,#0]
+ mul $h4,$r0,$h4
+ ldr $h1,[sp,#4]
+
+ adds $h2,lr,r0 @ d2+=d1>>32
+ ldr lr,[sp,#8] @ reload input pointer
+ adc r1,r1,#0
+ adds $h3,r2,r1 @ d3+=d2>>32
+ ldr r0,[sp,#16] @ reload end pointer
+ adc r3,r3,#0
+ add $h4,$h4,r3 @ h4+=d3>>32
+
+ and r1,$h4,#-4
+ and $h4,$h4,#3
+ add r1,r1,r1,lsr#2 @ *=5
+ adds $h0,$h0,r1
+ adcs $h1,$h1,#0
+ adcs $h2,$h2,#0
+ adcs $h3,$h3,#0
+ adc $h4,$h4,#0
+
+ cmp r0,lr @ done yet?
+ bhi .Loop
+
+ ldr $ctx,[sp,#12]
+ add sp,sp,#32
+ stmdb $ctx,{$h0-$h4} @ store the result
+
+.Lno_data:
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r3-r11,pc}
+#else
+ ldmia sp!,{r3-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_blocks,.-poly1305_blocks
+___
+}
+{
+my ($ctx,$mac,$nonce)=map("r$_",(0..2));
+my ($h0,$h1,$h2,$h3,$h4,$g0,$g1,$g2,$g3)=map("r$_",(3..11));
+my $g4=$ctx;
+
+$code.=<<___;
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ stmdb sp!,{r4-r11}
+
+ ldmia $ctx,{$h0-$h4}
+
+#if __ARM_ARCH__>=7
+ ldr ip,[$ctx,#36] @ is_base2_26
+
+ adds $g0,$h0,$h1,lsl#26 @ base 2^26 -> base 2^32
+ mov $g1,$h1,lsr#6
+ adcs $g1,$g1,$h2,lsl#20
+ mov $g2,$h2,lsr#12
+ adcs $g2,$g2,$h3,lsl#14
+ mov $g3,$h3,lsr#18
+ adcs $g3,$g3,$h4,lsl#8
+ mov $g4,#0
+ adc $g4,$g4,$h4,lsr#24
+
+ tst ip,ip
+ itttt ne
+ movne $h0,$g0
+ movne $h1,$g1
+ movne $h2,$g2
+ movne $h3,$g3
+ it ne
+ movne $h4,$g4
+#endif
+
+ adds $g0,$h0,#5 @ compare to modulus
+ adcs $g1,$h1,#0
+ adcs $g2,$h2,#0
+ adcs $g3,$h3,#0
+ adc $g4,$h4,#0
+ tst $g4,#4 @ did it carry/borrow?
+
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h0,$g0
+ ldr $g0,[$nonce,#0]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h1,$g1
+ ldr $g1,[$nonce,#4]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h2,$g2
+ ldr $g2,[$nonce,#8]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne $h3,$g3
+ ldr $g3,[$nonce,#12]
+
+ adds $h0,$h0,$g0
+ adcs $h1,$h1,$g1
+ adcs $h2,$h2,$g2
+ adc $h3,$h3,$g3
+
+#if __ARM_ARCH__>=7
+# ifdef __ARMEB__
+ rev $h0,$h0
+ rev $h1,$h1
+ rev $h2,$h2
+ rev $h3,$h3
+# endif
+ str $h0,[$mac,#0]
+ str $h1,[$mac,#4]
+ str $h2,[$mac,#8]
+ str $h3,[$mac,#12]
+#else
+ strb $h0,[$mac,#0]
+ mov $h0,$h0,lsr#8
+ strb $h1,[$mac,#4]
+ mov $h1,$h1,lsr#8
+ strb $h2,[$mac,#8]
+ mov $h2,$h2,lsr#8
+ strb $h3,[$mac,#12]
+ mov $h3,$h3,lsr#8
+
+ strb $h0,[$mac,#1]
+ mov $h0,$h0,lsr#8
+ strb $h1,[$mac,#5]
+ mov $h1,$h1,lsr#8
+ strb $h2,[$mac,#9]
+ mov $h2,$h2,lsr#8
+ strb $h3,[$mac,#13]
+ mov $h3,$h3,lsr#8
+
+ strb $h0,[$mac,#2]
+ mov $h0,$h0,lsr#8
+ strb $h1,[$mac,#6]
+ mov $h1,$h1,lsr#8
+ strb $h2,[$mac,#10]
+ mov $h2,$h2,lsr#8
+ strb $h3,[$mac,#14]
+ mov $h3,$h3,lsr#8
+
+ strb $h0,[$mac,#3]
+ strb $h1,[$mac,#7]
+ strb $h2,[$mac,#11]
+ strb $h3,[$mac,#15]
+#endif
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ ret @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_emit,.-poly1305_emit
+___
+{
+my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("d$_",(0..9));
+my ($D0,$D1,$D2,$D3,$D4, $H0,$H1,$H2,$H3,$H4) = map("q$_",(5..14));
+my ($T0,$T1,$MASK) = map("q$_",(15,4,0));
+
+my ($in2,$zeros,$tbl0,$tbl1) = map("r$_",(4..7));
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.fpu neon
+
+.type poly1305_init_neon,%function
+.align 5
+poly1305_init_neon:
+.Lpoly1305_init_neon:
+ ldr r3,[$ctx,#48] @ first table element
+ cmp r3,#-1 @ is value impossible?
+ bne .Lno_init_neon
+
+ ldr r4,[$ctx,#20] @ load key base 2^32
+ ldr r5,[$ctx,#24]
+ ldr r6,[$ctx,#28]
+ ldr r7,[$ctx,#32]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ and r3,r3,#0x03ffffff
+ and r4,r4,#0x03ffffff
+ and r5,r5,#0x03ffffff
+
+ vdup.32 $R0,r2 @ r^1 in both lanes
+ add r2,r3,r3,lsl#2 @ *5
+ vdup.32 $R1,r3
+ add r3,r4,r4,lsl#2
+ vdup.32 $S1,r2
+ vdup.32 $R2,r4
+ add r4,r5,r5,lsl#2
+ vdup.32 $S2,r3
+ vdup.32 $R3,r5
+ add r5,r6,r6,lsl#2
+ vdup.32 $S3,r4
+ vdup.32 $R4,r6
+ vdup.32 $S4,r5
+
+ mov $zeros,#2 @ counter
+
+.Lsquare_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+
+ vmull.u32 $D0,$R0,${R0}[1]
+ vmull.u32 $D1,$R1,${R0}[1]
+ vmull.u32 $D2,$R2,${R0}[1]
+ vmull.u32 $D3,$R3,${R0}[1]
+ vmull.u32 $D4,$R4,${R0}[1]
+
+ vmlal.u32 $D0,$R4,${S1}[1]
+ vmlal.u32 $D1,$R0,${R1}[1]
+ vmlal.u32 $D2,$R1,${R1}[1]
+ vmlal.u32 $D3,$R2,${R1}[1]
+ vmlal.u32 $D4,$R3,${R1}[1]
+
+ vmlal.u32 $D0,$R3,${S2}[1]
+ vmlal.u32 $D1,$R4,${S2}[1]
+ vmlal.u32 $D3,$R1,${R2}[1]
+ vmlal.u32 $D2,$R0,${R2}[1]
+ vmlal.u32 $D4,$R2,${R2}[1]
+
+ vmlal.u32 $D0,$R2,${S3}[1]
+ vmlal.u32 $D3,$R0,${R3}[1]
+ vmlal.u32 $D1,$R3,${S3}[1]
+ vmlal.u32 $D2,$R4,${S3}[1]
+ vmlal.u32 $D4,$R1,${R3}[1]
+
+ vmlal.u32 $D3,$R4,${S4}[1]
+ vmlal.u32 $D0,$R1,${S4}[1]
+ vmlal.u32 $D1,$R2,${S4}[1]
+ vmlal.u32 $D2,$R3,${S4}[1]
+ vmlal.u32 $D4,$R0,${R4}[1]
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ @ and P. Schwabe
+ @
+ @ H0>>+H1>>+H2>>+H3>>+H4
+ @ H3>>+H4>>*5+H0>>+H1
+ @
+ @ Trivia.
+ @
+ @ Result of multiplication of n-bit number by m-bit number is
+ @ n+m bits wide. However! Even though 2^n is a n+1-bit number,
+ @ m-bit number multiplied by 2^n is still n+m bits wide.
+ @
+ @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
+ @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
+ @ one is n+1 bits wide.
+ @
+ @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
+ @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
+ @ can be 27. However! In cases when their width exceeds 26 bits
+ @ they are limited by 2^26+2^6. This in turn means that *sum*
+ @ of the products with these values can still be viewed as sum
+ @ of 52-bit numbers as long as the amount of addends is not a
+ @ power of 2. For example,
+ @
+ @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
+ @
+ @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
+ @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
+ @ 8 * (2^52) or 2^55. However, the value is then multiplied by
+ @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
+ @ which is less than 32 * (2^52) or 2^57. And when processing
+ @ data we are looking at triple as many addends...
+ @
+ @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
+ @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
+ @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
+ @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
+ @ instruction accepts 2x32-bit input and writes 2x64-bit result.
+ @ This means that result of reduction have to be compressed upon
+ @ loop wrap-around. This can be done in the process of reduction
+ @ to minimize amount of instructions [as well as amount of
+ @ 128-bit instructions, which benefits low-end processors], but
+ @ one has to watch for H2 (which is narrower than H0) and 5*H4
+ @ not being wider than 58 bits, so that result of right shift
+ @ by 26 bits fits in 32 bits. This is also useful on x86,
+ @ because it allows to use paddd in place for paddq, which
+ @ benefits Atom, where paddq is ridiculously slow.
+
+ vshr.u64 $T0,$D3,#26
+ vmovn.i64 $D3#lo,$D3
+ vshr.u64 $T1,$D0,#26
+ vmovn.i64 $D0#lo,$D0
+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
+ vbic.i32 $D3#lo,#0xfc000000 @ &=0x03ffffff
+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
+ vbic.i32 $D0#lo,#0xfc000000
+
+ vshrn.u64 $T0#lo,$D4,#26
+ vmovn.i64 $D4#lo,$D4
+ vshr.u64 $T1,$D1,#26
+ vmovn.i64 $D1#lo,$D1
+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
+ vbic.i32 $D4#lo,#0xfc000000
+ vbic.i32 $D1#lo,#0xfc000000
+
+ vadd.i32 $D0#lo,$D0#lo,$T0#lo
+ vshl.u32 $T0#lo,$T0#lo,#2
+ vshrn.u64 $T1#lo,$D2,#26
+ vmovn.i64 $D2#lo,$D2
+ vadd.i32 $D0#lo,$D0#lo,$T0#lo @ h4 -> h0
+ vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3
+ vbic.i32 $D2#lo,#0xfc000000
+
+ vshr.u32 $T0#lo,$D0#lo,#26
+ vbic.i32 $D0#lo,#0xfc000000
+ vshr.u32 $T1#lo,$D3#lo,#26
+ vbic.i32 $D3#lo,#0xfc000000
+ vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1
+ vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4
+
+ subs $zeros,$zeros,#1
+ beq .Lsquare_break_neon
+
+ add $tbl0,$ctx,#(48+0*9*4)
+ add $tbl1,$ctx,#(48+1*9*4)
+
+ vtrn.32 $R0,$D0#lo @ r^2:r^1
+ vtrn.32 $R2,$D2#lo
+ vtrn.32 $R3,$D3#lo
+ vtrn.32 $R1,$D1#lo
+ vtrn.32 $R4,$D4#lo
+
+ vshl.u32 $S2,$R2,#2 @ *5
+ vshl.u32 $S3,$R3,#2
+ vshl.u32 $S1,$R1,#2
+ vshl.u32 $S4,$R4,#2
+ vadd.i32 $S2,$S2,$R2
+ vadd.i32 $S1,$S1,$R1
+ vadd.i32 $S3,$S3,$R3
+ vadd.i32 $S4,$S4,$R4
+
+ vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
+ vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
+ vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vst1.32 {${S4}[0]},[$tbl0,:32]
+ vst1.32 {${S4}[1]},[$tbl1,:32]
+
+ b .Lsquare_neon
+
+.align 4
+.Lsquare_break_neon:
+ add $tbl0,$ctx,#(48+2*4*9)
+ add $tbl1,$ctx,#(48+3*4*9)
+
+ vmov $R0,$D0#lo @ r^4:r^3
+ vshl.u32 $S1,$D1#lo,#2 @ *5
+ vmov $R1,$D1#lo
+ vshl.u32 $S2,$D2#lo,#2
+ vmov $R2,$D2#lo
+ vshl.u32 $S3,$D3#lo,#2
+ vmov $R3,$D3#lo
+ vshl.u32 $S4,$D4#lo,#2
+ vmov $R4,$D4#lo
+ vadd.i32 $S1,$S1,$D1#lo
+ vadd.i32 $S2,$S2,$D2#lo
+ vadd.i32 $S3,$S3,$D3#lo
+ vadd.i32 $S4,$S4,$D4#lo
+
+ vst4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]!
+ vst4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]!
+ vst4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vst4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vst1.32 {${S4}[0]},[$tbl0]
+ vst1.32 {${S4}[1]},[$tbl1]
+
+.Lno_init_neon:
+ ret @ bx lr
+.size poly1305_init_neon,.-poly1305_init_neon
+
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr ip,[$ctx,#36] @ is_base2_26
+
+ cmp $len,#64
+ blo .Lpoly1305_blocks
+
+ stmdb sp!,{r4-r7}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ tst ip,ip @ is_base2_26?
+ bne .Lbase2_26_neon
+
+ stmdb sp!,{r1-r3,lr}
+ bl .Lpoly1305_init_neon
+
+ ldr r4,[$ctx,#0] @ load hash value base 2^32
+ ldr r5,[$ctx,#4]
+ ldr r6,[$ctx,#8]
+ ldr r7,[$ctx,#12]
+ ldr ip,[$ctx,#16]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ veor $D0#lo,$D0#lo,$D0#lo
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ veor $D1#lo,$D1#lo,$D1#lo
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ veor $D2#lo,$D2#lo,$D2#lo
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ veor $D3#lo,$D3#lo,$D3#lo
+ and r3,r3,#0x03ffffff
+ orr r6,r6,ip,lsl#24
+ veor $D4#lo,$D4#lo,$D4#lo
+ and r4,r4,#0x03ffffff
+ mov r1,#1
+ and r5,r5,#0x03ffffff
+ str r1,[$ctx,#36] @ set is_base2_26
+
+ vmov.32 $D0#lo[0],r2
+ vmov.32 $D1#lo[0],r3
+ vmov.32 $D2#lo[0],r4
+ vmov.32 $D3#lo[0],r5
+ vmov.32 $D4#lo[0],r6
+ adr $zeros,.Lzeros
+
+ ldmia sp!,{r1-r3,lr}
+ b .Lhash_loaded
+
+.align 4
+.Lbase2_26_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ load hash value
+
+ veor $D0#lo,$D0#lo,$D0#lo
+ veor $D1#lo,$D1#lo,$D1#lo
+ veor $D2#lo,$D2#lo,$D2#lo
+ veor $D3#lo,$D3#lo,$D3#lo
+ veor $D4#lo,$D4#lo,$D4#lo
+ vld4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
+ adr $zeros,.Lzeros
+ vld1.32 {$D4#lo[0]},[$ctx]
+ sub $ctx,$ctx,#16 @ rewind
+
+.Lhash_loaded:
+ add $in2,$inp,#32
+ mov $padbit,$padbit,lsl#24
+ tst $len,#31
+ beq .Leven
+
+ vld4.32 {$H0#lo[0],$H1#lo[0],$H2#lo[0],$H3#lo[0]},[$inp]!
+ vmov.32 $H4#lo[0],$padbit
+ sub $len,$len,#16
+ add $in2,$inp,#32
+
+# ifdef __ARMEB__
+ vrev32.8 $H0,$H0
+ vrev32.8 $H3,$H3
+ vrev32.8 $H1,$H1
+ vrev32.8 $H2,$H2
+# endif
+ vsri.u32 $H4#lo,$H3#lo,#8 @ base 2^32 -> base 2^26
+ vshl.u32 $H3#lo,$H3#lo,#18
+
+ vsri.u32 $H3#lo,$H2#lo,#14
+ vshl.u32 $H2#lo,$H2#lo,#12
+ vadd.i32 $H4#hi,$H4#lo,$D4#lo @ add hash value and move to #hi
+
+ vbic.i32 $H3#lo,#0xfc000000
+ vsri.u32 $H2#lo,$H1#lo,#20
+ vshl.u32 $H1#lo,$H1#lo,#6
+
+ vbic.i32 $H2#lo,#0xfc000000
+ vsri.u32 $H1#lo,$H0#lo,#26
+ vadd.i32 $H3#hi,$H3#lo,$D3#lo
+
+ vbic.i32 $H0#lo,#0xfc000000
+ vbic.i32 $H1#lo,#0xfc000000
+ vadd.i32 $H2#hi,$H2#lo,$D2#lo
+
+ vadd.i32 $H0#hi,$H0#lo,$D0#lo
+ vadd.i32 $H1#hi,$H1#lo,$D1#lo
+
+ mov $tbl1,$zeros
+ add $tbl0,$ctx,#48
+
+ cmp $len,$len
+ b .Long_tail
+
+.align 4
+.Leven:
+ subs $len,$len,#64
+ it lo
+ movlo $in2,$zeros
+
+ vmov.i32 $H4,#1<<24 @ padbit, yes, always
+ vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1]
+ add $inp,$inp,#64
+ vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0)
+ add $in2,$in2,#64
+ itt hi
+ addhi $tbl1,$ctx,#(48+1*9*4)
+ addhi $tbl0,$ctx,#(48+3*9*4)
+
+# ifdef __ARMEB__
+ vrev32.8 $H0,$H0
+ vrev32.8 $H3,$H3
+ vrev32.8 $H1,$H1
+ vrev32.8 $H2,$H2
+# endif
+ vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26
+ vshl.u32 $H3,$H3,#18
+
+ vsri.u32 $H3,$H2,#14
+ vshl.u32 $H2,$H2,#12
+
+ vbic.i32 $H3,#0xfc000000
+ vsri.u32 $H2,$H1,#20
+ vshl.u32 $H1,$H1,#6
+
+ vbic.i32 $H2,#0xfc000000
+ vsri.u32 $H1,$H0,#26
+
+ vbic.i32 $H0,#0xfc000000
+ vbic.i32 $H1,#0xfc000000
+
+ bls .Lskip_loop
+
+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^2
+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4
+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ b .Loop_neon
+
+.align 5
+.Loop_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ @ \___________________/
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ @ \___________________/ \____________________/
+ @
+ @ Note that we start with inp[2:3]*r^2. This is because it
+ @ doesn't depend on reduction in previous iteration.
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ inp[2:3]*r^2
+
+ vadd.i32 $H2#lo,$H2#lo,$D2#lo @ accumulate inp[0:1]
+ vmull.u32 $D2,$H2#hi,${R0}[1]
+ vadd.i32 $H0#lo,$H0#lo,$D0#lo
+ vmull.u32 $D0,$H0#hi,${R0}[1]
+ vadd.i32 $H3#lo,$H3#lo,$D3#lo
+ vmull.u32 $D3,$H3#hi,${R0}[1]
+ vmlal.u32 $D2,$H1#hi,${R1}[1]
+ vadd.i32 $H1#lo,$H1#lo,$D1#lo
+ vmull.u32 $D1,$H1#hi,${R0}[1]
+
+ vadd.i32 $H4#lo,$H4#lo,$D4#lo
+ vmull.u32 $D4,$H4#hi,${R0}[1]
+ subs $len,$len,#64
+ vmlal.u32 $D0,$H4#hi,${S1}[1]
+ it lo
+ movlo $in2,$zeros
+ vmlal.u32 $D3,$H2#hi,${R1}[1]
+ vld1.32 ${S4}[1],[$tbl1,:32]
+ vmlal.u32 $D1,$H0#hi,${R1}[1]
+ vmlal.u32 $D4,$H3#hi,${R1}[1]
+
+ vmlal.u32 $D0,$H3#hi,${S2}[1]
+ vmlal.u32 $D3,$H1#hi,${R2}[1]
+ vmlal.u32 $D4,$H2#hi,${R2}[1]
+ vmlal.u32 $D1,$H4#hi,${S2}[1]
+ vmlal.u32 $D2,$H0#hi,${R2}[1]
+
+ vmlal.u32 $D3,$H0#hi,${R3}[1]
+ vmlal.u32 $D0,$H2#hi,${S3}[1]
+ vmlal.u32 $D4,$H1#hi,${R3}[1]
+ vmlal.u32 $D1,$H3#hi,${S3}[1]
+ vmlal.u32 $D2,$H4#hi,${S3}[1]
+
+ vmlal.u32 $D3,$H4#hi,${S4}[1]
+ vmlal.u32 $D0,$H1#hi,${S4}[1]
+ vmlal.u32 $D4,$H0#hi,${R4}[1]
+ vmlal.u32 $D1,$H2#hi,${S4}[1]
+ vmlal.u32 $D2,$H3#hi,${S4}[1]
+
+ vld4.32 {$H0#hi,$H1#hi,$H2#hi,$H3#hi},[$in2] @ inp[2:3] (or 0)
+ add $in2,$in2,#64
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4 and accumulate
+
+ vmlal.u32 $D3,$H3#lo,${R0}[0]
+ vmlal.u32 $D0,$H0#lo,${R0}[0]
+ vmlal.u32 $D4,$H4#lo,${R0}[0]
+ vmlal.u32 $D1,$H1#lo,${R0}[0]
+ vmlal.u32 $D2,$H2#lo,${R0}[0]
+ vld1.32 ${S4}[0],[$tbl0,:32]
+
+ vmlal.u32 $D3,$H2#lo,${R1}[0]
+ vmlal.u32 $D0,$H4#lo,${S1}[0]
+ vmlal.u32 $D4,$H3#lo,${R1}[0]
+ vmlal.u32 $D1,$H0#lo,${R1}[0]
+ vmlal.u32 $D2,$H1#lo,${R1}[0]
+
+ vmlal.u32 $D3,$H1#lo,${R2}[0]
+ vmlal.u32 $D0,$H3#lo,${S2}[0]
+ vmlal.u32 $D4,$H2#lo,${R2}[0]
+ vmlal.u32 $D1,$H4#lo,${S2}[0]
+ vmlal.u32 $D2,$H0#lo,${R2}[0]
+
+ vmlal.u32 $D3,$H0#lo,${R3}[0]
+ vmlal.u32 $D0,$H2#lo,${S3}[0]
+ vmlal.u32 $D4,$H1#lo,${R3}[0]
+ vmlal.u32 $D1,$H3#lo,${S3}[0]
+ vmlal.u32 $D3,$H4#lo,${S4}[0]
+
+ vmlal.u32 $D2,$H4#lo,${S3}[0]
+ vmlal.u32 $D0,$H1#lo,${S4}[0]
+ vmlal.u32 $D4,$H0#lo,${R4}[0]
+ vmov.i32 $H4,#1<<24 @ padbit, yes, always
+ vmlal.u32 $D1,$H2#lo,${S4}[0]
+ vmlal.u32 $D2,$H3#lo,${S4}[0]
+
+ vld4.32 {$H0#lo,$H1#lo,$H2#lo,$H3#lo},[$inp] @ inp[0:1]
+ add $inp,$inp,#64
+# ifdef __ARMEB__
+ vrev32.8 $H0,$H0
+ vrev32.8 $H1,$H1
+ vrev32.8 $H2,$H2
+ vrev32.8 $H3,$H3
+# endif
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction interleaved with base 2^32 -> base 2^26 of
+ @ inp[0:3] previously loaded to $H0-$H3 and smashed to $H0-$H4.
+
+ vshr.u64 $T0,$D3,#26
+ vmovn.i64 $D3#lo,$D3
+ vshr.u64 $T1,$D0,#26
+ vmovn.i64 $D0#lo,$D0
+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
+ vbic.i32 $D3#lo,#0xfc000000
+ vsri.u32 $H4,$H3,#8 @ base 2^32 -> base 2^26
+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
+ vshl.u32 $H3,$H3,#18
+ vbic.i32 $D0#lo,#0xfc000000
+
+ vshrn.u64 $T0#lo,$D4,#26
+ vmovn.i64 $D4#lo,$D4
+ vshr.u64 $T1,$D1,#26
+ vmovn.i64 $D1#lo,$D1
+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
+ vsri.u32 $H3,$H2,#14
+ vbic.i32 $D4#lo,#0xfc000000
+ vshl.u32 $H2,$H2,#12
+ vbic.i32 $D1#lo,#0xfc000000
+
+ vadd.i32 $D0#lo,$D0#lo,$T0#lo
+ vshl.u32 $T0#lo,$T0#lo,#2
+ vbic.i32 $H3,#0xfc000000
+ vshrn.u64 $T1#lo,$D2,#26
+ vmovn.i64 $D2#lo,$D2
+ vaddl.u32 $D0,$D0#lo,$T0#lo @ h4 -> h0 [widen for a sec]
+ vsri.u32 $H2,$H1,#20
+ vadd.i32 $D3#lo,$D3#lo,$T1#lo @ h2 -> h3
+ vshl.u32 $H1,$H1,#6
+ vbic.i32 $D2#lo,#0xfc000000
+ vbic.i32 $H2,#0xfc000000
+
+ vshrn.u64 $T0#lo,$D0,#26 @ re-narrow
+ vmovn.i64 $D0#lo,$D0
+ vsri.u32 $H1,$H0,#26
+ vbic.i32 $H0,#0xfc000000
+ vshr.u32 $T1#lo,$D3#lo,#26
+ vbic.i32 $D3#lo,#0xfc000000
+ vbic.i32 $D0#lo,#0xfc000000
+ vadd.i32 $D1#lo,$D1#lo,$T0#lo @ h0 -> h1
+ vadd.i32 $D4#lo,$D4#lo,$T1#lo @ h3 -> h4
+ vbic.i32 $H1,#0xfc000000
+
+ bhi .Loop_neon
+
+.Lskip_loop:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ add $tbl1,$ctx,#(48+0*9*4)
+ add $tbl0,$ctx,#(48+1*9*4)
+ adds $len,$len,#32
+ it ne
+ movne $len,#0
+ bne .Long_tail
+
+ vadd.i32 $H2#hi,$H2#lo,$D2#lo @ add hash value and move to #hi
+ vadd.i32 $H0#hi,$H0#lo,$D0#lo
+ vadd.i32 $H3#hi,$H3#lo,$D3#lo
+ vadd.i32 $H1#hi,$H1#lo,$D1#lo
+ vadd.i32 $H4#hi,$H4#lo,$D4#lo
+
+.Long_tail:
+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^1
+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^2
+
+ vadd.i32 $H2#lo,$H2#lo,$D2#lo @ can be redundant
+ vmull.u32 $D2,$H2#hi,$R0
+ vadd.i32 $H0#lo,$H0#lo,$D0#lo
+ vmull.u32 $D0,$H0#hi,$R0
+ vadd.i32 $H3#lo,$H3#lo,$D3#lo
+ vmull.u32 $D3,$H3#hi,$R0
+ vadd.i32 $H1#lo,$H1#lo,$D1#lo
+ vmull.u32 $D1,$H1#hi,$R0
+ vadd.i32 $H4#lo,$H4#lo,$D4#lo
+ vmull.u32 $D4,$H4#hi,$R0
+
+ vmlal.u32 $D0,$H4#hi,$S1
+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vmlal.u32 $D3,$H2#hi,$R1
+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vmlal.u32 $D1,$H0#hi,$R1
+ vmlal.u32 $D4,$H3#hi,$R1
+ vmlal.u32 $D2,$H1#hi,$R1
+
+ vmlal.u32 $D3,$H1#hi,$R2
+ vld1.32 ${S4}[1],[$tbl1,:32]
+ vmlal.u32 $D0,$H3#hi,$S2
+ vld1.32 ${S4}[0],[$tbl0,:32]
+ vmlal.u32 $D4,$H2#hi,$R2
+ vmlal.u32 $D1,$H4#hi,$S2
+ vmlal.u32 $D2,$H0#hi,$R2
+
+ vmlal.u32 $D3,$H0#hi,$R3
+ it ne
+ addne $tbl1,$ctx,#(48+2*9*4)
+ vmlal.u32 $D0,$H2#hi,$S3
+ it ne
+ addne $tbl0,$ctx,#(48+3*9*4)
+ vmlal.u32 $D4,$H1#hi,$R3
+ vmlal.u32 $D1,$H3#hi,$S3
+ vmlal.u32 $D2,$H4#hi,$S3
+
+ vmlal.u32 $D3,$H4#hi,$S4
+ vorn $MASK,$MASK,$MASK @ all-ones, can be redundant
+ vmlal.u32 $D0,$H1#hi,$S4
+ vshr.u64 $MASK,$MASK,#38
+ vmlal.u32 $D4,$H0#hi,$R4
+ vmlal.u32 $D1,$H2#hi,$S4
+ vmlal.u32 $D2,$H3#hi,$S4
+
+ beq .Lshort_tail
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ vld4.32 {${R0}[1],${R1}[1],${S1}[1],${R2}[1]},[$tbl1]! @ load r^3
+ vld4.32 {${R0}[0],${R1}[0],${S1}[0],${R2}[0]},[$tbl0]! @ load r^4
+
+ vmlal.u32 $D2,$H2#lo,$R0
+ vmlal.u32 $D0,$H0#lo,$R0
+ vmlal.u32 $D3,$H3#lo,$R0
+ vmlal.u32 $D1,$H1#lo,$R0
+ vmlal.u32 $D4,$H4#lo,$R0
+
+ vmlal.u32 $D0,$H4#lo,$S1
+ vld4.32 {${S2}[1],${R3}[1],${S3}[1],${R4}[1]},[$tbl1]!
+ vmlal.u32 $D3,$H2#lo,$R1
+ vld4.32 {${S2}[0],${R3}[0],${S3}[0],${R4}[0]},[$tbl0]!
+ vmlal.u32 $D1,$H0#lo,$R1
+ vmlal.u32 $D4,$H3#lo,$R1
+ vmlal.u32 $D2,$H1#lo,$R1
+
+ vmlal.u32 $D3,$H1#lo,$R2
+ vld1.32 ${S4}[1],[$tbl1,:32]
+ vmlal.u32 $D0,$H3#lo,$S2
+ vld1.32 ${S4}[0],[$tbl0,:32]
+ vmlal.u32 $D4,$H2#lo,$R2
+ vmlal.u32 $D1,$H4#lo,$S2
+ vmlal.u32 $D2,$H0#lo,$R2
+
+ vmlal.u32 $D3,$H0#lo,$R3
+ vmlal.u32 $D0,$H2#lo,$S3
+ vmlal.u32 $D4,$H1#lo,$R3
+ vmlal.u32 $D1,$H3#lo,$S3
+ vmlal.u32 $D2,$H4#lo,$S3
+
+ vmlal.u32 $D3,$H4#lo,$S4
+ vorn $MASK,$MASK,$MASK @ all-ones
+ vmlal.u32 $D0,$H1#lo,$S4
+ vshr.u64 $MASK,$MASK,#38
+ vmlal.u32 $D4,$H0#lo,$R4
+ vmlal.u32 $D1,$H2#lo,$S4
+ vmlal.u32 $D2,$H3#lo,$S4
+
+.Lshort_tail:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ horizontal addition
+
+ vadd.i64 $D3#lo,$D3#lo,$D3#hi
+ vadd.i64 $D0#lo,$D0#lo,$D0#hi
+ vadd.i64 $D4#lo,$D4#lo,$D4#hi
+ vadd.i64 $D1#lo,$D1#lo,$D1#hi
+ vadd.i64 $D2#lo,$D2#lo,$D2#hi
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction, but without narrowing
+
+ vshr.u64 $T0,$D3,#26
+ vand.i64 $D3,$D3,$MASK
+ vshr.u64 $T1,$D0,#26
+ vand.i64 $D0,$D0,$MASK
+ vadd.i64 $D4,$D4,$T0 @ h3 -> h4
+ vadd.i64 $D1,$D1,$T1 @ h0 -> h1
+
+ vshr.u64 $T0,$D4,#26
+ vand.i64 $D4,$D4,$MASK
+ vshr.u64 $T1,$D1,#26
+ vand.i64 $D1,$D1,$MASK
+ vadd.i64 $D2,$D2,$T1 @ h1 -> h2
+
+ vadd.i64 $D0,$D0,$T0
+ vshl.u64 $T0,$T0,#2
+ vshr.u64 $T1,$D2,#26
+ vand.i64 $D2,$D2,$MASK
+ vadd.i64 $D0,$D0,$T0 @ h4 -> h0
+ vadd.i64 $D3,$D3,$T1 @ h2 -> h3
+
+ vshr.u64 $T0,$D0,#26
+ vand.i64 $D0,$D0,$MASK
+ vshr.u64 $T1,$D3,#26
+ vand.i64 $D3,$D3,$MASK
+ vadd.i64 $D1,$D1,$T0 @ h0 -> h1
+ vadd.i64 $D4,$D4,$T1 @ h3 -> h4
+
+ cmp $len,#0
+ bne .Leven
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ store hash value
+
+ vst4.32 {$D0#lo[0],$D1#lo[0],$D2#lo[0],$D3#lo[0]},[$ctx]!
+ vst1.32 {$D4#lo[0]},[$ctx]
+
+ vldmia sp!,{d8-d15} @ epilogue
+ ldmia sp!,{r4-r7}
+ ret @ bx lr
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+#ifndef __KERNEL__
+.LOPENSSL_armcap:
+# ifdef _WIN32
+.word OPENSSL_armcap_P
+# else
+.word OPENSSL_armcap_P-.Lpoly1305_init
+# endif
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
+#endif
+___
+} }
+$code.=<<___;
+.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by \@dot-asm"
+.align 2
+___
+
+foreach (split("\n",$code)) {
+ s/\`([^\`]*)\`/eval $1/geo;
+
+ s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo or
+ s/\bret\b/bx lr/go or
+ s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
+
+ print $_,"\n";
+}
+close STDOUT; # enforce flush
diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped
new file mode 100644
index 000000000000..37b71d990293
--- /dev/null
+++ b/arch/arm/crypto/poly1305-core.S_shipped
@@ -0,0 +1,1158 @@
+#ifndef __KERNEL__
+# include "arm_arch.h"
+#else
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
+# define poly1305_init poly1305_init_arm
+# define poly1305_blocks poly1305_blocks_arm
+# define poly1305_emit poly1305_emit_arm
+.globl poly1305_blocks_neon
+#endif
+
+#if defined(__thumb2__)
+.syntax unified
+.thumb
+#else
+.code 32
+#endif
+
+.text
+
+.globl poly1305_emit
+.globl poly1305_blocks
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+.Lpoly1305_init:
+ stmdb sp!,{r4-r11}
+
+ eor r3,r3,r3
+ cmp r1,#0
+ str r3,[r0,#0] @ zero hash value
+ str r3,[r0,#4]
+ str r3,[r0,#8]
+ str r3,[r0,#12]
+ str r3,[r0,#16]
+ str r3,[r0,#36] @ clear is_base2_26
+ add r0,r0,#20
+
+#ifdef __thumb2__
+ it eq
+#endif
+ moveq r0,#0
+ beq .Lno_key
+
+#if __ARM_MAX_ARCH__>=7
+ mov r3,#-1
+ str r3,[r0,#28] @ impossible key power value
+# ifndef __KERNEL__
+ adr r11,.Lpoly1305_init
+ ldr r12,.LOPENSSL_armcap
+# endif
+#endif
+ ldrb r4,[r1,#0]
+ mov r10,#0x0fffffff
+ ldrb r5,[r1,#1]
+ and r3,r10,#-4 @ 0x0ffffffc
+ ldrb r6,[r1,#2]
+ ldrb r7,[r1,#3]
+ orr r4,r4,r5,lsl#8
+ ldrb r5,[r1,#4]
+ orr r4,r4,r6,lsl#16
+ ldrb r6,[r1,#5]
+ orr r4,r4,r7,lsl#24
+ ldrb r7,[r1,#6]
+ and r4,r4,r10
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+# if !defined(_WIN32)
+ ldr r12,[r11,r12] @ OPENSSL_armcap_P
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
+ ldr r12,[r12]
+# endif
+#endif
+ ldrb r8,[r1,#7]
+ orr r5,r5,r6,lsl#8
+ ldrb r6,[r1,#8]
+ orr r5,r5,r7,lsl#16
+ ldrb r7,[r1,#9]
+ orr r5,r5,r8,lsl#24
+ ldrb r8,[r1,#10]
+ and r5,r5,r3
+
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ tst r12,#ARMV7_NEON @ check for NEON
+# ifdef __thumb2__
+ adr r9,.Lpoly1305_blocks_neon
+ adr r11,.Lpoly1305_blocks
+ it ne
+ movne r11,r9
+ adr r12,.Lpoly1305_emit
+ orr r11,r11,#1 @ thumb-ify addresses
+ orr r12,r12,#1
+# else
+ add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
+ ite eq
+ addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
+ addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
+# endif
+#endif
+ ldrb r9,[r1,#11]
+ orr r6,r6,r7,lsl#8
+ ldrb r7,[r1,#12]
+ orr r6,r6,r8,lsl#16
+ ldrb r8,[r1,#13]
+ orr r6,r6,r9,lsl#24
+ ldrb r9,[r1,#14]
+ and r6,r6,r3
+
+ ldrb r10,[r1,#15]
+ orr r7,r7,r8,lsl#8
+ str r4,[r0,#0]
+ orr r7,r7,r9,lsl#16
+ str r5,[r0,#4]
+ orr r7,r7,r10,lsl#24
+ str r6,[r0,#8]
+ and r7,r7,r3
+ str r7,[r0,#12]
+#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
+ stmia r2,{r11,r12} @ fill functions table
+ mov r0,#1
+#else
+ mov r0,#0
+#endif
+.Lno_key:
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ bx lr @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_init,.-poly1305_init
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ stmdb sp!,{r3-r11,lr}
+
+ ands r2,r2,#-16
+ beq .Lno_data
+
+ add r2,r2,r1 @ end pointer
+ sub sp,sp,#32
+
+#if __ARM_ARCH__<7
+ ldmia r0,{r4-r12} @ load context
+ add r0,r0,#20
+ str r2,[sp,#16] @ offload stuff
+ str r0,[sp,#12]
+#else
+ ldr lr,[r0,#36] @ is_base2_26
+ ldmia r0!,{r4-r8} @ load hash value
+ str r2,[sp,#16] @ offload stuff
+ str r0,[sp,#12]
+
+ adds r9,r4,r5,lsl#26 @ base 2^26 -> base 2^32
+ mov r10,r5,lsr#6
+ adcs r10,r10,r6,lsl#20
+ mov r11,r6,lsr#12
+ adcs r11,r11,r7,lsl#14
+ mov r12,r7,lsr#18
+ adcs r12,r12,r8,lsl#8
+ mov r2,#0
+ teq lr,#0
+ str r2,[r0,#16] @ clear is_base2_26
+ adc r2,r2,r8,lsr#24
+
+ itttt ne
+ movne r4,r9 @ choose between radixes
+ movne r5,r10
+ movne r6,r11
+ movne r7,r12
+ ldmia r0,{r9-r12} @ load key
+ it ne
+ movne r8,r2
+#endif
+
+ mov lr,r1
+ cmp r3,#0
+ str r10,[sp,#20]
+ str r11,[sp,#24]
+ str r12,[sp,#28]
+ b .Loop
+
+.align 4
+.Loop:
+#if __ARM_ARCH__<7
+ ldrb r0,[lr],#16 @ load input
+# ifdef __thumb2__
+ it hi
+# endif
+ addhi r8,r8,#1 @ 1<<128
+ ldrb r1,[lr,#-15]
+ ldrb r2,[lr,#-14]
+ ldrb r3,[lr,#-13]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-12]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-11]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-10]
+ adds r4,r4,r3 @ accumulate input
+
+ ldrb r3,[lr,#-9]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-8]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-7]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-6]
+ adcs r5,r5,r3
+
+ ldrb r3,[lr,#-5]
+ orr r1,r0,r1,lsl#8
+ ldrb r0,[lr,#-4]
+ orr r2,r1,r2,lsl#16
+ ldrb r1,[lr,#-3]
+ orr r3,r2,r3,lsl#24
+ ldrb r2,[lr,#-2]
+ adcs r6,r6,r3
+
+ ldrb r3,[lr,#-1]
+ orr r1,r0,r1,lsl#8
+ str lr,[sp,#8] @ offload input pointer
+ orr r2,r1,r2,lsl#16
+ add r10,r10,r10,lsr#2
+ orr r3,r2,r3,lsl#24
+#else
+ ldr r0,[lr],#16 @ load input
+ it hi
+ addhi r8,r8,#1 @ padbit
+ ldr r1,[lr,#-12]
+ ldr r2,[lr,#-8]
+ ldr r3,[lr,#-4]
+# ifdef __ARMEB__
+ rev r0,r0
+ rev r1,r1
+ rev r2,r2
+ rev r3,r3
+# endif
+ adds r4,r4,r0 @ accumulate input
+ str lr,[sp,#8] @ offload input pointer
+ adcs r5,r5,r1
+ add r10,r10,r10,lsr#2
+ adcs r6,r6,r2
+#endif
+ add r11,r11,r11,lsr#2
+ adcs r7,r7,r3
+ add r12,r12,r12,lsr#2
+
+ umull r2,r3,r5,r9
+ adc r8,r8,#0
+ umull r0,r1,r4,r9
+ umlal r2,r3,r8,r10
+ umlal r0,r1,r7,r10
+ ldr r10,[sp,#20] @ reload r10
+ umlal r2,r3,r6,r12
+ umlal r0,r1,r5,r12
+ umlal r2,r3,r7,r11
+ umlal r0,r1,r6,r11
+ umlal r2,r3,r4,r10
+ str r0,[sp,#0] @ future r4
+ mul r0,r11,r8
+ ldr r11,[sp,#24] @ reload r11
+ adds r2,r2,r1 @ d1+=d0>>32
+ eor r1,r1,r1
+ adc lr,r3,#0 @ future r6
+ str r2,[sp,#4] @ future r5
+
+ mul r2,r12,r8
+ eor r3,r3,r3
+ umlal r0,r1,r7,r12
+ ldr r12,[sp,#28] @ reload r12
+ umlal r2,r3,r7,r9
+ umlal r0,r1,r6,r9
+ umlal r2,r3,r6,r10
+ umlal r0,r1,r5,r10
+ umlal r2,r3,r5,r11
+ umlal r0,r1,r4,r11
+ umlal r2,r3,r4,r12
+ ldr r4,[sp,#0]
+ mul r8,r9,r8
+ ldr r5,[sp,#4]
+
+ adds r6,lr,r0 @ d2+=d1>>32
+ ldr lr,[sp,#8] @ reload input pointer
+ adc r1,r1,#0
+ adds r7,r2,r1 @ d3+=d2>>32
+ ldr r0,[sp,#16] @ reload end pointer
+ adc r3,r3,#0
+ add r8,r8,r3 @ h4+=d3>>32
+
+ and r1,r8,#-4
+ and r8,r8,#3
+ add r1,r1,r1,lsr#2 @ *=5
+ adds r4,r4,r1
+ adcs r5,r5,#0
+ adcs r6,r6,#0
+ adcs r7,r7,#0
+ adc r8,r8,#0
+
+ cmp r0,lr @ done yet?
+ bhi .Loop
+
+ ldr r0,[sp,#12]
+ add sp,sp,#32
+ stmdb r0,{r4-r8} @ store the result
+
+.Lno_data:
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r3-r11,pc}
+#else
+ ldmia sp!,{r3-r11,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_blocks,.-poly1305_blocks
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ stmdb sp!,{r4-r11}
+
+ ldmia r0,{r3-r7}
+
+#if __ARM_ARCH__>=7
+ ldr ip,[r0,#36] @ is_base2_26
+
+ adds r8,r3,r4,lsl#26 @ base 2^26 -> base 2^32
+ mov r9,r4,lsr#6
+ adcs r9,r9,r5,lsl#20
+ mov r10,r5,lsr#12
+ adcs r10,r10,r6,lsl#14
+ mov r11,r6,lsr#18
+ adcs r11,r11,r7,lsl#8
+ mov r0,#0
+ adc r0,r0,r7,lsr#24
+
+ tst ip,ip
+ itttt ne
+ movne r3,r8
+ movne r4,r9
+ movne r5,r10
+ movne r6,r11
+ it ne
+ movne r7,r0
+#endif
+
+ adds r8,r3,#5 @ compare to modulus
+ adcs r9,r4,#0
+ adcs r10,r5,#0
+ adcs r11,r6,#0
+ adc r0,r7,#0
+ tst r0,#4 @ did it carry/borrow?
+
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r3,r8
+ ldr r8,[r2,#0]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r4,r9
+ ldr r9,[r2,#4]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r5,r10
+ ldr r10,[r2,#8]
+#ifdef __thumb2__
+ it ne
+#endif
+ movne r6,r11
+ ldr r11,[r2,#12]
+
+ adds r3,r3,r8
+ adcs r4,r4,r9
+ adcs r5,r5,r10
+ adc r6,r6,r11
+
+#if __ARM_ARCH__>=7
+# ifdef __ARMEB__
+ rev r3,r3
+ rev r4,r4
+ rev r5,r5
+ rev r6,r6
+# endif
+ str r3,[r1,#0]
+ str r4,[r1,#4]
+ str r5,[r1,#8]
+ str r6,[r1,#12]
+#else
+ strb r3,[r1,#0]
+ mov r3,r3,lsr#8
+ strb r4,[r1,#4]
+ mov r4,r4,lsr#8
+ strb r5,[r1,#8]
+ mov r5,r5,lsr#8
+ strb r6,[r1,#12]
+ mov r6,r6,lsr#8
+
+ strb r3,[r1,#1]
+ mov r3,r3,lsr#8
+ strb r4,[r1,#5]
+ mov r4,r4,lsr#8
+ strb r5,[r1,#9]
+ mov r5,r5,lsr#8
+ strb r6,[r1,#13]
+ mov r6,r6,lsr#8
+
+ strb r3,[r1,#2]
+ mov r3,r3,lsr#8
+ strb r4,[r1,#6]
+ mov r4,r4,lsr#8
+ strb r5,[r1,#10]
+ mov r5,r5,lsr#8
+ strb r6,[r1,#14]
+ mov r6,r6,lsr#8
+
+ strb r3,[r1,#3]
+ strb r4,[r1,#7]
+ strb r5,[r1,#11]
+ strb r6,[r1,#15]
+#endif
+ ldmia sp!,{r4-r11}
+#if __ARM_ARCH__>=5
+ bx lr @ bx lr
+#else
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ .word 0xe12fff1e @ interoperable with Thumb ISA:-)
+#endif
+.size poly1305_emit,.-poly1305_emit
+#if __ARM_MAX_ARCH__>=7
+.fpu neon
+
+.type poly1305_init_neon,%function
+.align 5
+poly1305_init_neon:
+.Lpoly1305_init_neon:
+ ldr r3,[r0,#48] @ first table element
+ cmp r3,#-1 @ is value impossible?
+ bne .Lno_init_neon
+
+ ldr r4,[r0,#20] @ load key base 2^32
+ ldr r5,[r0,#24]
+ ldr r6,[r0,#28]
+ ldr r7,[r0,#32]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ and r3,r3,#0x03ffffff
+ and r4,r4,#0x03ffffff
+ and r5,r5,#0x03ffffff
+
+ vdup.32 d0,r2 @ r^1 in both lanes
+ add r2,r3,r3,lsl#2 @ *5
+ vdup.32 d1,r3
+ add r3,r4,r4,lsl#2
+ vdup.32 d2,r2
+ vdup.32 d3,r4
+ add r4,r5,r5,lsl#2
+ vdup.32 d4,r3
+ vdup.32 d5,r5
+ add r5,r6,r6,lsl#2
+ vdup.32 d6,r4
+ vdup.32 d7,r6
+ vdup.32 d8,r5
+
+ mov r5,#2 @ counter
+
+.Lsquare_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+
+ vmull.u32 q5,d0,d0[1]
+ vmull.u32 q6,d1,d0[1]
+ vmull.u32 q7,d3,d0[1]
+ vmull.u32 q8,d5,d0[1]
+ vmull.u32 q9,d7,d0[1]
+
+ vmlal.u32 q5,d7,d2[1]
+ vmlal.u32 q6,d0,d1[1]
+ vmlal.u32 q7,d1,d1[1]
+ vmlal.u32 q8,d3,d1[1]
+ vmlal.u32 q9,d5,d1[1]
+
+ vmlal.u32 q5,d5,d4[1]
+ vmlal.u32 q6,d7,d4[1]
+ vmlal.u32 q8,d1,d3[1]
+ vmlal.u32 q7,d0,d3[1]
+ vmlal.u32 q9,d3,d3[1]
+
+ vmlal.u32 q5,d3,d6[1]
+ vmlal.u32 q8,d0,d5[1]
+ vmlal.u32 q6,d5,d6[1]
+ vmlal.u32 q7,d7,d6[1]
+ vmlal.u32 q9,d1,d5[1]
+
+ vmlal.u32 q8,d7,d8[1]
+ vmlal.u32 q5,d1,d8[1]
+ vmlal.u32 q6,d3,d8[1]
+ vmlal.u32 q7,d5,d8[1]
+ vmlal.u32 q9,d0,d7[1]
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ @ and P. Schwabe
+ @
+ @ H0>>+H1>>+H2>>+H3>>+H4
+ @ H3>>+H4>>*5+H0>>+H1
+ @
+ @ Trivia.
+ @
+ @ Result of multiplication of n-bit number by m-bit number is
+ @ n+m bits wide. However! Even though 2^n is a n+1-bit number,
+ @ m-bit number multiplied by 2^n is still n+m bits wide.
+ @
+ @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
+ @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
+ @ one is n+1 bits wide.
+ @
+ @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
+ @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
+ @ can be 27. However! In cases when their width exceeds 26 bits
+ @ they are limited by 2^26+2^6. This in turn means that *sum*
+ @ of the products with these values can still be viewed as sum
+ @ of 52-bit numbers as long as the amount of addends is not a
+ @ power of 2. For example,
+ @
+ @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
+ @
+ @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
+ @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
+ @ 8 * (2^52) or 2^55. However, the value is then multiplied by
+ @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
+ @ which is less than 32 * (2^52) or 2^57. And when processing
+ @ data we are looking at triple as many addends...
+ @
+ @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
+ @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
+ @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
+ @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
+ @ instruction accepts 2x32-bit input and writes 2x64-bit result.
+ @ This means that result of reduction have to be compressed upon
+ @ loop wrap-around. This can be done in the process of reduction
+ @ to minimize amount of instructions [as well as amount of
+ @ 128-bit instructions, which benefits low-end processors], but
+ @ one has to watch for H2 (which is narrower than H0) and 5*H4
+ @ not being wider than 58 bits, so that result of right shift
+ @ by 26 bits fits in 32 bits. This is also useful on x86,
+ @ because it allows to use paddd in place for paddq, which
+ @ benefits Atom, where paddq is ridiculously slow.
+
+ vshr.u64 q15,q8,#26
+ vmovn.i64 d16,q8
+ vshr.u64 q4,q5,#26
+ vmovn.i64 d10,q5
+ vadd.i64 q9,q9,q15 @ h3 -> h4
+ vbic.i32 d16,#0xfc000000 @ &=0x03ffffff
+ vadd.i64 q6,q6,q4 @ h0 -> h1
+ vbic.i32 d10,#0xfc000000
+
+ vshrn.u64 d30,q9,#26
+ vmovn.i64 d18,q9
+ vshr.u64 q4,q6,#26
+ vmovn.i64 d12,q6
+ vadd.i64 q7,q7,q4 @ h1 -> h2
+ vbic.i32 d18,#0xfc000000
+ vbic.i32 d12,#0xfc000000
+
+ vadd.i32 d10,d10,d30
+ vshl.u32 d30,d30,#2
+ vshrn.u64 d8,q7,#26
+ vmovn.i64 d14,q7
+ vadd.i32 d10,d10,d30 @ h4 -> h0
+ vadd.i32 d16,d16,d8 @ h2 -> h3
+ vbic.i32 d14,#0xfc000000
+
+ vshr.u32 d30,d10,#26
+ vbic.i32 d10,#0xfc000000
+ vshr.u32 d8,d16,#26
+ vbic.i32 d16,#0xfc000000
+ vadd.i32 d12,d12,d30 @ h0 -> h1
+ vadd.i32 d18,d18,d8 @ h3 -> h4
+
+ subs r5,r5,#1
+ beq .Lsquare_break_neon
+
+ add r6,r0,#(48+0*9*4)
+ add r7,r0,#(48+1*9*4)
+
+ vtrn.32 d0,d10 @ r^2:r^1
+ vtrn.32 d3,d14
+ vtrn.32 d5,d16
+ vtrn.32 d1,d12
+ vtrn.32 d7,d18
+
+ vshl.u32 d4,d3,#2 @ *5
+ vshl.u32 d6,d5,#2
+ vshl.u32 d2,d1,#2
+ vshl.u32 d8,d7,#2
+ vadd.i32 d4,d4,d3
+ vadd.i32 d2,d2,d1
+ vadd.i32 d6,d6,d5
+ vadd.i32 d8,d8,d7
+
+ vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]!
+ vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]!
+ vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vst1.32 {d8[0]},[r6,:32]
+ vst1.32 {d8[1]},[r7,:32]
+
+ b .Lsquare_neon
+
+.align 4
+.Lsquare_break_neon:
+ add r6,r0,#(48+2*4*9)
+ add r7,r0,#(48+3*4*9)
+
+ vmov d0,d10 @ r^4:r^3
+ vshl.u32 d2,d12,#2 @ *5
+ vmov d1,d12
+ vshl.u32 d4,d14,#2
+ vmov d3,d14
+ vshl.u32 d6,d16,#2
+ vmov d5,d16
+ vshl.u32 d8,d18,#2
+ vmov d7,d18
+ vadd.i32 d2,d2,d12
+ vadd.i32 d4,d4,d14
+ vadd.i32 d6,d6,d16
+ vadd.i32 d8,d8,d18
+
+ vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]!
+ vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]!
+ vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vst1.32 {d8[0]},[r6]
+ vst1.32 {d8[1]},[r7]
+
+.Lno_init_neon:
+ bx lr @ bx lr
+.size poly1305_init_neon,.-poly1305_init_neon
+
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr ip,[r0,#36] @ is_base2_26
+
+ cmp r2,#64
+ blo .Lpoly1305_blocks
+
+ stmdb sp!,{r4-r7}
+ vstmdb sp!,{d8-d15} @ ABI specification says so
+
+ tst ip,ip @ is_base2_26?
+ bne .Lbase2_26_neon
+
+ stmdb sp!,{r1-r3,lr}
+ bl .Lpoly1305_init_neon
+
+ ldr r4,[r0,#0] @ load hash value base 2^32
+ ldr r5,[r0,#4]
+ ldr r6,[r0,#8]
+ ldr r7,[r0,#12]
+ ldr ip,[r0,#16]
+
+ and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
+ mov r3,r4,lsr#26
+ veor d10,d10,d10
+ mov r4,r5,lsr#20
+ orr r3,r3,r5,lsl#6
+ veor d12,d12,d12
+ mov r5,r6,lsr#14
+ orr r4,r4,r6,lsl#12
+ veor d14,d14,d14
+ mov r6,r7,lsr#8
+ orr r5,r5,r7,lsl#18
+ veor d16,d16,d16
+ and r3,r3,#0x03ffffff
+ orr r6,r6,ip,lsl#24
+ veor d18,d18,d18
+ and r4,r4,#0x03ffffff
+ mov r1,#1
+ and r5,r5,#0x03ffffff
+ str r1,[r0,#36] @ set is_base2_26
+
+ vmov.32 d10[0],r2
+ vmov.32 d12[0],r3
+ vmov.32 d14[0],r4
+ vmov.32 d16[0],r5
+ vmov.32 d18[0],r6
+ adr r5,.Lzeros
+
+ ldmia sp!,{r1-r3,lr}
+ b .Lhash_loaded
+
+.align 4
+.Lbase2_26_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ load hash value
+
+ veor d10,d10,d10
+ veor d12,d12,d12
+ veor d14,d14,d14
+ veor d16,d16,d16
+ veor d18,d18,d18
+ vld4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]!
+ adr r5,.Lzeros
+ vld1.32 {d18[0]},[r0]
+ sub r0,r0,#16 @ rewind
+
+.Lhash_loaded:
+ add r4,r1,#32
+ mov r3,r3,lsl#24
+ tst r2,#31
+ beq .Leven
+
+ vld4.32 {d20[0],d22[0],d24[0],d26[0]},[r1]!
+ vmov.32 d28[0],r3
+ sub r2,r2,#16
+ add r4,r1,#32
+
+# ifdef __ARMEB__
+ vrev32.8 q10,q10
+ vrev32.8 q13,q13
+ vrev32.8 q11,q11
+ vrev32.8 q12,q12
+# endif
+ vsri.u32 d28,d26,#8 @ base 2^32 -> base 2^26
+ vshl.u32 d26,d26,#18
+
+ vsri.u32 d26,d24,#14
+ vshl.u32 d24,d24,#12
+ vadd.i32 d29,d28,d18 @ add hash value and move to #hi
+
+ vbic.i32 d26,#0xfc000000
+ vsri.u32 d24,d22,#20
+ vshl.u32 d22,d22,#6
+
+ vbic.i32 d24,#0xfc000000
+ vsri.u32 d22,d20,#26
+ vadd.i32 d27,d26,d16
+
+ vbic.i32 d20,#0xfc000000
+ vbic.i32 d22,#0xfc000000
+ vadd.i32 d25,d24,d14
+
+ vadd.i32 d21,d20,d10
+ vadd.i32 d23,d22,d12
+
+ mov r7,r5
+ add r6,r0,#48
+
+ cmp r2,r2
+ b .Long_tail
+
+.align 4
+.Leven:
+ subs r2,r2,#64
+ it lo
+ movlo r4,r5
+
+ vmov.i32 q14,#1<<24 @ padbit, yes, always
+ vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1]
+ add r1,r1,#64
+ vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0)
+ add r4,r4,#64
+ itt hi
+ addhi r7,r0,#(48+1*9*4)
+ addhi r6,r0,#(48+3*9*4)
+
+# ifdef __ARMEB__
+ vrev32.8 q10,q10
+ vrev32.8 q13,q13
+ vrev32.8 q11,q11
+ vrev32.8 q12,q12
+# endif
+ vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26
+ vshl.u32 q13,q13,#18
+
+ vsri.u32 q13,q12,#14
+ vshl.u32 q12,q12,#12
+
+ vbic.i32 q13,#0xfc000000
+ vsri.u32 q12,q11,#20
+ vshl.u32 q11,q11,#6
+
+ vbic.i32 q12,#0xfc000000
+ vsri.u32 q11,q10,#26
+
+ vbic.i32 q10,#0xfc000000
+ vbic.i32 q11,#0xfc000000
+
+ bls .Lskip_loop
+
+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^2
+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4
+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ b .Loop_neon
+
+.align 5
+.Loop_neon:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ @ ___________________/
+ @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ @ ___________________/ ____________________/
+ @
+ @ Note that we start with inp[2:3]*r^2. This is because it
+ @ doesn't depend on reduction in previous iteration.
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
+ @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
+ @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
+ @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
+ @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ inp[2:3]*r^2
+
+ vadd.i32 d24,d24,d14 @ accumulate inp[0:1]
+ vmull.u32 q7,d25,d0[1]
+ vadd.i32 d20,d20,d10
+ vmull.u32 q5,d21,d0[1]
+ vadd.i32 d26,d26,d16
+ vmull.u32 q8,d27,d0[1]
+ vmlal.u32 q7,d23,d1[1]
+ vadd.i32 d22,d22,d12
+ vmull.u32 q6,d23,d0[1]
+
+ vadd.i32 d28,d28,d18
+ vmull.u32 q9,d29,d0[1]
+ subs r2,r2,#64
+ vmlal.u32 q5,d29,d2[1]
+ it lo
+ movlo r4,r5
+ vmlal.u32 q8,d25,d1[1]
+ vld1.32 d8[1],[r7,:32]
+ vmlal.u32 q6,d21,d1[1]
+ vmlal.u32 q9,d27,d1[1]
+
+ vmlal.u32 q5,d27,d4[1]
+ vmlal.u32 q8,d23,d3[1]
+ vmlal.u32 q9,d25,d3[1]
+ vmlal.u32 q6,d29,d4[1]
+ vmlal.u32 q7,d21,d3[1]
+
+ vmlal.u32 q8,d21,d5[1]
+ vmlal.u32 q5,d25,d6[1]
+ vmlal.u32 q9,d23,d5[1]
+ vmlal.u32 q6,d27,d6[1]
+ vmlal.u32 q7,d29,d6[1]
+
+ vmlal.u32 q8,d29,d8[1]
+ vmlal.u32 q5,d23,d8[1]
+ vmlal.u32 q9,d21,d7[1]
+ vmlal.u32 q6,d25,d8[1]
+ vmlal.u32 q7,d27,d8[1]
+
+ vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0)
+ add r4,r4,#64
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4 and accumulate
+
+ vmlal.u32 q8,d26,d0[0]
+ vmlal.u32 q5,d20,d0[0]
+ vmlal.u32 q9,d28,d0[0]
+ vmlal.u32 q6,d22,d0[0]
+ vmlal.u32 q7,d24,d0[0]
+ vld1.32 d8[0],[r6,:32]
+
+ vmlal.u32 q8,d24,d1[0]
+ vmlal.u32 q5,d28,d2[0]
+ vmlal.u32 q9,d26,d1[0]
+ vmlal.u32 q6,d20,d1[0]
+ vmlal.u32 q7,d22,d1[0]
+
+ vmlal.u32 q8,d22,d3[0]
+ vmlal.u32 q5,d26,d4[0]
+ vmlal.u32 q9,d24,d3[0]
+ vmlal.u32 q6,d28,d4[0]
+ vmlal.u32 q7,d20,d3[0]
+
+ vmlal.u32 q8,d20,d5[0]
+ vmlal.u32 q5,d24,d6[0]
+ vmlal.u32 q9,d22,d5[0]
+ vmlal.u32 q6,d26,d6[0]
+ vmlal.u32 q8,d28,d8[0]
+
+ vmlal.u32 q7,d28,d6[0]
+ vmlal.u32 q5,d22,d8[0]
+ vmlal.u32 q9,d20,d7[0]
+ vmov.i32 q14,#1<<24 @ padbit, yes, always
+ vmlal.u32 q6,d24,d8[0]
+ vmlal.u32 q7,d26,d8[0]
+
+ vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1]
+ add r1,r1,#64
+# ifdef __ARMEB__
+ vrev32.8 q10,q10
+ vrev32.8 q11,q11
+ vrev32.8 q12,q12
+ vrev32.8 q13,q13
+# endif
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction interleaved with base 2^32 -> base 2^26 of
+ @ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14.
+
+ vshr.u64 q15,q8,#26
+ vmovn.i64 d16,q8
+ vshr.u64 q4,q5,#26
+ vmovn.i64 d10,q5
+ vadd.i64 q9,q9,q15 @ h3 -> h4
+ vbic.i32 d16,#0xfc000000
+ vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26
+ vadd.i64 q6,q6,q4 @ h0 -> h1
+ vshl.u32 q13,q13,#18
+ vbic.i32 d10,#0xfc000000
+
+ vshrn.u64 d30,q9,#26
+ vmovn.i64 d18,q9
+ vshr.u64 q4,q6,#26
+ vmovn.i64 d12,q6
+ vadd.i64 q7,q7,q4 @ h1 -> h2
+ vsri.u32 q13,q12,#14
+ vbic.i32 d18,#0xfc000000
+ vshl.u32 q12,q12,#12
+ vbic.i32 d12,#0xfc000000
+
+ vadd.i32 d10,d10,d30
+ vshl.u32 d30,d30,#2
+ vbic.i32 q13,#0xfc000000
+ vshrn.u64 d8,q7,#26
+ vmovn.i64 d14,q7
+ vaddl.u32 q5,d10,d30 @ h4 -> h0 [widen for a sec]
+ vsri.u32 q12,q11,#20
+ vadd.i32 d16,d16,d8 @ h2 -> h3
+ vshl.u32 q11,q11,#6
+ vbic.i32 d14,#0xfc000000
+ vbic.i32 q12,#0xfc000000
+
+ vshrn.u64 d30,q5,#26 @ re-narrow
+ vmovn.i64 d10,q5
+ vsri.u32 q11,q10,#26
+ vbic.i32 q10,#0xfc000000
+ vshr.u32 d8,d16,#26
+ vbic.i32 d16,#0xfc000000
+ vbic.i32 d10,#0xfc000000
+ vadd.i32 d12,d12,d30 @ h0 -> h1
+ vadd.i32 d18,d18,d8 @ h3 -> h4
+ vbic.i32 q11,#0xfc000000
+
+ bhi .Loop_neon
+
+.Lskip_loop:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ add r7,r0,#(48+0*9*4)
+ add r6,r0,#(48+1*9*4)
+ adds r2,r2,#32
+ it ne
+ movne r2,#0
+ bne .Long_tail
+
+ vadd.i32 d25,d24,d14 @ add hash value and move to #hi
+ vadd.i32 d21,d20,d10
+ vadd.i32 d27,d26,d16
+ vadd.i32 d23,d22,d12
+ vadd.i32 d29,d28,d18
+
+.Long_tail:
+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^1
+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^2
+
+ vadd.i32 d24,d24,d14 @ can be redundant
+ vmull.u32 q7,d25,d0
+ vadd.i32 d20,d20,d10
+ vmull.u32 q5,d21,d0
+ vadd.i32 d26,d26,d16
+ vmull.u32 q8,d27,d0
+ vadd.i32 d22,d22,d12
+ vmull.u32 q6,d23,d0
+ vadd.i32 d28,d28,d18
+ vmull.u32 q9,d29,d0
+
+ vmlal.u32 q5,d29,d2
+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vmlal.u32 q8,d25,d1
+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vmlal.u32 q6,d21,d1
+ vmlal.u32 q9,d27,d1
+ vmlal.u32 q7,d23,d1
+
+ vmlal.u32 q8,d23,d3
+ vld1.32 d8[1],[r7,:32]
+ vmlal.u32 q5,d27,d4
+ vld1.32 d8[0],[r6,:32]
+ vmlal.u32 q9,d25,d3
+ vmlal.u32 q6,d29,d4
+ vmlal.u32 q7,d21,d3
+
+ vmlal.u32 q8,d21,d5
+ it ne
+ addne r7,r0,#(48+2*9*4)
+ vmlal.u32 q5,d25,d6
+ it ne
+ addne r6,r0,#(48+3*9*4)
+ vmlal.u32 q9,d23,d5
+ vmlal.u32 q6,d27,d6
+ vmlal.u32 q7,d29,d6
+
+ vmlal.u32 q8,d29,d8
+ vorn q0,q0,q0 @ all-ones, can be redundant
+ vmlal.u32 q5,d23,d8
+ vshr.u64 q0,q0,#38
+ vmlal.u32 q9,d21,d7
+ vmlal.u32 q6,d25,d8
+ vmlal.u32 q7,d27,d8
+
+ beq .Lshort_tail
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^3
+ vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4
+
+ vmlal.u32 q7,d24,d0
+ vmlal.u32 q5,d20,d0
+ vmlal.u32 q8,d26,d0
+ vmlal.u32 q6,d22,d0
+ vmlal.u32 q9,d28,d0
+
+ vmlal.u32 q5,d28,d2
+ vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
+ vmlal.u32 q8,d24,d1
+ vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
+ vmlal.u32 q6,d20,d1
+ vmlal.u32 q9,d26,d1
+ vmlal.u32 q7,d22,d1
+
+ vmlal.u32 q8,d22,d3
+ vld1.32 d8[1],[r7,:32]
+ vmlal.u32 q5,d26,d4
+ vld1.32 d8[0],[r6,:32]
+ vmlal.u32 q9,d24,d3
+ vmlal.u32 q6,d28,d4
+ vmlal.u32 q7,d20,d3
+
+ vmlal.u32 q8,d20,d5
+ vmlal.u32 q5,d24,d6
+ vmlal.u32 q9,d22,d5
+ vmlal.u32 q6,d26,d6
+ vmlal.u32 q7,d28,d6
+
+ vmlal.u32 q8,d28,d8
+ vorn q0,q0,q0 @ all-ones
+ vmlal.u32 q5,d22,d8
+ vshr.u64 q0,q0,#38
+ vmlal.u32 q9,d20,d7
+ vmlal.u32 q6,d24,d8
+ vmlal.u32 q7,d26,d8
+
+.Lshort_tail:
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ horizontal addition
+
+ vadd.i64 d16,d16,d17
+ vadd.i64 d10,d10,d11
+ vadd.i64 d18,d18,d19
+ vadd.i64 d12,d12,d13
+ vadd.i64 d14,d14,d15
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ lazy reduction, but without narrowing
+
+ vshr.u64 q15,q8,#26
+ vand.i64 q8,q8,q0
+ vshr.u64 q4,q5,#26
+ vand.i64 q5,q5,q0
+ vadd.i64 q9,q9,q15 @ h3 -> h4
+ vadd.i64 q6,q6,q4 @ h0 -> h1
+
+ vshr.u64 q15,q9,#26
+ vand.i64 q9,q9,q0
+ vshr.u64 q4,q6,#26
+ vand.i64 q6,q6,q0
+ vadd.i64 q7,q7,q4 @ h1 -> h2
+
+ vadd.i64 q5,q5,q15
+ vshl.u64 q15,q15,#2
+ vshr.u64 q4,q7,#26
+ vand.i64 q7,q7,q0
+ vadd.i64 q5,q5,q15 @ h4 -> h0
+ vadd.i64 q8,q8,q4 @ h2 -> h3
+
+ vshr.u64 q15,q5,#26
+ vand.i64 q5,q5,q0
+ vshr.u64 q4,q8,#26
+ vand.i64 q8,q8,q0
+ vadd.i64 q6,q6,q15 @ h0 -> h1
+ vadd.i64 q9,q9,q4 @ h3 -> h4
+
+ cmp r2,#0
+ bne .Leven
+
+ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+ @ store hash value
+
+ vst4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]!
+ vst1.32 {d18[0]},[r0]
+
+ vldmia sp!,{d8-d15} @ epilogue
+ ldmia sp!,{r4-r7}
+ bx lr @ bx lr
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
+#ifndef __KERNEL__
+.LOPENSSL_armcap:
+# ifdef _WIN32
+.word OPENSSL_armcap_P
+# else
+.word OPENSSL_armcap_P-.Lpoly1305_init
+# endif
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
+#endif
+.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm"
+.align 2
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..74a725ac89c9
--- /dev/null
+++ b/arch/arm/crypto/poly1305-glue.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+
+void poly1305_init_arm(void *state, const u8 *key);
+void poly1305_blocks_arm(void *state, const u8 *src, u32 len, u32 hibit);
+void poly1305_emit_arm(void *state, __le32 *digest, const u32 *nonce);
+
+void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
+{
+}
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+ poly1305_init_arm(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(key + 16);
+ dctx->s[1] = get_unaligned_le32(key + 20);
+ dctx->s[2] = get_unaligned_le32(key + 24);
+ dctx->s[3] = get_unaligned_le32(key + 28);
+ dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int arm_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static void arm_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ u32 len, u32 hibit, bool do_neon)
+{
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+ poly1305_init_arm(&dctx->h, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ if (len < POLY1305_BLOCK_SIZE)
+ return;
+ }
+
+ len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+ if (static_branch_likely(&have_neon) && likely(do_neon))
+ poly1305_blocks_neon(&dctx->h, src, len, hibit);
+ else
+ poly1305_blocks_arm(&dctx->h, src, len, hibit);
+}
+
+static void arm_poly1305_do_update(struct poly1305_desc_ctx *dctx,
+ const u8 *src, u32 len, bool do_neon)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ len -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ arm_poly1305_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1, false);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(len >= POLY1305_BLOCK_SIZE)) {
+ arm_poly1305_blocks(dctx, src, len, 1, do_neon);
+ src += round_down(len, POLY1305_BLOCK_SIZE);
+ len %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(len)) {
+ dctx->buflen = len;
+ memcpy(dctx->buf, src, len);
+ }
+}
+
+static int arm_poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ arm_poly1305_do_update(dctx, src, srclen, false);
+ return 0;
+}
+
+static int __maybe_unused arm_poly1305_update_neon(struct shash_desc *desc,
+ const u8 *src,
+ unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+ bool do_neon = crypto_simd_usable() && srclen > 128;
+
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_begin();
+ arm_poly1305_do_update(dctx, src, srclen, do_neon);
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_end();
+ return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int nbytes)
+{
+ bool do_neon = IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ crypto_simd_usable();
+
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_blocks_arm(&dctx->h, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+ if (static_branch_likely(&have_neon) && do_neon) {
+ kernel_neon_begin();
+ poly1305_blocks_neon(&dctx->h, src, len, 1);
+ kernel_neon_end();
+ } else {
+ poly1305_blocks_arm(&dctx->h, src, len, 1);
+ }
+ src += len;
+ nbytes %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(nbytes)) {
+ dctx->buflen = nbytes;
+ memcpy(dctx->buf, src, nbytes);
+ }
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_blocks_arm(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ }
+
+ poly1305_emit_arm(&dctx->h, digest, dctx->s);
+
+ /* mac = (h + s) % (2^128) */
+ f = (f >> 32) + le32_to_cpu(digest[0]);
+ put_unaligned_le32(f, dst);
+ f = (f >> 32) + le32_to_cpu(digest[1]);
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]);
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]);
+ put_unaligned_le32(f, dst + 12);
+
+ *dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int arm_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_arch(dctx, dst);
+ return 0;
+}
+
+static struct shash_alg arm_poly1305_algs[] = {{
+ .init = arm_poly1305_init,
+ .update = arm_poly1305_update,
+ .final = arm_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-arm",
+ .base.cra_priority = 150,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+#ifdef CONFIG_KERNEL_MODE_NEON
+}, {
+ .init = arm_poly1305_init,
+ .update = arm_poly1305_update_neon,
+ .final = arm_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-neon",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+#endif
+}};
+
+static int __init arm_poly1305_mod_init(void)
+{
+ if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) &&
+ (elf_hwcap & HWCAP_NEON))
+ static_branch_enable(&have_neon);
+ else
+ /* register only the first entry */
+ return crypto_register_shash(&arm_poly1305_algs[0]);
+
+ return crypto_register_shashes(arm_poly1305_algs,
+ ARRAY_SIZE(arm_poly1305_algs));
+}
+
+static void __exit arm_poly1305_mod_exit(void)
+{
+ if (!static_branch_likely(&have_neon)) {
+ crypto_unregister_shash(&arm_poly1305_algs[0]);
+ return;
+ }
+ crypto_unregister_shashes(arm_poly1305_algs,
+ ARRAY_SIZE(arm_poly1305_algs));
+}
+
+module_init(arm_poly1305_mod_init);
+module_exit(arm_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-arm");
+MODULE_ALIAS_CRYPTO("poly1305-neon");
diff --git a/arch/arm/crypto/sha1-ce-core.S b/arch/arm/crypto/sha1-ce-core.S
index 49a74a441aec..8a702e051738 100644
--- a/arch/arm/crypto/sha1-ce-core.S
+++ b/arch/arm/crypto/sha1-ce-core.S
@@ -10,6 +10,7 @@
#include <asm/assembler.h>
.text
+ .arch armv8-a
.fpu crypto-neon-fp-armv8
k0 .req q0
diff --git a/arch/arm/crypto/sha2-ce-core.S b/arch/arm/crypto/sha2-ce-core.S
index 4ad517577e23..b6369d2440a1 100644
--- a/arch/arm/crypto/sha2-ce-core.S
+++ b/arch/arm/crypto/sha2-ce-core.S
@@ -10,6 +10,7 @@
#include <asm/assembler.h>
.text
+ .arch armv8-a
.fpu crypto-neon-fp-armv8
k0 .req q7
diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S
index 8c74037ade22..21b8b271c80d 100644
--- a/arch/arm/kernel/vmlinux-xip.lds.S
+++ b/arch/arm/kernel/vmlinux-xip.lds.S
@@ -70,8 +70,6 @@ SECTIONS
ARM_UNWIND_SECTIONS
#endif
- NOTES
-
_etext = .; /* End of text and rodata section */
ARM_VECTORS
@@ -114,7 +112,7 @@ SECTIONS
. = ALIGN(THREAD_SIZE);
_sdata = .;
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
.data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) {
*(.data..ro_after_init)
}
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 23150c0f0f4d..319ccb10846a 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -81,8 +81,6 @@ SECTIONS
ARM_UNWIND_SECTIONS
#endif
- NOTES
-
#ifdef CONFIG_STRICT_KERNEL_RWX
. = ALIGN(1<<SECTION_SHIFT);
#else
@@ -143,7 +141,7 @@ SECTIONS
__init_end = .;
_sdata = .;
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
_edata = .;
BSS_SECTION(0, 0, 0)
diff --git a/arch/arm/mach-imx/cpuidle-imx6q.c b/arch/arm/mach-imx/cpuidle-imx6q.c
index 39a7d9393641..24dd5bbe60e4 100644
--- a/arch/arm/mach-imx/cpuidle-imx6q.c
+++ b/arch/arm/mach-imx/cpuidle-imx6q.c
@@ -62,13 +62,13 @@ static struct cpuidle_driver imx6q_cpuidle_driver = {
*/
void imx6q_cpuidle_fec_irqs_used(void)
{
- imx6q_cpuidle_driver.states[1].disabled = true;
+ cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, true);
}
EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_used);
void imx6q_cpuidle_fec_irqs_unused(void)
{
- imx6q_cpuidle_driver.states[1].disabled = false;
+ cpuidle_driver_state_disabled(&imx6q_cpuidle_driver, 1, false);
}
EXPORT_SYMBOL_GPL(imx6q_cpuidle_fec_irqs_unused);
diff --git a/arch/arm/mach-omap2/Makefile b/arch/arm/mach-omap2/Makefile
index 8f208197988f..1e1e86d17fc5 100644
--- a/arch/arm/mach-omap2/Makefile
+++ b/arch/arm/mach-omap2/Makefile
@@ -216,9 +216,6 @@ obj-$(CONFIG_MACH_NOKIA_N8X0) += board-n8x0.o
# Platform specific device init code
-omap-hsmmc-$(CONFIG_MMC_OMAP_HS) := hsmmc.o
-obj-y += $(omap-hsmmc-m) $(omap-hsmmc-y)
-
obj-y += omap_phy_internal.o
obj-$(CONFIG_MACH_OMAP2_TUSB6010) += usb-tusb6010.o
diff --git a/arch/arm/mach-omap2/common.h b/arch/arm/mach-omap2/common.h
index 6316da3623b3..223b37c48389 100644
--- a/arch/arm/mach-omap2/common.h
+++ b/arch/arm/mach-omap2/common.h
@@ -352,7 +352,6 @@ void omap_pcs_legacy_init(int irq, void (*rearm)(void));
struct omap_sdrc_params;
extern void omap_sdrc_init(struct omap_sdrc_params *sdrc_cs0,
struct omap_sdrc_params *sdrc_cs1);
-struct omap2_hsmmc_info;
extern void omap_reserve(void);
struct omap_hwmod;
diff --git a/arch/arm/mach-omap2/hsmmc.c b/arch/arm/mach-omap2/hsmmc.c
deleted file mode 100644
index 63423ea6a240..000000000000
--- a/arch/arm/mach-omap2/hsmmc.c
+++ /dev/null
@@ -1,171 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/arch/arm/mach-omap2/hsmmc.c
- *
- * Copyright (C) 2007-2008 Texas Instruments
- * Copyright (C) 2008 Nokia Corporation
- * Author: Texas Instruments
- */
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/delay.h>
-#include <linux/mmc/host.h>
-#include <linux/platform_data/hsmmc-omap.h>
-
-#include "soc.h"
-#include "omap_device.h"
-
-#include "hsmmc.h"
-#include "control.h"
-
-#if IS_ENABLED(CONFIG_MMC_OMAP_HS)
-
-static u16 control_pbias_offset;
-static u16 control_devconf1_offset;
-
-#define HSMMC_NAME_LEN 9
-
-static int __init omap_hsmmc_pdata_init(struct omap2_hsmmc_info *c,
- struct omap_hsmmc_platform_data *mmc)
-{
- char *hc_name;
-
- hc_name = kzalloc(HSMMC_NAME_LEN + 1, GFP_KERNEL);
- if (!hc_name)
- return -ENOMEM;
-
- snprintf(hc_name, (HSMMC_NAME_LEN + 1), "mmc%islot%i", c->mmc, 1);
- mmc->name = hc_name;
- mmc->caps = c->caps;
- mmc->reg_offset = 0;
-
- return 0;
-}
-
-static int omap_hsmmc_done;
-
-void omap_hsmmc_late_init(struct omap2_hsmmc_info *c)
-{
- struct platform_device *pdev;
- int res;
-
- if (omap_hsmmc_done)
- return;
-
- omap_hsmmc_done = 1;
-
- for (; c->mmc; c++) {
- pdev = c->pdev;
- if (!pdev)
- continue;
- res = omap_device_register(pdev);
- if (res)
- pr_err("Could not late init MMC\n");
- }
-}
-
-#define MAX_OMAP_MMC_HWMOD_NAME_LEN 16
-
-static void __init omap_hsmmc_init_one(struct omap2_hsmmc_info *hsmmcinfo,
- int ctrl_nr)
-{
- struct omap_hwmod *oh;
- struct omap_hwmod *ohs[1];
- struct omap_device *od;
- struct platform_device *pdev;
- char oh_name[MAX_OMAP_MMC_HWMOD_NAME_LEN];
- struct omap_hsmmc_platform_data *mmc_data;
- struct omap_hsmmc_dev_attr *mmc_dev_attr;
- char *name;
- int res;
-
- mmc_data = kzalloc(sizeof(*mmc_data), GFP_KERNEL);
- if (!mmc_data)
- return;
-
- res = omap_hsmmc_pdata_init(hsmmcinfo, mmc_data);
- if (res < 0)
- goto free_mmc;
-
- name = "omap_hsmmc";
- res = snprintf(oh_name, MAX_OMAP_MMC_HWMOD_NAME_LEN,
- "mmc%d", ctrl_nr);
- WARN(res >= MAX_OMAP_MMC_HWMOD_NAME_LEN,
- "String buffer overflow in MMC%d device setup\n", ctrl_nr);
-
- oh = omap_hwmod_lookup(oh_name);
- if (!oh) {
- pr_err("Could not look up %s\n", oh_name);
- goto free_name;
- }
- ohs[0] = oh;
- if (oh->dev_attr != NULL) {
- mmc_dev_attr = oh->dev_attr;
- mmc_data->controller_flags = mmc_dev_attr->flags;
- }
-
- pdev = platform_device_alloc(name, ctrl_nr - 1);
- if (!pdev) {
- pr_err("Could not allocate pdev for %s\n", name);
- goto free_name;
- }
- dev_set_name(&pdev->dev, "%s.%d", pdev->name, pdev->id);
-
- od = omap_device_alloc(pdev, ohs, 1);
- if (IS_ERR(od)) {
- pr_err("Could not allocate od for %s\n", name);
- goto put_pdev;
- }
-
- res = platform_device_add_data(pdev, mmc_data,
- sizeof(struct omap_hsmmc_platform_data));
- if (res) {
- pr_err("Could not add pdata for %s\n", name);
- goto put_pdev;
- }
-
- hsmmcinfo->pdev = pdev;
-
- res = omap_device_register(pdev);
- if (res) {
- pr_err("Could not register od for %s\n", name);
- goto free_od;
- }
-
- goto free_mmc;
-
-free_od:
- omap_device_delete(od);
-
-put_pdev:
- platform_device_put(pdev);
-
-free_name:
- kfree(mmc_data->name);
-
-free_mmc:
- kfree(mmc_data);
-}
-
-void __init omap_hsmmc_init(struct omap2_hsmmc_info *controllers)
-{
- if (omap_hsmmc_done)
- return;
-
- omap_hsmmc_done = 1;
-
- if (cpu_is_omap2430()) {
- control_pbias_offset = OMAP243X_CONTROL_PBIAS_LITE;
- control_devconf1_offset = OMAP243X_CONTROL_DEVCONF1;
- } else {
- control_pbias_offset = OMAP343X_CONTROL_PBIAS_LITE;
- control_devconf1_offset = OMAP343X_CONTROL_DEVCONF1;
- }
-
- for (; controllers->mmc; controllers++)
- omap_hsmmc_init_one(controllers, controllers->mmc);
-
-}
-
-#endif
diff --git a/arch/arm/mach-omap2/hsmmc.h b/arch/arm/mach-omap2/hsmmc.h
deleted file mode 100644
index 76c5ed2afa72..000000000000
--- a/arch/arm/mach-omap2/hsmmc.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * MMC definitions for OMAP2
- */
-
-struct mmc_card;
-
-struct omap2_hsmmc_info {
- u8 mmc; /* controller 1/2/3 */
- u32 caps; /* 4/8 wires and any additional host
- * capabilities OR'd (ref. linux/mmc/host.h) */
- struct platform_device *pdev; /* mmc controller instance */
- /* init some special card */
- void (*init_card)(struct mmc_card *card);
-};
-
-#if IS_ENABLED(CONFIG_MMC_OMAP_HS)
-
-void omap_hsmmc_init(struct omap2_hsmmc_info *);
-void omap_hsmmc_late_init(struct omap2_hsmmc_info *);
-
-#else
-
-static inline void omap_hsmmc_init(struct omap2_hsmmc_info *info)
-{
-}
-
-static inline void omap_hsmmc_late_init(struct omap2_hsmmc_info *info)
-{
-}
-
-#endif
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 2efd18e8824c..c47a2afc91e5 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -7,7 +7,6 @@
#include <linux/clk.h>
#include <linux/davinci_emac.h>
#include <linux/gpio.h>
-#include <linux/gpio/machine.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/of_platform.h>
@@ -33,7 +32,6 @@
#include "omap_device.h"
#include "omap-secure.h"
#include "soc.h"
-#include "hsmmc.h"
static struct omap_hsmmc_platform_data __maybe_unused mmc_pdata[2];
@@ -269,21 +267,13 @@ static void __init am3517_evm_legacy_init(void)
am35xx_emac_reset();
}
-static struct platform_device omap3_rom_rng_device = {
- .name = "omap3-rom-rng",
- .id = -1,
- .dev = {
- .platform_data = rx51_secure_rng_call,
- },
-};
-
static void __init nokia_n900_legacy_init(void)
{
hsmmc2_internal_input_clk();
mmc_pdata[0].name = "external";
mmc_pdata[1].name = "internal";
- if (omap_type() == OMAP2_DEVICE_TYPE_SEC) {
+ if (omap_type() != OMAP2_DEVICE_TYPE_GP) {
if (IS_ENABLED(CONFIG_ARM_ERRATA_430973)) {
pr_info("RX-51: Enabling ARM errata 430973 workaround\n");
/* set IBE to 1 */
@@ -292,9 +282,6 @@ static void __init nokia_n900_legacy_init(void)
pr_warn("RX-51: Not enabling ARM errata 430973 workaround\n");
pr_warn("Thumb binaries may crash randomly without this workaround\n");
}
-
- pr_info("RX-51: Registering OMAP3 HWRNG device\n");
- platform_device_register(&omap3_rom_rng_device);
}
}
@@ -311,118 +298,15 @@ static void __init omap3_logicpd_torpedo_init(void)
}
/* omap3pandora legacy devices */
-#define PANDORA_WIFI_IRQ_GPIO 21
-#define PANDORA_WIFI_NRESET_GPIO 23
static struct platform_device pandora_backlight = {
.name = "pandora-backlight",
.id = -1,
};
-static struct regulator_consumer_supply pandora_vmmc3_supply[] = {
- REGULATOR_SUPPLY("vmmc", "omap_hsmmc.2"),
-};
-
-static struct regulator_init_data pandora_vmmc3 = {
- .constraints = {
- .valid_ops_mask = REGULATOR_CHANGE_STATUS,
- },
- .num_consumer_supplies = ARRAY_SIZE(pandora_vmmc3_supply),
- .consumer_supplies = pandora_vmmc3_supply,
-};
-
-static struct fixed_voltage_config pandora_vwlan = {
- .supply_name = "vwlan",
- .microvolts = 1800000, /* 1.8V */
- .startup_delay = 50000, /* 50ms */
- .init_data = &pandora_vmmc3,
-};
-
-static struct platform_device pandora_vwlan_device = {
- .name = "reg-fixed-voltage",
- .id = 1,
- .dev = {
- .platform_data = &pandora_vwlan,
- },
-};
-
-static struct gpiod_lookup_table pandora_vwlan_gpiod_table = {
- .dev_id = "reg-fixed-voltage.1",
- .table = {
- /*
- * As this is a low GPIO number it should be at the first
- * GPIO bank.
- */
- GPIO_LOOKUP("gpio-0-31", PANDORA_WIFI_NRESET_GPIO,
- NULL, GPIO_ACTIVE_HIGH),
- { },
- },
-};
-
-static void pandora_wl1251_init_card(struct mmc_card *card)
-{
- /*
- * We have TI wl1251 attached to MMC3. Pass this information to
- * SDIO core because it can't be probed by normal methods.
- */
- if (card->type == MMC_TYPE_SDIO || card->type == MMC_TYPE_SD_COMBO) {
- card->quirks |= MMC_QUIRK_NONSTD_SDIO;
- card->cccr.wide_bus = 1;
- card->cis.vendor = 0x104c;
- card->cis.device = 0x9066;
- card->cis.blksize = 512;
- card->cis.max_dtr = 24000000;
- card->ocr = 0x80;
- }
-}
-
-static struct omap2_hsmmc_info pandora_mmc3[] = {
- {
- .mmc = 3,
- .caps = MMC_CAP_4_BIT_DATA | MMC_CAP_POWER_OFF_CARD,
- .init_card = pandora_wl1251_init_card,
- },
- {} /* Terminator */
-};
-
-static void __init pandora_wl1251_init(void)
-{
- struct wl1251_platform_data pandora_wl1251_pdata;
- int ret;
-
- memset(&pandora_wl1251_pdata, 0, sizeof(pandora_wl1251_pdata));
-
- pandora_wl1251_pdata.power_gpio = -1;
-
- ret = gpio_request_one(PANDORA_WIFI_IRQ_GPIO, GPIOF_IN, "wl1251 irq");
- if (ret < 0)
- goto fail;
-
- pandora_wl1251_pdata.irq = gpio_to_irq(PANDORA_WIFI_IRQ_GPIO);
- if (pandora_wl1251_pdata.irq < 0)
- goto fail_irq;
-
- pandora_wl1251_pdata.use_eeprom = true;
- ret = wl1251_set_platform_data(&pandora_wl1251_pdata);
- if (ret < 0)
- goto fail_irq;
-
- return;
-
-fail_irq:
- gpio_free(PANDORA_WIFI_IRQ_GPIO);
-fail:
- pr_err("wl1251 board initialisation failed\n");
-}
-
static void __init omap3_pandora_legacy_init(void)
{
platform_device_register(&pandora_backlight);
- gpiod_add_lookup_table(&pandora_vwlan_gpiod_table);
- platform_device_register(&pandora_vwlan_device);
- omap_hsmmc_init(pandora_mmc3);
- omap_hsmmc_late_init(pandora_mmc3);
- pandora_wl1251_init();
}
#endif /* CONFIG_ARCH_OMAP3 */
@@ -638,6 +522,7 @@ static struct of_dev_auxdata omap_auxdata_lookup[] = {
OF_DEV_AUXDATA("ti,davinci_mdio", 0x5c030000, "davinci_mdio.0", NULL),
OF_DEV_AUXDATA("ti,am3517-emac", 0x5c000000, "davinci_emac.0",
&am35xx_emac_pdata),
+ OF_DEV_AUXDATA("nokia,n900-rom-rng", 0, NULL, rx51_secure_rng_call),
/* McBSP modules with sidetone core */
#if IS_ENABLED(CONFIG_SND_SOC_OMAP_MCBSP)
OF_DEV_AUXDATA("ti,omap3-mcbsp", 0x49022000, "49022000.mcbsp", &mcbsp_pdata),
diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index 865b10344ea2..0474a4b1394d 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -12,6 +12,7 @@
#include <linux/irq.h>
#include <linux/platform_device.h>
+#include <linux/property.h>
#include <linux/gpio.h>
#include <asm/mach-types.h>
@@ -22,7 +23,6 @@
#include <linux/spi/spi.h>
#include <linux/spi/pxa2xx_spi.h>
-#include <linux/can/platform/mcp251x.h>
#include <linux/regulator/machine.h>
#include "generic.h"
@@ -69,8 +69,9 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = {
.gpio_cs = ICONTROL_MCP251x_nCS4
};
-static struct mcp251x_platform_data mcp251x_info = {
- .oscillator_frequency = 16E6,
+static const struct property_entry mcp251x_properties[] = {
+ PROPERTY_ENTRY_U32("clock-frequency", 16000000),
+ {}
};
static struct spi_board_info mcp251x_board_info[] = {
@@ -79,7 +80,7 @@ static struct spi_board_info mcp251x_board_info[] = {
.max_speed_hz = 6500000,
.bus_num = 3,
.chip_select = 0,
- .platform_data = &mcp251x_info,
+ .properties = mcp251x_properties,
.controller_data = &mcp251x_chip_info1,
.irq = PXA_GPIO_TO_IRQ(ICONTROL_MCP251x_nIRQ1)
},
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index da113c8eefbf..b27fc7ac9cea 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -13,6 +13,7 @@
#include <linux/leds.h>
#include <linux/irq.h>
#include <linux/pm.h>
+#include <linux/property.h>
#include <linux/gpio.h>
#include <linux/gpio/machine.h>
#include <linux/serial_8250.h>
@@ -27,7 +28,6 @@
#include <linux/platform_data/i2c-pxa.h>
#include <linux/platform_data/pca953x.h>
#include <linux/apm-emulation.h>
-#include <linux/can/platform/mcp251x.h>
#include <linux/regulator/fixed.h>
#include <linux/regulator/machine.h>
@@ -428,14 +428,15 @@ static struct gpiod_lookup_table can_regulator_gpiod_table = {
},
};
-static struct mcp251x_platform_data zeus_mcp2515_pdata = {
- .oscillator_frequency = 16*1000*1000,
+static const struct property_entry mcp251x_properties[] = {
+ PROPERTY_ENTRY_U32("clock-frequency", 16000000),
+ {}
};
static struct spi_board_info zeus_spi_board_info[] = {
[0] = {
.modalias = "mcp2515",
- .platform_data = &zeus_mcp2515_pdata,
+ .properties = mcp251x_properties,
.irq = PXA_GPIO_TO_IRQ(ZEUS_CAN_GPIO),
.max_speed_hz = 1*1000*1000,
.bus_num = 3,
diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c
index 2447427cb4a8..69f3fa270fbe 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra20.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra20.c
@@ -203,7 +203,7 @@ void tegra20_cpuidle_pcie_irqs_in_use(void)
{
pr_info_once(
"Disabling cpuidle LP2 state, since PCIe IRQs are in use\n");
- tegra_idle_driver.states[1].disabled = true;
+ cpuidle_driver_state_disabled(&tegra_idle_driver, 1, true);
}
int __init tegra20_cpuidle_init(void)
diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c
index 9a6e4923bd69..563440315acd 100644
--- a/arch/arm/plat-pxa/ssp.c
+++ b/arch/arm/plat-pxa/ssp.c
@@ -89,7 +89,7 @@ void pxa_ssp_free(struct ssp_device *ssp)
ssp->use_count--;
ssp->label = NULL;
} else
- dev_err(&ssp->pdev->dev, "device already free\n");
+ dev_err(ssp->dev, "device already free\n");
mutex_unlock(&ssp_lock);
}
EXPORT_SYMBOL(pxa_ssp_free);
@@ -118,7 +118,7 @@ static int pxa_ssp_probe(struct platform_device *pdev)
if (ssp == NULL)
return -ENOMEM;
- ssp->pdev = pdev;
+ ssp->dev = dev;
ssp->clk = devm_clk_get(dev, NULL);
if (IS_ERR(ssp->clk))
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index d66a9727344d..afe6412fe769 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -67,7 +67,7 @@ config ARM64
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_SUPPORTS_ATOMIC_RMW
- select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
@@ -182,7 +182,6 @@ config ARM64
select PCI_SYSCALL if PCI
select POWER_RESET
select POWER_SUPPLY
- select REFCOUNT_FULL
select SPARSE_IRQ
select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
diff --git a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
index 1d05d570142f..ce4b0679839d 100644
--- a/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
+++ b/arch/arm64/boot/dts/allwinner/sun50i-h6-beelink-gs1.dts
@@ -252,6 +252,7 @@
};
&r_ir {
+ linux,rc-map-name = "rc-beelink-gs1";
status = "okay";
};
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 4922c4451e7c..b8eb0453123d 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -86,7 +86,7 @@ config CRYPTO_AES_ARM64_CE_CCM
config CRYPTO_AES_ARM64_CE_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
+ select CRYPTO_SKCIPHER
select CRYPTO_AES_ARM64_CE
select CRYPTO_AES_ARM64
select CRYPTO_SIMD
@@ -94,7 +94,7 @@ config CRYPTO_AES_ARM64_CE_BLK
config CRYPTO_AES_ARM64_NEON_BLK
tristate "AES in ECB/CBC/CTR/XTS modes using NEON instructions"
depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
+ select CRYPTO_SKCIPHER
select CRYPTO_AES_ARM64
select CRYPTO_LIB_AES
select CRYPTO_SIMD
@@ -102,8 +102,15 @@ config CRYPTO_AES_ARM64_NEON_BLK
config CRYPTO_CHACHA20_NEON
tristate "ChaCha20, XChaCha20, and XChaCha12 stream ciphers using NEON instructions"
depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
- select CRYPTO_CHACHA20
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+
+config CRYPTO_POLY1305_NEON
+ tristate "Poly1305 hash function using scalar or NEON instructions"
+ depends on KERNEL_MODE_NEON
+ select CRYPTO_HASH
+ select CRYPTO_ARCH_HAVE_LIB_POLY1305
config CRYPTO_NHPOLY1305_NEON
tristate "NHPoly1305 hash function using NEON instructions (for Adiantum)"
@@ -113,7 +120,7 @@ config CRYPTO_NHPOLY1305_NEON
config CRYPTO_AES_ARM64_BS
tristate "AES in ECB/CBC/CTR/XTS modes using bit-sliced NEON algorithm"
depends on KERNEL_MODE_NEON
- select CRYPTO_BLKCIPHER
+ select CRYPTO_SKCIPHER
select CRYPTO_AES_ARM64_NEON_BLK
select CRYPTO_AES_ARM64
select CRYPTO_LIB_AES
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 0435f2a0610e..d0901e610df3 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -50,6 +50,10 @@ sha512-arm64-y := sha512-glue.o sha512-core.o
obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
chacha-neon-y := chacha-neon-core.o chacha-neon-glue.o
+obj-$(CONFIG_CRYPTO_POLY1305_NEON) += poly1305-neon.o
+poly1305-neon-y := poly1305-core.o poly1305-glue.o
+AFLAGS_poly1305-core.o += -Dpoly1305_init=poly1305_init_arm64
+
obj-$(CONFIG_CRYPTO_NHPOLY1305_NEON) += nhpoly1305-neon.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
@@ -68,11 +72,15 @@ ifdef REGENERATE_ARM64_CRYPTO
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $(<) void $(@)
+$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl
+ $(call cmd,perlasm)
+
$(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
$(call cmd,perlasm)
$(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
$(call cmd,perlasm)
+
endif
-clean-files += sha256-core.S sha512-core.S
+clean-files += poly1305-core.S sha256-core.S sha512-core.S
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index ea873b8904c4..e3e27349a9fe 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -384,7 +384,7 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt,
goto xts_tail;
kernel_neon_end();
- skcipher_walk_done(&walk, nbytes);
+ err = skcipher_walk_done(&walk, nbytes);
}
if (err || likely(!tail))
diff --git a/arch/arm64/crypto/chacha-neon-glue.c b/arch/arm64/crypto/chacha-neon-glue.c
index 1495d2b18518..b08029d7bde6 100644
--- a/arch/arm64/crypto/chacha-neon-glue.c
+++ b/arch/arm64/crypto/chacha-neon-glue.c
@@ -1,5 +1,5 @@
/*
- * ARM NEON accelerated ChaCha and XChaCha stream ciphers,
+ * ARM NEON and scalar accelerated ChaCha and XChaCha stream ciphers,
* including ChaCha20 (RFC7539)
*
* Copyright (C) 2016 - 2017 Linaro, Ltd. <ard.biesheuvel@linaro.org>
@@ -20,9 +20,10 @@
*/
#include <crypto/algapi.h>
-#include <crypto/chacha.h>
+#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
@@ -36,6 +37,8 @@ asmlinkage void chacha_4block_xor_neon(u32 *state, u8 *dst, const u8 *src,
int nrounds, int bytes);
asmlinkage void hchacha_block_neon(const u32 *state, u32 *out, int nrounds);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
int bytes, int nrounds)
{
@@ -59,6 +62,37 @@ static void chacha_doneon(u32 *state, u8 *dst, const u8 *src,
}
}
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ if (!static_branch_likely(&have_neon) || !crypto_simd_usable()) {
+ hchacha_block_generic(state, stream, nrounds);
+ } else {
+ kernel_neon_begin();
+ hchacha_block_neon(state, stream, nrounds);
+ kernel_neon_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ if (!static_branch_likely(&have_neon) || bytes <= CHACHA_BLOCK_SIZE ||
+ !crypto_simd_usable())
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ kernel_neon_begin();
+ chacha_doneon(state, dst, src, bytes, nrounds);
+ kernel_neon_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
static int chacha_neon_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv)
{
@@ -68,7 +102,7 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
err = skcipher_walk_virt(&walk, req, false);
- crypto_chacha_init(state, ctx, iv);
+ chacha_init_generic(state, ctx->key, iv);
while (walk.nbytes > 0) {
unsigned int nbytes = walk.nbytes;
@@ -76,10 +110,17 @@ static int chacha_neon_stream_xor(struct skcipher_request *req,
if (nbytes < walk.total)
nbytes = rounddown(nbytes, walk.stride);
- kernel_neon_begin();
- chacha_doneon(state, walk.dst.virt.addr, walk.src.virt.addr,
- nbytes, ctx->nrounds);
- kernel_neon_end();
+ if (!static_branch_likely(&have_neon) ||
+ !crypto_simd_usable()) {
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ } else {
+ kernel_neon_begin();
+ chacha_doneon(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes, ctx->nrounds);
+ kernel_neon_end();
+ }
err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
@@ -91,9 +132,6 @@ static int chacha_neon(struct skcipher_request *req)
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_chacha_crypt(req);
-
return chacha_neon_stream_xor(req, ctx, req->iv);
}
@@ -105,14 +143,8 @@ static int xchacha_neon(struct skcipher_request *req)
u32 state[16];
u8 real_iv[16];
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_xchacha_crypt(req);
-
- crypto_chacha_init(state, ctx, req->iv);
-
- kernel_neon_begin();
- hchacha_block_neon(state, subctx.key, ctx->nrounds);
- kernel_neon_end();
+ chacha_init_generic(state, ctx->key, req->iv);
+ hchacha_block_arch(state, subctx.key, ctx->nrounds);
subctx.nrounds = ctx->nrounds;
memcpy(&real_iv[0], req->iv + 24, 8);
@@ -134,7 +166,7 @@ static struct skcipher_alg algs[] = {
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = chacha_neon,
.decrypt = chacha_neon,
}, {
@@ -150,7 +182,7 @@ static struct skcipher_alg algs[] = {
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = xchacha_neon,
.decrypt = xchacha_neon,
}, {
@@ -166,7 +198,7 @@ static struct skcipher_alg algs[] = {
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
.walksize = 5 * CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha12_setkey,
+ .setkey = chacha12_setkey,
.encrypt = xchacha_neon,
.decrypt = xchacha_neon,
}
@@ -175,14 +207,17 @@ static struct skcipher_alg algs[] = {
static int __init chacha_simd_mod_init(void)
{
if (!cpu_have_named_feature(ASIMD))
- return -ENODEV;
+ return 0;
+
+ static_branch_enable(&have_neon);
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}
static void __exit chacha_simd_mod_fini(void)
{
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+ if (cpu_have_named_feature(ASIMD))
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
module_init(chacha_simd_mod_init);
diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index 410e8afcf5a7..a791c4adf8e6 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -13,8 +13,8 @@
T1 .req v2
T2 .req v3
MASK .req v4
- XL .req v5
- XM .req v6
+ XM .req v5
+ XL .req v6
XH .req v7
IN1 .req v7
@@ -358,20 +358,37 @@ ENTRY(pmull_ghash_update_p8)
__pmull_ghash p8
ENDPROC(pmull_ghash_update_p8)
- KS0 .req v12
- KS1 .req v13
- INP0 .req v14
- INP1 .req v15
-
- .macro load_round_keys, rounds, rk
- cmp \rounds, #12
- blo 2222f /* 128 bits */
- beq 1111f /* 192 bits */
- ld1 {v17.4s-v18.4s}, [\rk], #32
-1111: ld1 {v19.4s-v20.4s}, [\rk], #32
-2222: ld1 {v21.4s-v24.4s}, [\rk], #64
- ld1 {v25.4s-v28.4s}, [\rk], #64
- ld1 {v29.4s-v31.4s}, [\rk]
+ KS0 .req v8
+ KS1 .req v9
+ KS2 .req v10
+ KS3 .req v11
+
+ INP0 .req v21
+ INP1 .req v22
+ INP2 .req v23
+ INP3 .req v24
+
+ K0 .req v25
+ K1 .req v26
+ K2 .req v27
+ K3 .req v28
+ K4 .req v12
+ K5 .req v13
+ K6 .req v4
+ K7 .req v5
+ K8 .req v14
+ K9 .req v15
+ KK .req v29
+ KL .req v30
+ KM .req v31
+
+ .macro load_round_keys, rounds, rk, tmp
+ add \tmp, \rk, #64
+ ld1 {K0.4s-K3.4s}, [\rk]
+ ld1 {K4.4s-K5.4s}, [\tmp]
+ add \tmp, \rk, \rounds, lsl #4
+ sub \tmp, \tmp, #32
+ ld1 {KK.4s-KM.4s}, [\tmp]
.endm
.macro enc_round, state, key
@@ -379,197 +396,367 @@ ENDPROC(pmull_ghash_update_p8)
aesmc \state\().16b, \state\().16b
.endm
- .macro enc_block, state, rounds
- cmp \rounds, #12
- b.lo 2222f /* 128 bits */
- b.eq 1111f /* 192 bits */
- enc_round \state, v17
- enc_round \state, v18
-1111: enc_round \state, v19
- enc_round \state, v20
-2222: .irp key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+ .macro enc_qround, s0, s1, s2, s3, key
+ enc_round \s0, \key
+ enc_round \s1, \key
+ enc_round \s2, \key
+ enc_round \s3, \key
+ .endm
+
+ .macro enc_block, state, rounds, rk, tmp
+ add \tmp, \rk, #96
+ ld1 {K6.4s-K7.4s}, [\tmp], #32
+ .irp key, K0, K1, K2, K3, K4 K5
enc_round \state, \key
.endr
- aese \state\().16b, v30.16b
- eor \state\().16b, \state\().16b, v31.16b
+
+ tbnz \rounds, #2, .Lnot128_\@
+.Lout256_\@:
+ enc_round \state, K6
+ enc_round \state, K7
+
+.Lout192_\@:
+ enc_round \state, KK
+ aese \state\().16b, KL.16b
+ eor \state\().16b, \state\().16b, KM.16b
+
+ .subsection 1
+.Lnot128_\@:
+ ld1 {K8.4s-K9.4s}, [\tmp], #32
+ enc_round \state, K6
+ enc_round \state, K7
+ ld1 {K6.4s-K7.4s}, [\tmp]
+ enc_round \state, K8
+ enc_round \state, K9
+ tbz \rounds, #1, .Lout192_\@
+ b .Lout256_\@
+ .previous
.endm
+ .align 6
.macro pmull_gcm_do_crypt, enc
- ld1 {SHASH.2d}, [x4], #16
- ld1 {HH.2d}, [x4]
- ld1 {XL.2d}, [x1]
- ldr x8, [x5, #8] // load lower counter
+ stp x29, x30, [sp, #-32]!
+ mov x29, sp
+ str x19, [sp, #24]
+
+ load_round_keys x7, x6, x8
+
+ ld1 {SHASH.2d}, [x3], #16
+ ld1 {HH.2d-HH4.2d}, [x3]
- movi MASK.16b, #0xe1
trn1 SHASH2.2d, SHASH.2d, HH.2d
trn2 T1.2d, SHASH.2d, HH.2d
-CPU_LE( rev x8, x8 )
- shl MASK.2d, MASK.2d, #57
eor SHASH2.16b, SHASH2.16b, T1.16b
- .if \enc == 1
- ldr x10, [sp]
- ld1 {KS0.16b-KS1.16b}, [x10]
- .endif
+ trn1 HH34.2d, HH3.2d, HH4.2d
+ trn2 T1.2d, HH3.2d, HH4.2d
+ eor HH34.16b, HH34.16b, T1.16b
- cbnz x6, 4f
+ ld1 {XL.2d}, [x4]
-0: ld1 {INP0.16b-INP1.16b}, [x3], #32
+ cbz x0, 3f // tag only?
- rev x9, x8
- add x11, x8, #1
- add x8, x8, #2
+ ldr w8, [x5, #12] // load lower counter
+CPU_LE( rev w8, w8 )
- .if \enc == 1
- eor INP0.16b, INP0.16b, KS0.16b // encrypt input
- eor INP1.16b, INP1.16b, KS1.16b
+0: mov w9, #4 // max blocks per round
+ add x10, x0, #0xf
+ lsr x10, x10, #4 // remaining blocks
+
+ subs x0, x0, #64
+ csel w9, w10, w9, mi
+ add w8, w8, w9
+
+ bmi 1f
+ ld1 {INP0.16b-INP3.16b}, [x2], #64
+ .subsection 1
+ /*
+ * Populate the four input registers right to left with up to 63 bytes
+ * of data, using overlapping loads to avoid branches.
+ *
+ * INP0 INP1 INP2 INP3
+ * 1 byte | | | |x |
+ * 16 bytes | | | |xxxxxxxx|
+ * 17 bytes | | |xxxxxxxx|x |
+ * 47 bytes | |xxxxxxxx|xxxxxxxx|xxxxxxx |
+ * etc etc
+ *
+ * Note that this code may read up to 15 bytes before the start of
+ * the input. It is up to the calling code to ensure this is safe if
+ * this happens in the first iteration of the loop (i.e., when the
+ * input size is < 16 bytes)
+ */
+1: mov x15, #16
+ ands x19, x0, #0xf
+ csel x19, x19, x15, ne
+ adr_l x17, .Lpermute_table + 16
+
+ sub x11, x15, x19
+ add x12, x17, x11
+ sub x17, x17, x11
+ ld1 {T1.16b}, [x12]
+ sub x10, x1, x11
+ sub x11, x2, x11
+
+ cmp x0, #-16
+ csel x14, x15, xzr, gt
+ cmp x0, #-32
+ csel x15, x15, xzr, gt
+ cmp x0, #-48
+ csel x16, x19, xzr, gt
+ csel x1, x1, x10, gt
+ csel x2, x2, x11, gt
+
+ ld1 {INP0.16b}, [x2], x14
+ ld1 {INP1.16b}, [x2], x15
+ ld1 {INP2.16b}, [x2], x16
+ ld1 {INP3.16b}, [x2]
+ tbl INP3.16b, {INP3.16b}, T1.16b
+ b 2f
+ .previous
+
+2: .if \enc == 0
+ bl pmull_gcm_ghash_4x
.endif
- ld1 {KS0.8b}, [x5] // load upper counter
- rev x11, x11
- sub w0, w0, #2
- mov KS1.8b, KS0.8b
- ins KS0.d[1], x9 // set lower counter
- ins KS1.d[1], x11
+ bl pmull_gcm_enc_4x
- rev64 T1.16b, INP1.16b
+ tbnz x0, #63, 6f
+ st1 {INP0.16b-INP3.16b}, [x1], #64
+ .if \enc == 1
+ bl pmull_gcm_ghash_4x
+ .endif
+ bne 0b
- cmp w7, #12
- b.ge 2f // AES-192/256?
+3: ldp x19, x10, [sp, #24]
+ cbz x10, 5f // output tag?
-1: enc_round KS0, v21
- ext IN1.16b, T1.16b, T1.16b, #8
+ ld1 {INP3.16b}, [x10] // load lengths[]
+ mov w9, #1
+ bl pmull_gcm_ghash_4x
- enc_round KS1, v21
- pmull2 XH2.1q, SHASH.2d, IN1.2d // a1 * b1
+ mov w11, #(0x1 << 24) // BE '1U'
+ ld1 {KS0.16b}, [x5]
+ mov KS0.s[3], w11
- enc_round KS0, v22
- eor T1.16b, T1.16b, IN1.16b
+ enc_block KS0, x7, x6, x12
- enc_round KS1, v22
- pmull XL2.1q, SHASH.1d, IN1.1d // a0 * b0
+ ext XL.16b, XL.16b, XL.16b, #8
+ rev64 XL.16b, XL.16b
+ eor XL.16b, XL.16b, KS0.16b
+ st1 {XL.16b}, [x10] // store tag
- enc_round KS0, v23
- pmull XM2.1q, SHASH2.1d, T1.1d // (a1 + a0)(b1 + b0)
+4: ldp x29, x30, [sp], #32
+ ret
- enc_round KS1, v23
- rev64 T1.16b, INP0.16b
- ext T2.16b, XL.16b, XL.16b, #8
+5:
+CPU_LE( rev w8, w8 )
+ str w8, [x5, #12] // store lower counter
+ st1 {XL.2d}, [x4]
+ b 4b
+
+6: ld1 {T1.16b-T2.16b}, [x17], #32 // permute vectors
+ sub x17, x17, x19, lsl #1
+
+ cmp w9, #1
+ beq 7f
+ .subsection 1
+7: ld1 {INP2.16b}, [x1]
+ tbx INP2.16b, {INP3.16b}, T1.16b
+ mov INP3.16b, INP2.16b
+ b 8f
+ .previous
+
+ st1 {INP0.16b}, [x1], x14
+ st1 {INP1.16b}, [x1], x15
+ st1 {INP2.16b}, [x1], x16
+ tbl INP3.16b, {INP3.16b}, T1.16b
+ tbx INP3.16b, {INP2.16b}, T2.16b
+8: st1 {INP3.16b}, [x1]
- enc_round KS0, v24
- ext IN1.16b, T1.16b, T1.16b, #8
- eor T1.16b, T1.16b, T2.16b
+ .if \enc == 1
+ ld1 {T1.16b}, [x17]
+ tbl INP3.16b, {INP3.16b}, T1.16b // clear non-data bits
+ bl pmull_gcm_ghash_4x
+ .endif
+ b 3b
+ .endm
- enc_round KS1, v24
- eor XL.16b, XL.16b, IN1.16b
+ /*
+ * void pmull_gcm_encrypt(int blocks, u8 dst[], const u8 src[],
+ * struct ghash_key const *k, u64 dg[], u8 ctr[],
+ * int rounds, u8 tag)
+ */
+ENTRY(pmull_gcm_encrypt)
+ pmull_gcm_do_crypt 1
+ENDPROC(pmull_gcm_encrypt)
- enc_round KS0, v25
- eor T1.16b, T1.16b, XL.16b
+ /*
+ * void pmull_gcm_decrypt(int blocks, u8 dst[], const u8 src[],
+ * struct ghash_key const *k, u64 dg[], u8 ctr[],
+ * int rounds, u8 tag)
+ */
+ENTRY(pmull_gcm_decrypt)
+ pmull_gcm_do_crypt 0
+ENDPROC(pmull_gcm_decrypt)
- enc_round KS1, v25
- pmull2 XH.1q, HH.2d, XL.2d // a1 * b1
+pmull_gcm_ghash_4x:
+ movi MASK.16b, #0xe1
+ shl MASK.2d, MASK.2d, #57
- enc_round KS0, v26
- pmull XL.1q, HH.1d, XL.1d // a0 * b0
+ rev64 T1.16b, INP0.16b
+ rev64 T2.16b, INP1.16b
+ rev64 TT3.16b, INP2.16b
+ rev64 TT4.16b, INP3.16b
- enc_round KS1, v26
- pmull2 XM.1q, SHASH2.2d, T1.2d // (a1 + a0)(b1 + b0)
+ ext XL.16b, XL.16b, XL.16b, #8
- enc_round KS0, v27
- eor XL.16b, XL.16b, XL2.16b
- eor XH.16b, XH.16b, XH2.16b
+ tbz w9, #2, 0f // <4 blocks?
+ .subsection 1
+0: movi XH2.16b, #0
+ movi XM2.16b, #0
+ movi XL2.16b, #0
- enc_round KS1, v27
- eor XM.16b, XM.16b, XM2.16b
- ext T1.16b, XL.16b, XH.16b, #8
+ tbz w9, #0, 1f // 2 blocks?
+ tbz w9, #1, 2f // 1 block?
- enc_round KS0, v28
- eor T2.16b, XL.16b, XH.16b
- eor XM.16b, XM.16b, T1.16b
+ eor T2.16b, T2.16b, XL.16b
+ ext T1.16b, T2.16b, T2.16b, #8
+ b .Lgh3
- enc_round KS1, v28
- eor XM.16b, XM.16b, T2.16b
+1: eor TT3.16b, TT3.16b, XL.16b
+ ext T2.16b, TT3.16b, TT3.16b, #8
+ b .Lgh2
- enc_round KS0, v29
- pmull T2.1q, XL.1d, MASK.1d
+2: eor TT4.16b, TT4.16b, XL.16b
+ ext IN1.16b, TT4.16b, TT4.16b, #8
+ b .Lgh1
+ .previous
- enc_round KS1, v29
- mov XH.d[0], XM.d[1]
- mov XM.d[1], XL.d[0]
+ eor T1.16b, T1.16b, XL.16b
+ ext IN1.16b, T1.16b, T1.16b, #8
- aese KS0.16b, v30.16b
- eor XL.16b, XM.16b, T2.16b
+ pmull2 XH2.1q, HH4.2d, IN1.2d // a1 * b1
+ eor T1.16b, T1.16b, IN1.16b
+ pmull XL2.1q, HH4.1d, IN1.1d // a0 * b0
+ pmull2 XM2.1q, HH34.2d, T1.2d // (a1 + a0)(b1 + b0)
- aese KS1.16b, v30.16b
- ext T2.16b, XL.16b, XL.16b, #8
+ ext T1.16b, T2.16b, T2.16b, #8
+.Lgh3: eor T2.16b, T2.16b, T1.16b
+ pmull2 XH.1q, HH3.2d, T1.2d // a1 * b1
+ pmull XL.1q, HH3.1d, T1.1d // a0 * b0
+ pmull XM.1q, HH34.1d, T2.1d // (a1 + a0)(b1 + b0)
- eor KS0.16b, KS0.16b, v31.16b
- pmull XL.1q, XL.1d, MASK.1d
- eor T2.16b, T2.16b, XH.16b
+ eor XH2.16b, XH2.16b, XH.16b
+ eor XL2.16b, XL2.16b, XL.16b
+ eor XM2.16b, XM2.16b, XM.16b
- eor KS1.16b, KS1.16b, v31.16b
- eor XL.16b, XL.16b, T2.16b
+ ext T2.16b, TT3.16b, TT3.16b, #8
+.Lgh2: eor TT3.16b, TT3.16b, T2.16b
+ pmull2 XH.1q, HH.2d, T2.2d // a1 * b1
+ pmull XL.1q, HH.1d, T2.1d // a0 * b0
+ pmull2 XM.1q, SHASH2.2d, TT3.2d // (a1 + a0)(b1 + b0)
- .if \enc == 0
- eor INP0.16b, INP0.16b, KS0.16b
- eor INP1.16b, INP1.16b, KS1.16b
- .endif
+ eor XH2.16b, XH2.16b, XH.16b
+ eor XL2.16b, XL2.16b, XL.16b
+ eor XM2.16b, XM2.16b, XM.16b
- st1 {INP0.16b-INP1.16b}, [x2], #32
+ ext IN1.16b, TT4.16b, TT4.16b, #8
+.Lgh1: eor TT4.16b, TT4.16b, IN1.16b
+ pmull XL.1q, SHASH.1d, IN1.1d // a0 * b0
+ pmull2 XH.1q, SHASH.2d, IN1.2d // a1 * b1
+ pmull XM.1q, SHASH2.1d, TT4.1d // (a1 + a0)(b1 + b0)
- cbnz w0, 0b
+ eor XH.16b, XH.16b, XH2.16b
+ eor XL.16b, XL.16b, XL2.16b
+ eor XM.16b, XM.16b, XM2.16b
-CPU_LE( rev x8, x8 )
- st1 {XL.2d}, [x1]
- str x8, [x5, #8] // store lower counter
+ eor T2.16b, XL.16b, XH.16b
+ ext T1.16b, XL.16b, XH.16b, #8
+ eor XM.16b, XM.16b, T2.16b
- .if \enc == 1
- st1 {KS0.16b-KS1.16b}, [x10]
- .endif
+ __pmull_reduce_p64
+
+ eor T2.16b, T2.16b, XH.16b
+ eor XL.16b, XL.16b, T2.16b
ret
+ENDPROC(pmull_gcm_ghash_4x)
+
+pmull_gcm_enc_4x:
+ ld1 {KS0.16b}, [x5] // load upper counter
+ sub w10, w8, #4
+ sub w11, w8, #3
+ sub w12, w8, #2
+ sub w13, w8, #1
+ rev w10, w10
+ rev w11, w11
+ rev w12, w12
+ rev w13, w13
+ mov KS1.16b, KS0.16b
+ mov KS2.16b, KS0.16b
+ mov KS3.16b, KS0.16b
+ ins KS0.s[3], w10 // set lower counter
+ ins KS1.s[3], w11
+ ins KS2.s[3], w12
+ ins KS3.s[3], w13
+
+ add x10, x6, #96 // round key pointer
+ ld1 {K6.4s-K7.4s}, [x10], #32
+ .irp key, K0, K1, K2, K3, K4, K5
+ enc_qround KS0, KS1, KS2, KS3, \key
+ .endr
-2: b.eq 3f // AES-192?
- enc_round KS0, v17
- enc_round KS1, v17
- enc_round KS0, v18
- enc_round KS1, v18
-3: enc_round KS0, v19
- enc_round KS1, v19
- enc_round KS0, v20
- enc_round KS1, v20
- b 1b
+ tbnz x7, #2, .Lnot128
+ .subsection 1
+.Lnot128:
+ ld1 {K8.4s-K9.4s}, [x10], #32
+ .irp key, K6, K7
+ enc_qround KS0, KS1, KS2, KS3, \key
+ .endr
+ ld1 {K6.4s-K7.4s}, [x10]
+ .irp key, K8, K9
+ enc_qround KS0, KS1, KS2, KS3, \key
+ .endr
+ tbz x7, #1, .Lout192
+ b .Lout256
+ .previous
-4: load_round_keys w7, x6
- b 0b
- .endm
+.Lout256:
+ .irp key, K6, K7
+ enc_qround KS0, KS1, KS2, KS3, \key
+ .endr
- /*
- * void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
- * struct ghash_key const *k, u8 ctr[],
- * int rounds, u8 ks[])
- */
-ENTRY(pmull_gcm_encrypt)
- pmull_gcm_do_crypt 1
-ENDPROC(pmull_gcm_encrypt)
+.Lout192:
+ enc_qround KS0, KS1, KS2, KS3, KK
- /*
- * void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[],
- * struct ghash_key const *k, u8 ctr[],
- * int rounds)
- */
-ENTRY(pmull_gcm_decrypt)
- pmull_gcm_do_crypt 0
-ENDPROC(pmull_gcm_decrypt)
+ aese KS0.16b, KL.16b
+ aese KS1.16b, KL.16b
+ aese KS2.16b, KL.16b
+ aese KS3.16b, KL.16b
+
+ eor KS0.16b, KS0.16b, KM.16b
+ eor KS1.16b, KS1.16b, KM.16b
+ eor KS2.16b, KS2.16b, KM.16b
+ eor KS3.16b, KS3.16b, KM.16b
+
+ eor INP0.16b, INP0.16b, KS0.16b
+ eor INP1.16b, INP1.16b, KS1.16b
+ eor INP2.16b, INP2.16b, KS2.16b
+ eor INP3.16b, INP3.16b, KS3.16b
- /*
- * void pmull_gcm_encrypt_block(u8 dst[], u8 src[], u8 rk[], int rounds)
- */
-ENTRY(pmull_gcm_encrypt_block)
- cbz x2, 0f
- load_round_keys w3, x2
-0: ld1 {v0.16b}, [x1]
- enc_block v0, w3
- st1 {v0.16b}, [x0]
ret
-ENDPROC(pmull_gcm_encrypt_block)
+ENDPROC(pmull_gcm_enc_4x)
+
+ .section ".rodata", "a"
+ .align 6
+.Lpermute_table:
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+ .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7
+ .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf
+ .previous
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 70b1469783f9..522cf004ce65 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -58,17 +58,15 @@ asmlinkage void pmull_ghash_update_p8(int blocks, u64 dg[], const char *src,
struct ghash_key const *k,
const char *head);
-asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[],
- const u8 src[], struct ghash_key const *k,
+asmlinkage void pmull_gcm_encrypt(int bytes, u8 dst[], const u8 src[],
+ struct ghash_key const *k, u64 dg[],
u8 ctr[], u32 const rk[], int rounds,
- u8 ks[]);
+ u8 tag[]);
-asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[],
- const u8 src[], struct ghash_key const *k,
- u8 ctr[], u32 const rk[], int rounds);
-
-asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
- u32 const rk[], int rounds);
+asmlinkage void pmull_gcm_decrypt(int bytes, u8 dst[], const u8 src[],
+ struct ghash_key const *k, u64 dg[],
+ u8 ctr[], u32 const rk[], int rounds,
+ u8 tag[]);
static int ghash_init(struct shash_desc *desc)
{
@@ -85,7 +83,7 @@ static void ghash_do_update(int blocks, u64 dg[], const char *src,
struct ghash_key const *k,
const char *head))
{
- if (likely(crypto_simd_usable())) {
+ if (likely(crypto_simd_usable() && simd_update)) {
kernel_neon_begin();
simd_update(blocks, dg, src, key, head);
kernel_neon_end();
@@ -398,136 +396,112 @@ static void gcm_calculate_auth_mac(struct aead_request *req, u64 dg[])
}
}
-static void gcm_final(struct aead_request *req, struct gcm_aes_ctx *ctx,
- u64 dg[], u8 tag[], int cryptlen)
-{
- u8 mac[AES_BLOCK_SIZE];
- u128 lengths;
-
- lengths.a = cpu_to_be64(req->assoclen * 8);
- lengths.b = cpu_to_be64(cryptlen * 8);
-
- ghash_do_update(1, dg, (void *)&lengths, &ctx->ghash_key, NULL,
- pmull_ghash_update_p64);
-
- put_unaligned_be64(dg[1], mac);
- put_unaligned_be64(dg[0], mac + 8);
-
- crypto_xor(tag, mac, AES_BLOCK_SIZE);
-}
-
static int gcm_encrypt(struct aead_request *req)
{
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
+ int nrounds = num_rounds(&ctx->aes_key);
struct skcipher_walk walk;
+ u8 buf[AES_BLOCK_SIZE];
u8 iv[AES_BLOCK_SIZE];
- u8 ks[2 * AES_BLOCK_SIZE];
- u8 tag[AES_BLOCK_SIZE];
u64 dg[2] = {};
- int nrounds = num_rounds(&ctx->aes_key);
+ u128 lengths;
+ u8 *tag;
int err;
+ lengths.a = cpu_to_be64(req->assoclen * 8);
+ lengths.b = cpu_to_be64(req->cryptlen * 8);
+
if (req->assoclen)
gcm_calculate_auth_mac(req, dg);
memcpy(iv, req->iv, GCM_IV_SIZE);
- put_unaligned_be32(1, iv + GCM_IV_SIZE);
+ put_unaligned_be32(2, iv + GCM_IV_SIZE);
err = skcipher_walk_aead_encrypt(&walk, req, false);
- if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) {
- u32 const *rk = NULL;
-
- kernel_neon_begin();
- pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
- put_unaligned_be32(2, iv + GCM_IV_SIZE);
- pmull_gcm_encrypt_block(ks, iv, NULL, nrounds);
- put_unaligned_be32(3, iv + GCM_IV_SIZE);
- pmull_gcm_encrypt_block(ks + AES_BLOCK_SIZE, iv, NULL, nrounds);
- put_unaligned_be32(4, iv + GCM_IV_SIZE);
-
+ if (likely(crypto_simd_usable())) {
do {
- int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ int nbytes = walk.nbytes;
+
+ tag = (u8 *)&lengths;
- if (rk)
- kernel_neon_begin();
+ if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
+ src = dst = memcpy(buf + sizeof(buf) - nbytes,
+ src, nbytes);
+ } else if (nbytes < walk.total) {
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
+ tag = NULL;
+ }
- pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr,
- walk.src.virt.addr, &ctx->ghash_key,
- iv, rk, nrounds, ks);
+ kernel_neon_begin();
+ pmull_gcm_encrypt(nbytes, dst, src, &ctx->ghash_key, dg,
+ iv, ctx->aes_key.key_enc, nrounds,
+ tag);
kernel_neon_end();
- err = skcipher_walk_done(&walk,
- walk.nbytes % (2 * AES_BLOCK_SIZE));
+ if (unlikely(!nbytes))
+ break;
- rk = ctx->aes_key.key_enc;
- } while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
- } else {
- aes_encrypt(&ctx->aes_key, tag, iv);
- put_unaligned_be32(2, iv + GCM_IV_SIZE);
+ if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr,
+ buf + sizeof(buf) - nbytes, nbytes);
- while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
- const int blocks =
- walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ } while (walk.nbytes);
+ } else {
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- u8 *src = walk.src.virt.addr;
int remaining = blocks;
do {
- aes_encrypt(&ctx->aes_key, ks, iv);
- crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
+ aes_encrypt(&ctx->aes_key, buf, iv);
+ crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
crypto_inc(iv, AES_BLOCK_SIZE);
dst += AES_BLOCK_SIZE;
src += AES_BLOCK_SIZE;
} while (--remaining > 0);
- ghash_do_update(blocks, dg,
- walk.dst.virt.addr, &ctx->ghash_key,
- NULL, pmull_ghash_update_p64);
+ ghash_do_update(blocks, dg, walk.dst.virt.addr,
+ &ctx->ghash_key, NULL, NULL);
err = skcipher_walk_done(&walk,
- walk.nbytes % (2 * AES_BLOCK_SIZE));
- }
- if (walk.nbytes) {
- aes_encrypt(&ctx->aes_key, ks, iv);
- if (walk.nbytes > AES_BLOCK_SIZE) {
- crypto_inc(iv, AES_BLOCK_SIZE);
- aes_encrypt(&ctx->aes_key, ks + AES_BLOCK_SIZE, iv);
- }
+ walk.nbytes % AES_BLOCK_SIZE);
}
- }
- /* handle the tail */
- if (walk.nbytes) {
- u8 buf[GHASH_BLOCK_SIZE];
- unsigned int nbytes = walk.nbytes;
- u8 *dst = walk.dst.virt.addr;
- u8 *head = NULL;
+ /* handle the tail */
+ if (walk.nbytes) {
+ aes_encrypt(&ctx->aes_key, buf, iv);
- crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, ks,
- walk.nbytes);
+ crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
+ buf, walk.nbytes);
- if (walk.nbytes > GHASH_BLOCK_SIZE) {
- head = dst;
- dst += GHASH_BLOCK_SIZE;
- nbytes %= GHASH_BLOCK_SIZE;
+ memcpy(buf, walk.dst.virt.addr, walk.nbytes);
+ memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
}
- memcpy(buf, dst, nbytes);
- memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
- ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head,
- pmull_ghash_update_p64);
+ tag = (u8 *)&lengths;
+ ghash_do_update(1, dg, tag, &ctx->ghash_key,
+ walk.nbytes ? buf : NULL, NULL);
- err = skcipher_walk_done(&walk, 0);
+ if (walk.nbytes)
+ err = skcipher_walk_done(&walk, 0);
+
+ put_unaligned_be64(dg[1], tag);
+ put_unaligned_be64(dg[0], tag + 8);
+ put_unaligned_be32(1, iv + GCM_IV_SIZE);
+ aes_encrypt(&ctx->aes_key, iv, iv);
+ crypto_xor(tag, iv, AES_BLOCK_SIZE);
}
if (err)
return err;
- gcm_final(req, ctx, dg, tag, req->cryptlen);
-
/* copy authtag to end of dst */
scatterwalk_map_and_copy(tag, req->dst, req->assoclen + req->cryptlen,
crypto_aead_authsize(aead), 1);
@@ -540,75 +514,65 @@ static int gcm_decrypt(struct aead_request *req)
struct crypto_aead *aead = crypto_aead_reqtfm(req);
struct gcm_aes_ctx *ctx = crypto_aead_ctx(aead);
unsigned int authsize = crypto_aead_authsize(aead);
+ int nrounds = num_rounds(&ctx->aes_key);
struct skcipher_walk walk;
- u8 iv[2 * AES_BLOCK_SIZE];
- u8 tag[AES_BLOCK_SIZE];
- u8 buf[2 * GHASH_BLOCK_SIZE];
+ u8 buf[AES_BLOCK_SIZE];
+ u8 iv[AES_BLOCK_SIZE];
u64 dg[2] = {};
- int nrounds = num_rounds(&ctx->aes_key);
+ u128 lengths;
+ u8 *tag;
int err;
+ lengths.a = cpu_to_be64(req->assoclen * 8);
+ lengths.b = cpu_to_be64((req->cryptlen - authsize) * 8);
+
if (req->assoclen)
gcm_calculate_auth_mac(req, dg);
memcpy(iv, req->iv, GCM_IV_SIZE);
- put_unaligned_be32(1, iv + GCM_IV_SIZE);
+ put_unaligned_be32(2, iv + GCM_IV_SIZE);
err = skcipher_walk_aead_decrypt(&walk, req, false);
- if (likely(crypto_simd_usable() && walk.total >= 2 * AES_BLOCK_SIZE)) {
- u32 const *rk = NULL;
-
- kernel_neon_begin();
- pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, nrounds);
- put_unaligned_be32(2, iv + GCM_IV_SIZE);
-
+ if (likely(crypto_simd_usable())) {
do {
- int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
- int rem = walk.total - blocks * AES_BLOCK_SIZE;
-
- if (rk)
- kernel_neon_begin();
-
- pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr,
- walk.src.virt.addr, &ctx->ghash_key,
- iv, rk, nrounds);
-
- /* check if this is the final iteration of the loop */
- if (rem < (2 * AES_BLOCK_SIZE)) {
- u8 *iv2 = iv + AES_BLOCK_SIZE;
-
- if (rem > AES_BLOCK_SIZE) {
- memcpy(iv2, iv, AES_BLOCK_SIZE);
- crypto_inc(iv2, AES_BLOCK_SIZE);
- }
+ const u8 *src = walk.src.virt.addr;
+ u8 *dst = walk.dst.virt.addr;
+ int nbytes = walk.nbytes;
- pmull_gcm_encrypt_block(iv, iv, NULL, nrounds);
+ tag = (u8 *)&lengths;
- if (rem > AES_BLOCK_SIZE)
- pmull_gcm_encrypt_block(iv2, iv2, NULL,
- nrounds);
+ if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE)) {
+ src = dst = memcpy(buf + sizeof(buf) - nbytes,
+ src, nbytes);
+ } else if (nbytes < walk.total) {
+ nbytes &= ~(AES_BLOCK_SIZE - 1);
+ tag = NULL;
}
+ kernel_neon_begin();
+ pmull_gcm_decrypt(nbytes, dst, src, &ctx->ghash_key, dg,
+ iv, ctx->aes_key.key_enc, nrounds,
+ tag);
kernel_neon_end();
- err = skcipher_walk_done(&walk,
- walk.nbytes % (2 * AES_BLOCK_SIZE));
+ if (unlikely(!nbytes))
+ break;
- rk = ctx->aes_key.key_enc;
- } while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
- } else {
- aes_encrypt(&ctx->aes_key, tag, iv);
- put_unaligned_be32(2, iv + GCM_IV_SIZE);
+ if (unlikely(nbytes > 0 && nbytes < AES_BLOCK_SIZE))
+ memcpy(walk.dst.virt.addr,
+ buf + sizeof(buf) - nbytes, nbytes);
- while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
- int blocks = walk.nbytes / (2 * AES_BLOCK_SIZE) * 2;
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ } while (walk.nbytes);
+ } else {
+ while (walk.nbytes >= AES_BLOCK_SIZE) {
+ int blocks = walk.nbytes / AES_BLOCK_SIZE;
+ const u8 *src = walk.src.virt.addr;
u8 *dst = walk.dst.virt.addr;
- u8 *src = walk.src.virt.addr;
ghash_do_update(blocks, dg, walk.src.virt.addr,
- &ctx->ghash_key, NULL,
- pmull_ghash_update_p64);
+ &ctx->ghash_key, NULL, NULL);
do {
aes_encrypt(&ctx->aes_key, buf, iv);
@@ -620,49 +584,38 @@ static int gcm_decrypt(struct aead_request *req)
} while (--blocks > 0);
err = skcipher_walk_done(&walk,
- walk.nbytes % (2 * AES_BLOCK_SIZE));
+ walk.nbytes % AES_BLOCK_SIZE);
}
- if (walk.nbytes) {
- if (walk.nbytes > AES_BLOCK_SIZE) {
- u8 *iv2 = iv + AES_BLOCK_SIZE;
-
- memcpy(iv2, iv, AES_BLOCK_SIZE);
- crypto_inc(iv2, AES_BLOCK_SIZE);
- aes_encrypt(&ctx->aes_key, iv2, iv2);
- }
- aes_encrypt(&ctx->aes_key, iv, iv);
+ /* handle the tail */
+ if (walk.nbytes) {
+ memcpy(buf, walk.src.virt.addr, walk.nbytes);
+ memset(buf + walk.nbytes, 0, sizeof(buf) - walk.nbytes);
}
- }
- /* handle the tail */
- if (walk.nbytes) {
- const u8 *src = walk.src.virt.addr;
- const u8 *head = NULL;
- unsigned int nbytes = walk.nbytes;
+ tag = (u8 *)&lengths;
+ ghash_do_update(1, dg, tag, &ctx->ghash_key,
+ walk.nbytes ? buf : NULL, NULL);
- if (walk.nbytes > GHASH_BLOCK_SIZE) {
- head = src;
- src += GHASH_BLOCK_SIZE;
- nbytes %= GHASH_BLOCK_SIZE;
- }
+ if (walk.nbytes) {
+ aes_encrypt(&ctx->aes_key, buf, iv);
- memcpy(buf, src, nbytes);
- memset(buf + nbytes, 0, GHASH_BLOCK_SIZE - nbytes);
- ghash_do_update(!!nbytes, dg, buf, &ctx->ghash_key, head,
- pmull_ghash_update_p64);
+ crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr,
+ buf, walk.nbytes);
- crypto_xor_cpy(walk.dst.virt.addr, walk.src.virt.addr, iv,
- walk.nbytes);
+ err = skcipher_walk_done(&walk, 0);
+ }
- err = skcipher_walk_done(&walk, 0);
+ put_unaligned_be64(dg[1], tag);
+ put_unaligned_be64(dg[0], tag + 8);
+ put_unaligned_be32(1, iv + GCM_IV_SIZE);
+ aes_encrypt(&ctx->aes_key, iv, iv);
+ crypto_xor(tag, iv, AES_BLOCK_SIZE);
}
if (err)
return err;
- gcm_final(req, ctx, dg, tag, req->cryptlen - authsize);
-
/* compare calculated auth tag with the stored one */
scatterwalk_map_and_copy(buf, req->src,
req->assoclen + req->cryptlen - authsize,
@@ -675,7 +628,7 @@ static int gcm_decrypt(struct aead_request *req)
static struct aead_alg gcm_aes_alg = {
.ivsize = GCM_IV_SIZE,
- .chunksize = 2 * AES_BLOCK_SIZE,
+ .chunksize = AES_BLOCK_SIZE,
.maxauthsize = AES_BLOCK_SIZE,
.setkey = gcm_setkey,
.setauthsize = gcm_setauthsize,
diff --git a/arch/arm64/crypto/poly1305-armv8.pl b/arch/arm64/crypto/poly1305-armv8.pl
new file mode 100644
index 000000000000..6e5576d19af8
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-armv8.pl
@@ -0,0 +1,913 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, initially for the OpenSSL
+# project.
+# ====================================================================
+#
+# This module implements Poly1305 hash for ARMv8.
+#
+# June 2015
+#
+# Numbers are cycles per processed byte with poly1305_blocks alone.
+#
+# IALU/gcc-4.9 NEON
+#
+# Apple A7 1.86/+5% 0.72
+# Cortex-A53 2.69/+58% 1.47
+# Cortex-A57 2.70/+7% 1.14
+# Denver 1.64/+50% 1.18(*)
+# X-Gene 2.13/+68% 2.27
+# Mongoose 1.77/+75% 1.12
+# Kryo 2.70/+55% 1.13
+# ThunderX2 1.17/+95% 1.36
+#
+# (*) estimate based on resources availability is less than 1.0,
+# i.e. measured result is worse than expected, presumably binary
+# translator is not almighty;
+
+$flavour=shift;
+$output=shift;
+
+if ($flavour && $flavour ne "void") {
+ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+ ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+ ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+ die "can't locate arm-xlate.pl";
+
+ open STDOUT,"| \"$^X\" $xlate $flavour $output";
+} else {
+ open STDOUT,">$output";
+}
+
+my ($ctx,$inp,$len,$padbit) = map("x$_",(0..3));
+my ($mac,$nonce)=($inp,$len);
+
+my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) = map("x$_",(4..14));
+
+$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+.extern OPENSSL_armcap_P
+#endif
+
+.text
+
+// forward "declarations" are required for Apple
+.globl poly1305_blocks
+.globl poly1305_emit
+
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+ cmp $inp,xzr
+ stp xzr,xzr,[$ctx] // zero hash value
+ stp xzr,xzr,[$ctx,#16] // [along with is_base2_26]
+
+ csel x0,xzr,x0,eq
+ b.eq .Lno_key
+
+#ifndef __KERNEL__
+ adrp x17,OPENSSL_armcap_P
+ ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
+#endif
+
+ ldp $r0,$r1,[$inp] // load key
+ mov $s1,#0xfffffffc0fffffff
+ movk $s1,#0x0fff,lsl#48
+#ifdef __AARCH64EB__
+ rev $r0,$r0 // flip bytes
+ rev $r1,$r1
+#endif
+ and $r0,$r0,$s1 // &=0ffffffc0fffffff
+ and $s1,$s1,#-4
+ and $r1,$r1,$s1 // &=0ffffffc0ffffffc
+ mov w#$s1,#-1
+ stp $r0,$r1,[$ctx,#32] // save key value
+ str w#$s1,[$ctx,#48] // impossible key power value
+
+#ifndef __KERNEL__
+ tst w17,#ARMV7_NEON
+
+ adr $d0,.Lpoly1305_blocks
+ adr $r0,.Lpoly1305_blocks_neon
+ adr $d1,.Lpoly1305_emit
+
+ csel $d0,$d0,$r0,eq
+
+# ifdef __ILP32__
+ stp w#$d0,w#$d1,[$len]
+# else
+ stp $d0,$d1,[$len]
+# endif
+#endif
+ mov x0,#1
+.Lno_key:
+ ret
+.size poly1305_init,.-poly1305_init
+
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ ands $len,$len,#-16
+ b.eq .Lno_data
+
+ ldp $h0,$h1,[$ctx] // load hash value
+ ldp $h2,x17,[$ctx,#16] // [along with is_base2_26]
+ ldp $r0,$r1,[$ctx,#32] // load key value
+
+#ifdef __AARCH64EB__
+ lsr $d0,$h0,#32
+ mov w#$d1,w#$h0
+ lsr $d2,$h1,#32
+ mov w15,w#$h1
+ lsr x16,$h2,#32
+#else
+ mov w#$d0,w#$h0
+ lsr $d1,$h0,#32
+ mov w#$d2,w#$h1
+ lsr x15,$h1,#32
+ mov w16,w#$h2
+#endif
+
+ add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64
+ lsr $d1,$d2,#12
+ adds $d0,$d0,$d2,lsl#52
+ add $d1,$d1,x15,lsl#14
+ adc $d1,$d1,xzr
+ lsr $d2,x16,#24
+ adds $d1,$d1,x16,lsl#40
+ adc $d2,$d2,xzr
+
+ cmp x17,#0 // is_base2_26?
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+ csel $h0,$h0,$d0,eq // choose between radixes
+ csel $h1,$h1,$d1,eq
+ csel $h2,$h2,$d2,eq
+
+.Loop:
+ ldp $t0,$t1,[$inp],#16 // load input
+ sub $len,$len,#16
+#ifdef __AARCH64EB__
+ rev $t0,$t0
+ rev $t1,$t1
+#endif
+ adds $h0,$h0,$t0 // accumulate input
+ adcs $h1,$h1,$t1
+
+ mul $d0,$h0,$r0 // h0*r0
+ adc $h2,$h2,$padbit
+ umulh $d1,$h0,$r0
+
+ mul $t0,$h1,$s1 // h1*5*r1
+ umulh $t1,$h1,$s1
+
+ adds $d0,$d0,$t0
+ mul $t0,$h0,$r1 // h0*r1
+ adc $d1,$d1,$t1
+ umulh $d2,$h0,$r1
+
+ adds $d1,$d1,$t0
+ mul $t0,$h1,$r0 // h1*r0
+ adc $d2,$d2,xzr
+ umulh $t1,$h1,$r0
+
+ adds $d1,$d1,$t0
+ mul $t0,$h2,$s1 // h2*5*r1
+ adc $d2,$d2,$t1
+ mul $t1,$h2,$r0 // h2*r0
+
+ adds $d1,$d1,$t0
+ adc $d2,$d2,$t1
+
+ and $t0,$d2,#-4 // final reduction
+ and $h2,$d2,#3
+ add $t0,$t0,$d2,lsr#2
+ adds $h0,$d0,$t0
+ adcs $h1,$d1,xzr
+ adc $h2,$h2,xzr
+
+ cbnz $len,.Loop
+
+ stp $h0,$h1,[$ctx] // store hash value
+ stp $h2,xzr,[$ctx,#16] // [and clear is_base2_26]
+
+.Lno_data:
+ ret
+.size poly1305_blocks,.-poly1305_blocks
+
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ ldp $h0,$h1,[$ctx] // load hash base 2^64
+ ldp $h2,$r0,[$ctx,#16] // [along with is_base2_26]
+ ldp $t0,$t1,[$nonce] // load nonce
+
+#ifdef __AARCH64EB__
+ lsr $d0,$h0,#32
+ mov w#$d1,w#$h0
+ lsr $d2,$h1,#32
+ mov w15,w#$h1
+ lsr x16,$h2,#32
+#else
+ mov w#$d0,w#$h0
+ lsr $d1,$h0,#32
+ mov w#$d2,w#$h1
+ lsr x15,$h1,#32
+ mov w16,w#$h2
+#endif
+
+ add $d0,$d0,$d1,lsl#26 // base 2^26 -> base 2^64
+ lsr $d1,$d2,#12
+ adds $d0,$d0,$d2,lsl#52
+ add $d1,$d1,x15,lsl#14
+ adc $d1,$d1,xzr
+ lsr $d2,x16,#24
+ adds $d1,$d1,x16,lsl#40
+ adc $d2,$d2,xzr
+
+ cmp $r0,#0 // is_base2_26?
+ csel $h0,$h0,$d0,eq // choose between radixes
+ csel $h1,$h1,$d1,eq
+ csel $h2,$h2,$d2,eq
+
+ adds $d0,$h0,#5 // compare to modulus
+ adcs $d1,$h1,xzr
+ adc $d2,$h2,xzr
+
+ tst $d2,#-4 // see if it's carried/borrowed
+
+ csel $h0,$h0,$d0,eq
+ csel $h1,$h1,$d1,eq
+
+#ifdef __AARCH64EB__
+ ror $t0,$t0,#32 // flip nonce words
+ ror $t1,$t1,#32
+#endif
+ adds $h0,$h0,$t0 // accumulate nonce
+ adc $h1,$h1,$t1
+#ifdef __AARCH64EB__
+ rev $h0,$h0 // flip output bytes
+ rev $h1,$h1
+#endif
+ stp $h0,$h1,[$mac] // write result
+
+ ret
+.size poly1305_emit,.-poly1305_emit
+___
+my ($R0,$R1,$S1,$R2,$S2,$R3,$S3,$R4,$S4) = map("v$_.4s",(0..8));
+my ($IN01_0,$IN01_1,$IN01_2,$IN01_3,$IN01_4) = map("v$_.2s",(9..13));
+my ($IN23_0,$IN23_1,$IN23_2,$IN23_3,$IN23_4) = map("v$_.2s",(14..18));
+my ($ACC0,$ACC1,$ACC2,$ACC3,$ACC4) = map("v$_.2d",(19..23));
+my ($H0,$H1,$H2,$H3,$H4) = map("v$_.2s",(24..28));
+my ($T0,$T1,$MASK) = map("v$_",(29..31));
+
+my ($in2,$zeros)=("x16","x17");
+my $is_base2_26 = $zeros; # borrow
+
+$code.=<<___;
+.type poly1305_mult,%function
+.align 5
+poly1305_mult:
+ mul $d0,$h0,$r0 // h0*r0
+ umulh $d1,$h0,$r0
+
+ mul $t0,$h1,$s1 // h1*5*r1
+ umulh $t1,$h1,$s1
+
+ adds $d0,$d0,$t0
+ mul $t0,$h0,$r1 // h0*r1
+ adc $d1,$d1,$t1
+ umulh $d2,$h0,$r1
+
+ adds $d1,$d1,$t0
+ mul $t0,$h1,$r0 // h1*r0
+ adc $d2,$d2,xzr
+ umulh $t1,$h1,$r0
+
+ adds $d1,$d1,$t0
+ mul $t0,$h2,$s1 // h2*5*r1
+ adc $d2,$d2,$t1
+ mul $t1,$h2,$r0 // h2*r0
+
+ adds $d1,$d1,$t0
+ adc $d2,$d2,$t1
+
+ and $t0,$d2,#-4 // final reduction
+ and $h2,$d2,#3
+ add $t0,$t0,$d2,lsr#2
+ adds $h0,$d0,$t0
+ adcs $h1,$d1,xzr
+ adc $h2,$h2,xzr
+
+ ret
+.size poly1305_mult,.-poly1305_mult
+
+.type poly1305_splat,%function
+.align 4
+poly1305_splat:
+ and x12,$h0,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x13,$h0,#26,#26
+ extr x14,$h1,$h0,#52
+ and x14,x14,#0x03ffffff
+ ubfx x15,$h1,#14,#26
+ extr x16,$h2,$h1,#40
+
+ str w12,[$ctx,#16*0] // r0
+ add w12,w13,w13,lsl#2 // r1*5
+ str w13,[$ctx,#16*1] // r1
+ add w13,w14,w14,lsl#2 // r2*5
+ str w12,[$ctx,#16*2] // s1
+ str w14,[$ctx,#16*3] // r2
+ add w14,w15,w15,lsl#2 // r3*5
+ str w13,[$ctx,#16*4] // s2
+ str w15,[$ctx,#16*5] // r3
+ add w15,w16,w16,lsl#2 // r4*5
+ str w14,[$ctx,#16*6] // s3
+ str w16,[$ctx,#16*7] // r4
+ str w15,[$ctx,#16*8] // s4
+
+ ret
+.size poly1305_splat,.-poly1305_splat
+
+#ifdef __KERNEL__
+.globl poly1305_blocks_neon
+#endif
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr $is_base2_26,[$ctx,#24]
+ cmp $len,#128
+ b.lo .Lpoly1305_blocks
+
+ .inst 0xd503233f // paciasp
+ stp x29,x30,[sp,#-80]!
+ add x29,sp,#0
+
+ stp d8,d9,[sp,#16] // meet ABI requirements
+ stp d10,d11,[sp,#32]
+ stp d12,d13,[sp,#48]
+ stp d14,d15,[sp,#64]
+
+ cbz $is_base2_26,.Lbase2_64_neon
+
+ ldp w10,w11,[$ctx] // load hash value base 2^26
+ ldp w12,w13,[$ctx,#8]
+ ldr w14,[$ctx,#16]
+
+ tst $len,#31
+ b.eq .Leven_neon
+
+ ldp $r0,$r1,[$ctx,#32] // load key value
+
+ add $h0,x10,x11,lsl#26 // base 2^26 -> base 2^64
+ lsr $h1,x12,#12
+ adds $h0,$h0,x12,lsl#52
+ add $h1,$h1,x13,lsl#14
+ adc $h1,$h1,xzr
+ lsr $h2,x14,#24
+ adds $h1,$h1,x14,lsl#40
+ adc $d2,$h2,xzr // can be partially reduced...
+
+ ldp $d0,$d1,[$inp],#16 // load input
+ sub $len,$len,#16
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+
+#ifdef __AARCH64EB__
+ rev $d0,$d0
+ rev $d1,$d1
+#endif
+ adds $h0,$h0,$d0 // accumulate input
+ adcs $h1,$h1,$d1
+ adc $h2,$h2,$padbit
+
+ bl poly1305_mult
+
+ and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,$h0,#26,#26
+ extr x12,$h1,$h0,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,$h1,#14,#26
+ extr x14,$h2,$h1,#40
+
+ b .Leven_neon
+
+.align 4
+.Lbase2_64_neon:
+ ldp $r0,$r1,[$ctx,#32] // load key value
+
+ ldp $h0,$h1,[$ctx] // load hash value base 2^64
+ ldr $h2,[$ctx,#16]
+
+ tst $len,#31
+ b.eq .Linit_neon
+
+ ldp $d0,$d1,[$inp],#16 // load input
+ sub $len,$len,#16
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+#ifdef __AARCH64EB__
+ rev $d0,$d0
+ rev $d1,$d1
+#endif
+ adds $h0,$h0,$d0 // accumulate input
+ adcs $h1,$h1,$d1
+ adc $h2,$h2,$padbit
+
+ bl poly1305_mult
+
+.Linit_neon:
+ ldr w17,[$ctx,#48] // first table element
+ and x10,$h0,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,$h0,#26,#26
+ extr x12,$h1,$h0,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,$h1,#14,#26
+ extr x14,$h2,$h1,#40
+
+ cmp w17,#-1 // is value impossible?
+ b.ne .Leven_neon
+
+ fmov ${H0},x10
+ fmov ${H1},x11
+ fmov ${H2},x12
+ fmov ${H3},x13
+ fmov ${H4},x14
+
+ ////////////////////////////////// initialize r^n table
+ mov $h0,$r0 // r^1
+ add $s1,$r1,$r1,lsr#2 // s1 = r1 + (r1 >> 2)
+ mov $h1,$r1
+ mov $h2,xzr
+ add $ctx,$ctx,#48+12
+ bl poly1305_splat
+
+ bl poly1305_mult // r^2
+ sub $ctx,$ctx,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^3
+ sub $ctx,$ctx,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^4
+ sub $ctx,$ctx,#4
+ bl poly1305_splat
+ sub $ctx,$ctx,#48 // restore original $ctx
+ b .Ldo_neon
+
+.align 4
+.Leven_neon:
+ fmov ${H0},x10
+ fmov ${H1},x11
+ fmov ${H2},x12
+ fmov ${H3},x13
+ fmov ${H4},x14
+
+.Ldo_neon:
+ ldp x8,x12,[$inp,#32] // inp[2:3]
+ subs $len,$len,#64
+ ldp x9,x13,[$inp,#48]
+ add $in2,$inp,#96
+ adr $zeros,.Lzeros
+
+ lsl $padbit,$padbit,#24
+ add x15,$ctx,#48
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov $IN23_0,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,$padbit,x12,lsr#40
+ add x13,$padbit,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov $IN23_1,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ fmov $IN23_2,x8
+ fmov $IN23_3,x10
+ fmov $IN23_4,x12
+
+ ldp x8,x12,[$inp],#16 // inp[0:1]
+ ldp x9,x13,[$inp],#48
+
+ ld1 {$R0,$R1,$S1,$R2},[x15],#64
+ ld1 {$S2,$R3,$S3,$R4},[x15],#64
+ ld1 {$S4},[x15]
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov $IN01_0,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,$padbit,x12,lsr#40
+ add x13,$padbit,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov $IN01_1,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ movi $MASK.2d,#-1
+ fmov $IN01_2,x8
+ fmov $IN01_3,x10
+ fmov $IN01_4,x12
+ ushr $MASK.2d,$MASK.2d,#38
+
+ b.ls .Lskip_loop
+
+.align 4
+.Loop_neon:
+ ////////////////////////////////////////////////////////////////
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ // \___________________/
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ // \___________________/ \____________________/
+ //
+ // Note that we start with inp[2:3]*r^2. This is because it
+ // doesn't depend on reduction in previous iteration.
+ ////////////////////////////////////////////////////////////////
+ // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
+ // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
+ // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
+ // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
+ // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
+
+ subs $len,$len,#64
+ umull $ACC4,$IN23_0,${R4}[2]
+ csel $in2,$zeros,$in2,lo
+ umull $ACC3,$IN23_0,${R3}[2]
+ umull $ACC2,$IN23_0,${R2}[2]
+ ldp x8,x12,[$in2],#16 // inp[2:3] (or zero)
+ umull $ACC1,$IN23_0,${R1}[2]
+ ldp x9,x13,[$in2],#48
+ umull $ACC0,$IN23_0,${R0}[2]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ umlal $ACC4,$IN23_1,${R3}[2]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal $ACC3,$IN23_1,${R2}[2]
+ and x5,x9,#0x03ffffff
+ umlal $ACC2,$IN23_1,${R1}[2]
+ ubfx x6,x8,#26,#26
+ umlal $ACC1,$IN23_1,${R0}[2]
+ ubfx x7,x9,#26,#26
+ umlal $ACC0,$IN23_1,${S4}[2]
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+
+ umlal $ACC4,$IN23_2,${R2}[2]
+ extr x8,x12,x8,#52
+ umlal $ACC3,$IN23_2,${R1}[2]
+ extr x9,x13,x9,#52
+ umlal $ACC2,$IN23_2,${R0}[2]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal $ACC1,$IN23_2,${S4}[2]
+ fmov $IN23_0,x4
+ umlal $ACC0,$IN23_2,${S3}[2]
+ and x8,x8,#0x03ffffff
+
+ umlal $ACC4,$IN23_3,${R1}[2]
+ and x9,x9,#0x03ffffff
+ umlal $ACC3,$IN23_3,${R0}[2]
+ ubfx x10,x12,#14,#26
+ umlal $ACC2,$IN23_3,${S4}[2]
+ ubfx x11,x13,#14,#26
+ umlal $ACC1,$IN23_3,${S3}[2]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal $ACC0,$IN23_3,${S2}[2]
+ fmov $IN23_1,x6
+
+ add $IN01_2,$IN01_2,$H2
+ add x12,$padbit,x12,lsr#40
+ umlal $ACC4,$IN23_4,${R0}[2]
+ add x13,$padbit,x13,lsr#40
+ umlal $ACC3,$IN23_4,${S4}[2]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal $ACC2,$IN23_4,${S3}[2]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal $ACC1,$IN23_4,${S2}[2]
+ fmov $IN23_2,x8
+ umlal $ACC0,$IN23_4,${S1}[2]
+ fmov $IN23_3,x10
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4 and accumulate
+
+ add $IN01_0,$IN01_0,$H0
+ fmov $IN23_4,x12
+ umlal $ACC3,$IN01_2,${R1}[0]
+ ldp x8,x12,[$inp],#16 // inp[0:1]
+ umlal $ACC0,$IN01_2,${S3}[0]
+ ldp x9,x13,[$inp],#48
+ umlal $ACC4,$IN01_2,${R2}[0]
+ umlal $ACC1,$IN01_2,${S4}[0]
+ umlal $ACC2,$IN01_2,${R0}[0]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ add $IN01_1,$IN01_1,$H1
+ umlal $ACC3,$IN01_0,${R3}[0]
+ umlal $ACC4,$IN01_0,${R4}[0]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal $ACC2,$IN01_0,${R2}[0]
+ and x5,x9,#0x03ffffff
+ umlal $ACC0,$IN01_0,${R0}[0]
+ ubfx x6,x8,#26,#26
+ umlal $ACC1,$IN01_0,${R1}[0]
+ ubfx x7,x9,#26,#26
+
+ add $IN01_3,$IN01_3,$H3
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ umlal $ACC3,$IN01_1,${R2}[0]
+ extr x8,x12,x8,#52
+ umlal $ACC4,$IN01_1,${R3}[0]
+ extr x9,x13,x9,#52
+ umlal $ACC0,$IN01_1,${S4}[0]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal $ACC2,$IN01_1,${R1}[0]
+ fmov $IN01_0,x4
+ umlal $ACC1,$IN01_1,${R0}[0]
+ and x8,x8,#0x03ffffff
+
+ add $IN01_4,$IN01_4,$H4
+ and x9,x9,#0x03ffffff
+ umlal $ACC3,$IN01_3,${R0}[0]
+ ubfx x10,x12,#14,#26
+ umlal $ACC0,$IN01_3,${S2}[0]
+ ubfx x11,x13,#14,#26
+ umlal $ACC4,$IN01_3,${R1}[0]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal $ACC1,$IN01_3,${S3}[0]
+ fmov $IN01_1,x6
+ umlal $ACC2,$IN01_3,${S4}[0]
+ add x12,$padbit,x12,lsr#40
+
+ umlal $ACC3,$IN01_4,${S4}[0]
+ add x13,$padbit,x13,lsr#40
+ umlal $ACC0,$IN01_4,${S1}[0]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal $ACC4,$IN01_4,${R0}[0]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal $ACC1,$IN01_4,${S2}[0]
+ fmov $IN01_2,x8
+ umlal $ACC2,$IN01_4,${S3}[0]
+ fmov $IN01_3,x10
+ fmov $IN01_4,x12
+
+ /////////////////////////////////////////////////////////////////
+ // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ // and P. Schwabe
+ //
+ // [see discussion in poly1305-armv4 module]
+
+ ushr $T0.2d,$ACC3,#26
+ xtn $H3,$ACC3
+ ushr $T1.2d,$ACC0,#26
+ and $ACC0,$ACC0,$MASK.2d
+ add $ACC4,$ACC4,$T0.2d // h3 -> h4
+ bic $H3,#0xfc,lsl#24 // &=0x03ffffff
+ add $ACC1,$ACC1,$T1.2d // h0 -> h1
+
+ ushr $T0.2d,$ACC4,#26
+ xtn $H4,$ACC4
+ ushr $T1.2d,$ACC1,#26
+ xtn $H1,$ACC1
+ bic $H4,#0xfc,lsl#24
+ add $ACC2,$ACC2,$T1.2d // h1 -> h2
+
+ add $ACC0,$ACC0,$T0.2d
+ shl $T0.2d,$T0.2d,#2
+ shrn $T1.2s,$ACC2,#26
+ xtn $H2,$ACC2
+ add $ACC0,$ACC0,$T0.2d // h4 -> h0
+ bic $H1,#0xfc,lsl#24
+ add $H3,$H3,$T1.2s // h2 -> h3
+ bic $H2,#0xfc,lsl#24
+
+ shrn $T0.2s,$ACC0,#26
+ xtn $H0,$ACC0
+ ushr $T1.2s,$H3,#26
+ bic $H3,#0xfc,lsl#24
+ bic $H0,#0xfc,lsl#24
+ add $H1,$H1,$T0.2s // h0 -> h1
+ add $H4,$H4,$T1.2s // h3 -> h4
+
+ b.hi .Loop_neon
+
+.Lskip_loop:
+ dup $IN23_2,${IN23_2}[0]
+ add $IN01_2,$IN01_2,$H2
+
+ ////////////////////////////////////////////////////////////////
+ // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ adds $len,$len,#32
+ b.ne .Long_tail
+
+ dup $IN23_2,${IN01_2}[0]
+ add $IN23_0,$IN01_0,$H0
+ add $IN23_3,$IN01_3,$H3
+ add $IN23_1,$IN01_1,$H1
+ add $IN23_4,$IN01_4,$H4
+
+.Long_tail:
+ dup $IN23_0,${IN23_0}[0]
+ umull2 $ACC0,$IN23_2,${S3}
+ umull2 $ACC3,$IN23_2,${R1}
+ umull2 $ACC4,$IN23_2,${R2}
+ umull2 $ACC2,$IN23_2,${R0}
+ umull2 $ACC1,$IN23_2,${S4}
+
+ dup $IN23_1,${IN23_1}[0]
+ umlal2 $ACC0,$IN23_0,${R0}
+ umlal2 $ACC2,$IN23_0,${R2}
+ umlal2 $ACC3,$IN23_0,${R3}
+ umlal2 $ACC4,$IN23_0,${R4}
+ umlal2 $ACC1,$IN23_0,${R1}
+
+ dup $IN23_3,${IN23_3}[0]
+ umlal2 $ACC0,$IN23_1,${S4}
+ umlal2 $ACC3,$IN23_1,${R2}
+ umlal2 $ACC2,$IN23_1,${R1}
+ umlal2 $ACC4,$IN23_1,${R3}
+ umlal2 $ACC1,$IN23_1,${R0}
+
+ dup $IN23_4,${IN23_4}[0]
+ umlal2 $ACC3,$IN23_3,${R0}
+ umlal2 $ACC4,$IN23_3,${R1}
+ umlal2 $ACC0,$IN23_3,${S2}
+ umlal2 $ACC1,$IN23_3,${S3}
+ umlal2 $ACC2,$IN23_3,${S4}
+
+ umlal2 $ACC3,$IN23_4,${S4}
+ umlal2 $ACC0,$IN23_4,${S1}
+ umlal2 $ACC4,$IN23_4,${R0}
+ umlal2 $ACC1,$IN23_4,${S2}
+ umlal2 $ACC2,$IN23_4,${S3}
+
+ b.eq .Lshort_tail
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ add $IN01_0,$IN01_0,$H0
+ umlal $ACC3,$IN01_2,${R1}
+ umlal $ACC0,$IN01_2,${S3}
+ umlal $ACC4,$IN01_2,${R2}
+ umlal $ACC1,$IN01_2,${S4}
+ umlal $ACC2,$IN01_2,${R0}
+
+ add $IN01_1,$IN01_1,$H1
+ umlal $ACC3,$IN01_0,${R3}
+ umlal $ACC0,$IN01_0,${R0}
+ umlal $ACC4,$IN01_0,${R4}
+ umlal $ACC1,$IN01_0,${R1}
+ umlal $ACC2,$IN01_0,${R2}
+
+ add $IN01_3,$IN01_3,$H3
+ umlal $ACC3,$IN01_1,${R2}
+ umlal $ACC0,$IN01_1,${S4}
+ umlal $ACC4,$IN01_1,${R3}
+ umlal $ACC1,$IN01_1,${R0}
+ umlal $ACC2,$IN01_1,${R1}
+
+ add $IN01_4,$IN01_4,$H4
+ umlal $ACC3,$IN01_3,${R0}
+ umlal $ACC0,$IN01_3,${S2}
+ umlal $ACC4,$IN01_3,${R1}
+ umlal $ACC1,$IN01_3,${S3}
+ umlal $ACC2,$IN01_3,${S4}
+
+ umlal $ACC3,$IN01_4,${S4}
+ umlal $ACC0,$IN01_4,${S1}
+ umlal $ACC4,$IN01_4,${R0}
+ umlal $ACC1,$IN01_4,${S2}
+ umlal $ACC2,$IN01_4,${S3}
+
+.Lshort_tail:
+ ////////////////////////////////////////////////////////////////
+ // horizontal add
+
+ addp $ACC3,$ACC3,$ACC3
+ ldp d8,d9,[sp,#16] // meet ABI requirements
+ addp $ACC0,$ACC0,$ACC0
+ ldp d10,d11,[sp,#32]
+ addp $ACC4,$ACC4,$ACC4
+ ldp d12,d13,[sp,#48]
+ addp $ACC1,$ACC1,$ACC1
+ ldp d14,d15,[sp,#64]
+ addp $ACC2,$ACC2,$ACC2
+ ldr x30,[sp,#8]
+ .inst 0xd50323bf // autiasp
+
+ ////////////////////////////////////////////////////////////////
+ // lazy reduction, but without narrowing
+
+ ushr $T0.2d,$ACC3,#26
+ and $ACC3,$ACC3,$MASK.2d
+ ushr $T1.2d,$ACC0,#26
+ and $ACC0,$ACC0,$MASK.2d
+
+ add $ACC4,$ACC4,$T0.2d // h3 -> h4
+ add $ACC1,$ACC1,$T1.2d // h0 -> h1
+
+ ushr $T0.2d,$ACC4,#26
+ and $ACC4,$ACC4,$MASK.2d
+ ushr $T1.2d,$ACC1,#26
+ and $ACC1,$ACC1,$MASK.2d
+ add $ACC2,$ACC2,$T1.2d // h1 -> h2
+
+ add $ACC0,$ACC0,$T0.2d
+ shl $T0.2d,$T0.2d,#2
+ ushr $T1.2d,$ACC2,#26
+ and $ACC2,$ACC2,$MASK.2d
+ add $ACC0,$ACC0,$T0.2d // h4 -> h0
+ add $ACC3,$ACC3,$T1.2d // h2 -> h3
+
+ ushr $T0.2d,$ACC0,#26
+ and $ACC0,$ACC0,$MASK.2d
+ ushr $T1.2d,$ACC3,#26
+ and $ACC3,$ACC3,$MASK.2d
+ add $ACC1,$ACC1,$T0.2d // h0 -> h1
+ add $ACC4,$ACC4,$T1.2d // h3 -> h4
+
+ ////////////////////////////////////////////////////////////////
+ // write the result, can be partially reduced
+
+ st4 {$ACC0,$ACC1,$ACC2,$ACC3}[0],[$ctx],#16
+ mov x4,#1
+ st1 {$ACC4}[0],[$ctx]
+ str x4,[$ctx,#8] // set is_base2_26
+
+ ldr x29,[sp],#80
+ ret
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0
+.asciz "Poly1305 for ARMv8, CRYPTOGAMS by \@dot-asm"
+.align 2
+#if !defined(__KERNEL__) && !defined(_WIN64)
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
+___
+
+foreach (split("\n",$code)) {
+ s/\b(shrn\s+v[0-9]+)\.[24]d/$1.2s/ or
+ s/\b(fmov\s+)v([0-9]+)[^,]*,\s*x([0-9]+)/$1d$2,x$3/ or
+ (m/\bdup\b/ and (s/\.[24]s/.2d/g or 1)) or
+ (m/\b(eor|and)/ and (s/\.[248][sdh]/.16b/g or 1)) or
+ (m/\bum(ul|la)l\b/ and (s/\.4s/.2s/g or 1)) or
+ (m/\bum(ul|la)l2\b/ and (s/\.2s/.4s/g or 1)) or
+ (m/\bst[1-4]\s+{[^}]+}\[/ and (s/\.[24]d/.s/g or 1));
+
+ s/\.[124]([sd])\[/.$1\[/;
+ s/w#x([0-9]+)/w$1/g;
+
+ print $_,"\n";
+}
+close STDOUT;
diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped
new file mode 100644
index 000000000000..8d1c4e420ccd
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-core.S_shipped
@@ -0,0 +1,835 @@
+#ifndef __KERNEL__
+# include "arm_arch.h"
+.extern OPENSSL_armcap_P
+#endif
+
+.text
+
+// forward "declarations" are required for Apple
+.globl poly1305_blocks
+.globl poly1305_emit
+
+.globl poly1305_init
+.type poly1305_init,%function
+.align 5
+poly1305_init:
+ cmp x1,xzr
+ stp xzr,xzr,[x0] // zero hash value
+ stp xzr,xzr,[x0,#16] // [along with is_base2_26]
+
+ csel x0,xzr,x0,eq
+ b.eq .Lno_key
+
+#ifndef __KERNEL__
+ adrp x17,OPENSSL_armcap_P
+ ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
+#endif
+
+ ldp x7,x8,[x1] // load key
+ mov x9,#0xfffffffc0fffffff
+ movk x9,#0x0fff,lsl#48
+#ifdef __AARCH64EB__
+ rev x7,x7 // flip bytes
+ rev x8,x8
+#endif
+ and x7,x7,x9 // &=0ffffffc0fffffff
+ and x9,x9,#-4
+ and x8,x8,x9 // &=0ffffffc0ffffffc
+ mov w9,#-1
+ stp x7,x8,[x0,#32] // save key value
+ str w9,[x0,#48] // impossible key power value
+
+#ifndef __KERNEL__
+ tst w17,#ARMV7_NEON
+
+ adr x12,.Lpoly1305_blocks
+ adr x7,.Lpoly1305_blocks_neon
+ adr x13,.Lpoly1305_emit
+
+ csel x12,x12,x7,eq
+
+# ifdef __ILP32__
+ stp w12,w13,[x2]
+# else
+ stp x12,x13,[x2]
+# endif
+#endif
+ mov x0,#1
+.Lno_key:
+ ret
+.size poly1305_init,.-poly1305_init
+
+.type poly1305_blocks,%function
+.align 5
+poly1305_blocks:
+.Lpoly1305_blocks:
+ ands x2,x2,#-16
+ b.eq .Lno_data
+
+ ldp x4,x5,[x0] // load hash value
+ ldp x6,x17,[x0,#16] // [along with is_base2_26]
+ ldp x7,x8,[x0,#32] // load key value
+
+#ifdef __AARCH64EB__
+ lsr x12,x4,#32
+ mov w13,w4
+ lsr x14,x5,#32
+ mov w15,w5
+ lsr x16,x6,#32
+#else
+ mov w12,w4
+ lsr x13,x4,#32
+ mov w14,w5
+ lsr x15,x5,#32
+ mov w16,w6
+#endif
+
+ add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
+ lsr x13,x14,#12
+ adds x12,x12,x14,lsl#52
+ add x13,x13,x15,lsl#14
+ adc x13,x13,xzr
+ lsr x14,x16,#24
+ adds x13,x13,x16,lsl#40
+ adc x14,x14,xzr
+
+ cmp x17,#0 // is_base2_26?
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+ csel x4,x4,x12,eq // choose between radixes
+ csel x5,x5,x13,eq
+ csel x6,x6,x14,eq
+
+.Loop:
+ ldp x10,x11,[x1],#16 // load input
+ sub x2,x2,#16
+#ifdef __AARCH64EB__
+ rev x10,x10
+ rev x11,x11
+#endif
+ adds x4,x4,x10 // accumulate input
+ adcs x5,x5,x11
+
+ mul x12,x4,x7 // h0*r0
+ adc x6,x6,x3
+ umulh x13,x4,x7
+
+ mul x10,x5,x9 // h1*5*r1
+ umulh x11,x5,x9
+
+ adds x12,x12,x10
+ mul x10,x4,x8 // h0*r1
+ adc x13,x13,x11
+ umulh x14,x4,x8
+
+ adds x13,x13,x10
+ mul x10,x5,x7 // h1*r0
+ adc x14,x14,xzr
+ umulh x11,x5,x7
+
+ adds x13,x13,x10
+ mul x10,x6,x9 // h2*5*r1
+ adc x14,x14,x11
+ mul x11,x6,x7 // h2*r0
+
+ adds x13,x13,x10
+ adc x14,x14,x11
+
+ and x10,x14,#-4 // final reduction
+ and x6,x14,#3
+ add x10,x10,x14,lsr#2
+ adds x4,x12,x10
+ adcs x5,x13,xzr
+ adc x6,x6,xzr
+
+ cbnz x2,.Loop
+
+ stp x4,x5,[x0] // store hash value
+ stp x6,xzr,[x0,#16] // [and clear is_base2_26]
+
+.Lno_data:
+ ret
+.size poly1305_blocks,.-poly1305_blocks
+
+.type poly1305_emit,%function
+.align 5
+poly1305_emit:
+.Lpoly1305_emit:
+ ldp x4,x5,[x0] // load hash base 2^64
+ ldp x6,x7,[x0,#16] // [along with is_base2_26]
+ ldp x10,x11,[x2] // load nonce
+
+#ifdef __AARCH64EB__
+ lsr x12,x4,#32
+ mov w13,w4
+ lsr x14,x5,#32
+ mov w15,w5
+ lsr x16,x6,#32
+#else
+ mov w12,w4
+ lsr x13,x4,#32
+ mov w14,w5
+ lsr x15,x5,#32
+ mov w16,w6
+#endif
+
+ add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
+ lsr x13,x14,#12
+ adds x12,x12,x14,lsl#52
+ add x13,x13,x15,lsl#14
+ adc x13,x13,xzr
+ lsr x14,x16,#24
+ adds x13,x13,x16,lsl#40
+ adc x14,x14,xzr
+
+ cmp x7,#0 // is_base2_26?
+ csel x4,x4,x12,eq // choose between radixes
+ csel x5,x5,x13,eq
+ csel x6,x6,x14,eq
+
+ adds x12,x4,#5 // compare to modulus
+ adcs x13,x5,xzr
+ adc x14,x6,xzr
+
+ tst x14,#-4 // see if it's carried/borrowed
+
+ csel x4,x4,x12,eq
+ csel x5,x5,x13,eq
+
+#ifdef __AARCH64EB__
+ ror x10,x10,#32 // flip nonce words
+ ror x11,x11,#32
+#endif
+ adds x4,x4,x10 // accumulate nonce
+ adc x5,x5,x11
+#ifdef __AARCH64EB__
+ rev x4,x4 // flip output bytes
+ rev x5,x5
+#endif
+ stp x4,x5,[x1] // write result
+
+ ret
+.size poly1305_emit,.-poly1305_emit
+.type poly1305_mult,%function
+.align 5
+poly1305_mult:
+ mul x12,x4,x7 // h0*r0
+ umulh x13,x4,x7
+
+ mul x10,x5,x9 // h1*5*r1
+ umulh x11,x5,x9
+
+ adds x12,x12,x10
+ mul x10,x4,x8 // h0*r1
+ adc x13,x13,x11
+ umulh x14,x4,x8
+
+ adds x13,x13,x10
+ mul x10,x5,x7 // h1*r0
+ adc x14,x14,xzr
+ umulh x11,x5,x7
+
+ adds x13,x13,x10
+ mul x10,x6,x9 // h2*5*r1
+ adc x14,x14,x11
+ mul x11,x6,x7 // h2*r0
+
+ adds x13,x13,x10
+ adc x14,x14,x11
+
+ and x10,x14,#-4 // final reduction
+ and x6,x14,#3
+ add x10,x10,x14,lsr#2
+ adds x4,x12,x10
+ adcs x5,x13,xzr
+ adc x6,x6,xzr
+
+ ret
+.size poly1305_mult,.-poly1305_mult
+
+.type poly1305_splat,%function
+.align 4
+poly1305_splat:
+ and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x13,x4,#26,#26
+ extr x14,x5,x4,#52
+ and x14,x14,#0x03ffffff
+ ubfx x15,x5,#14,#26
+ extr x16,x6,x5,#40
+
+ str w12,[x0,#16*0] // r0
+ add w12,w13,w13,lsl#2 // r1*5
+ str w13,[x0,#16*1] // r1
+ add w13,w14,w14,lsl#2 // r2*5
+ str w12,[x0,#16*2] // s1
+ str w14,[x0,#16*3] // r2
+ add w14,w15,w15,lsl#2 // r3*5
+ str w13,[x0,#16*4] // s2
+ str w15,[x0,#16*5] // r3
+ add w15,w16,w16,lsl#2 // r4*5
+ str w14,[x0,#16*6] // s3
+ str w16,[x0,#16*7] // r4
+ str w15,[x0,#16*8] // s4
+
+ ret
+.size poly1305_splat,.-poly1305_splat
+
+#ifdef __KERNEL__
+.globl poly1305_blocks_neon
+#endif
+.type poly1305_blocks_neon,%function
+.align 5
+poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
+ ldr x17,[x0,#24]
+ cmp x2,#128
+ b.lo .Lpoly1305_blocks
+
+ .inst 0xd503233f // paciasp
+ stp x29,x30,[sp,#-80]!
+ add x29,sp,#0
+
+ stp d8,d9,[sp,#16] // meet ABI requirements
+ stp d10,d11,[sp,#32]
+ stp d12,d13,[sp,#48]
+ stp d14,d15,[sp,#64]
+
+ cbz x17,.Lbase2_64_neon
+
+ ldp w10,w11,[x0] // load hash value base 2^26
+ ldp w12,w13,[x0,#8]
+ ldr w14,[x0,#16]
+
+ tst x2,#31
+ b.eq .Leven_neon
+
+ ldp x7,x8,[x0,#32] // load key value
+
+ add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
+ lsr x5,x12,#12
+ adds x4,x4,x12,lsl#52
+ add x5,x5,x13,lsl#14
+ adc x5,x5,xzr
+ lsr x6,x14,#24
+ adds x5,x5,x14,lsl#40
+ adc x14,x6,xzr // can be partially reduced...
+
+ ldp x12,x13,[x1],#16 // load input
+ sub x2,x2,#16
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+
+#ifdef __AARCH64EB__
+ rev x12,x12
+ rev x13,x13
+#endif
+ adds x4,x4,x12 // accumulate input
+ adcs x5,x5,x13
+ adc x6,x6,x3
+
+ bl poly1305_mult
+
+ and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,x4,#26,#26
+ extr x12,x5,x4,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,x5,#14,#26
+ extr x14,x6,x5,#40
+
+ b .Leven_neon
+
+.align 4
+.Lbase2_64_neon:
+ ldp x7,x8,[x0,#32] // load key value
+
+ ldp x4,x5,[x0] // load hash value base 2^64
+ ldr x6,[x0,#16]
+
+ tst x2,#31
+ b.eq .Linit_neon
+
+ ldp x12,x13,[x1],#16 // load input
+ sub x2,x2,#16
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+#ifdef __AARCH64EB__
+ rev x12,x12
+ rev x13,x13
+#endif
+ adds x4,x4,x12 // accumulate input
+ adcs x5,x5,x13
+ adc x6,x6,x3
+
+ bl poly1305_mult
+
+.Linit_neon:
+ ldr w17,[x0,#48] // first table element
+ and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
+ ubfx x11,x4,#26,#26
+ extr x12,x5,x4,#52
+ and x12,x12,#0x03ffffff
+ ubfx x13,x5,#14,#26
+ extr x14,x6,x5,#40
+
+ cmp w17,#-1 // is value impossible?
+ b.ne .Leven_neon
+
+ fmov d24,x10
+ fmov d25,x11
+ fmov d26,x12
+ fmov d27,x13
+ fmov d28,x14
+
+ ////////////////////////////////// initialize r^n table
+ mov x4,x7 // r^1
+ add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
+ mov x5,x8
+ mov x6,xzr
+ add x0,x0,#48+12
+ bl poly1305_splat
+
+ bl poly1305_mult // r^2
+ sub x0,x0,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^3
+ sub x0,x0,#4
+ bl poly1305_splat
+
+ bl poly1305_mult // r^4
+ sub x0,x0,#4
+ bl poly1305_splat
+ sub x0,x0,#48 // restore original x0
+ b .Ldo_neon
+
+.align 4
+.Leven_neon:
+ fmov d24,x10
+ fmov d25,x11
+ fmov d26,x12
+ fmov d27,x13
+ fmov d28,x14
+
+.Ldo_neon:
+ ldp x8,x12,[x1,#32] // inp[2:3]
+ subs x2,x2,#64
+ ldp x9,x13,[x1,#48]
+ add x16,x1,#96
+ adr x17,.Lzeros
+
+ lsl x3,x3,#24
+ add x15,x0,#48
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov d14,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,x3,x12,lsr#40
+ add x13,x3,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov d15,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ fmov d16,x8
+ fmov d17,x10
+ fmov d18,x12
+
+ ldp x8,x12,[x1],#16 // inp[0:1]
+ ldp x9,x13,[x1],#48
+
+ ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
+ ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
+ ld1 {v8.4s},[x15]
+
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ and x5,x9,#0x03ffffff
+ ubfx x6,x8,#26,#26
+ ubfx x7,x9,#26,#26
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ extr x8,x12,x8,#52
+ extr x9,x13,x9,#52
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ fmov d9,x4
+ and x8,x8,#0x03ffffff
+ and x9,x9,#0x03ffffff
+ ubfx x10,x12,#14,#26
+ ubfx x11,x13,#14,#26
+ add x12,x3,x12,lsr#40
+ add x13,x3,x13,lsr#40
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ fmov d10,x6
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ movi v31.2d,#-1
+ fmov d11,x8
+ fmov d12,x10
+ fmov d13,x12
+ ushr v31.2d,v31.2d,#38
+
+ b.ls .Lskip_loop
+
+.align 4
+.Loop_neon:
+ ////////////////////////////////////////////////////////////////
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
+ // ___________________/
+ // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
+ // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
+ // ___________________/ ____________________/
+ //
+ // Note that we start with inp[2:3]*r^2. This is because it
+ // doesn't depend on reduction in previous iteration.
+ ////////////////////////////////////////////////////////////////
+ // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
+ // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
+ // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
+ // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
+ // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
+
+ subs x2,x2,#64
+ umull v23.2d,v14.2s,v7.s[2]
+ csel x16,x17,x16,lo
+ umull v22.2d,v14.2s,v5.s[2]
+ umull v21.2d,v14.2s,v3.s[2]
+ ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
+ umull v20.2d,v14.2s,v1.s[2]
+ ldp x9,x13,[x16],#48
+ umull v19.2d,v14.2s,v0.s[2]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ umlal v23.2d,v15.2s,v5.s[2]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal v22.2d,v15.2s,v3.s[2]
+ and x5,x9,#0x03ffffff
+ umlal v21.2d,v15.2s,v1.s[2]
+ ubfx x6,x8,#26,#26
+ umlal v20.2d,v15.2s,v0.s[2]
+ ubfx x7,x9,#26,#26
+ umlal v19.2d,v15.2s,v8.s[2]
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+
+ umlal v23.2d,v16.2s,v3.s[2]
+ extr x8,x12,x8,#52
+ umlal v22.2d,v16.2s,v1.s[2]
+ extr x9,x13,x9,#52
+ umlal v21.2d,v16.2s,v0.s[2]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal v20.2d,v16.2s,v8.s[2]
+ fmov d14,x4
+ umlal v19.2d,v16.2s,v6.s[2]
+ and x8,x8,#0x03ffffff
+
+ umlal v23.2d,v17.2s,v1.s[2]
+ and x9,x9,#0x03ffffff
+ umlal v22.2d,v17.2s,v0.s[2]
+ ubfx x10,x12,#14,#26
+ umlal v21.2d,v17.2s,v8.s[2]
+ ubfx x11,x13,#14,#26
+ umlal v20.2d,v17.2s,v6.s[2]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal v19.2d,v17.2s,v4.s[2]
+ fmov d15,x6
+
+ add v11.2s,v11.2s,v26.2s
+ add x12,x3,x12,lsr#40
+ umlal v23.2d,v18.2s,v0.s[2]
+ add x13,x3,x13,lsr#40
+ umlal v22.2d,v18.2s,v8.s[2]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal v21.2d,v18.2s,v6.s[2]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal v20.2d,v18.2s,v4.s[2]
+ fmov d16,x8
+ umlal v19.2d,v18.2s,v2.s[2]
+ fmov d17,x10
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4 and accumulate
+
+ add v9.2s,v9.2s,v24.2s
+ fmov d18,x12
+ umlal v22.2d,v11.2s,v1.s[0]
+ ldp x8,x12,[x1],#16 // inp[0:1]
+ umlal v19.2d,v11.2s,v6.s[0]
+ ldp x9,x13,[x1],#48
+ umlal v23.2d,v11.2s,v3.s[0]
+ umlal v20.2d,v11.2s,v8.s[0]
+ umlal v21.2d,v11.2s,v0.s[0]
+#ifdef __AARCH64EB__
+ rev x8,x8
+ rev x12,x12
+ rev x9,x9
+ rev x13,x13
+#endif
+
+ add v10.2s,v10.2s,v25.2s
+ umlal v22.2d,v9.2s,v5.s[0]
+ umlal v23.2d,v9.2s,v7.s[0]
+ and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
+ umlal v21.2d,v9.2s,v3.s[0]
+ and x5,x9,#0x03ffffff
+ umlal v19.2d,v9.2s,v0.s[0]
+ ubfx x6,x8,#26,#26
+ umlal v20.2d,v9.2s,v1.s[0]
+ ubfx x7,x9,#26,#26
+
+ add v12.2s,v12.2s,v27.2s
+ add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
+ umlal v22.2d,v10.2s,v3.s[0]
+ extr x8,x12,x8,#52
+ umlal v23.2d,v10.2s,v5.s[0]
+ extr x9,x13,x9,#52
+ umlal v19.2d,v10.2s,v8.s[0]
+ add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
+ umlal v21.2d,v10.2s,v1.s[0]
+ fmov d9,x4
+ umlal v20.2d,v10.2s,v0.s[0]
+ and x8,x8,#0x03ffffff
+
+ add v13.2s,v13.2s,v28.2s
+ and x9,x9,#0x03ffffff
+ umlal v22.2d,v12.2s,v0.s[0]
+ ubfx x10,x12,#14,#26
+ umlal v19.2d,v12.2s,v4.s[0]
+ ubfx x11,x13,#14,#26
+ umlal v23.2d,v12.2s,v1.s[0]
+ add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
+ umlal v20.2d,v12.2s,v6.s[0]
+ fmov d10,x6
+ umlal v21.2d,v12.2s,v8.s[0]
+ add x12,x3,x12,lsr#40
+
+ umlal v22.2d,v13.2s,v8.s[0]
+ add x13,x3,x13,lsr#40
+ umlal v19.2d,v13.2s,v2.s[0]
+ add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
+ umlal v23.2d,v13.2s,v0.s[0]
+ add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
+ umlal v20.2d,v13.2s,v4.s[0]
+ fmov d11,x8
+ umlal v21.2d,v13.2s,v6.s[0]
+ fmov d12,x10
+ fmov d13,x12
+
+ /////////////////////////////////////////////////////////////////
+ // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
+ // and P. Schwabe
+ //
+ // [see discussion in poly1305-armv4 module]
+
+ ushr v29.2d,v22.2d,#26
+ xtn v27.2s,v22.2d
+ ushr v30.2d,v19.2d,#26
+ and v19.16b,v19.16b,v31.16b
+ add v23.2d,v23.2d,v29.2d // h3 -> h4
+ bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff
+ add v20.2d,v20.2d,v30.2d // h0 -> h1
+
+ ushr v29.2d,v23.2d,#26
+ xtn v28.2s,v23.2d
+ ushr v30.2d,v20.2d,#26
+ xtn v25.2s,v20.2d
+ bic v28.2s,#0xfc,lsl#24
+ add v21.2d,v21.2d,v30.2d // h1 -> h2
+
+ add v19.2d,v19.2d,v29.2d
+ shl v29.2d,v29.2d,#2
+ shrn v30.2s,v21.2d,#26
+ xtn v26.2s,v21.2d
+ add v19.2d,v19.2d,v29.2d // h4 -> h0
+ bic v25.2s,#0xfc,lsl#24
+ add v27.2s,v27.2s,v30.2s // h2 -> h3
+ bic v26.2s,#0xfc,lsl#24
+
+ shrn v29.2s,v19.2d,#26
+ xtn v24.2s,v19.2d
+ ushr v30.2s,v27.2s,#26
+ bic v27.2s,#0xfc,lsl#24
+ bic v24.2s,#0xfc,lsl#24
+ add v25.2s,v25.2s,v29.2s // h0 -> h1
+ add v28.2s,v28.2s,v30.2s // h3 -> h4
+
+ b.hi .Loop_neon
+
+.Lskip_loop:
+ dup v16.2d,v16.d[0]
+ add v11.2s,v11.2s,v26.2s
+
+ ////////////////////////////////////////////////////////////////
+ // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
+
+ adds x2,x2,#32
+ b.ne .Long_tail
+
+ dup v16.2d,v11.d[0]
+ add v14.2s,v9.2s,v24.2s
+ add v17.2s,v12.2s,v27.2s
+ add v15.2s,v10.2s,v25.2s
+ add v18.2s,v13.2s,v28.2s
+
+.Long_tail:
+ dup v14.2d,v14.d[0]
+ umull2 v19.2d,v16.4s,v6.4s
+ umull2 v22.2d,v16.4s,v1.4s
+ umull2 v23.2d,v16.4s,v3.4s
+ umull2 v21.2d,v16.4s,v0.4s
+ umull2 v20.2d,v16.4s,v8.4s
+
+ dup v15.2d,v15.d[0]
+ umlal2 v19.2d,v14.4s,v0.4s
+ umlal2 v21.2d,v14.4s,v3.4s
+ umlal2 v22.2d,v14.4s,v5.4s
+ umlal2 v23.2d,v14.4s,v7.4s
+ umlal2 v20.2d,v14.4s,v1.4s
+
+ dup v17.2d,v17.d[0]
+ umlal2 v19.2d,v15.4s,v8.4s
+ umlal2 v22.2d,v15.4s,v3.4s
+ umlal2 v21.2d,v15.4s,v1.4s
+ umlal2 v23.2d,v15.4s,v5.4s
+ umlal2 v20.2d,v15.4s,v0.4s
+
+ dup v18.2d,v18.d[0]
+ umlal2 v22.2d,v17.4s,v0.4s
+ umlal2 v23.2d,v17.4s,v1.4s
+ umlal2 v19.2d,v17.4s,v4.4s
+ umlal2 v20.2d,v17.4s,v6.4s
+ umlal2 v21.2d,v17.4s,v8.4s
+
+ umlal2 v22.2d,v18.4s,v8.4s
+ umlal2 v19.2d,v18.4s,v2.4s
+ umlal2 v23.2d,v18.4s,v0.4s
+ umlal2 v20.2d,v18.4s,v4.4s
+ umlal2 v21.2d,v18.4s,v6.4s
+
+ b.eq .Lshort_tail
+
+ ////////////////////////////////////////////////////////////////
+ // (hash+inp[0:1])*r^4:r^3 and accumulate
+
+ add v9.2s,v9.2s,v24.2s
+ umlal v22.2d,v11.2s,v1.2s
+ umlal v19.2d,v11.2s,v6.2s
+ umlal v23.2d,v11.2s,v3.2s
+ umlal v20.2d,v11.2s,v8.2s
+ umlal v21.2d,v11.2s,v0.2s
+
+ add v10.2s,v10.2s,v25.2s
+ umlal v22.2d,v9.2s,v5.2s
+ umlal v19.2d,v9.2s,v0.2s
+ umlal v23.2d,v9.2s,v7.2s
+ umlal v20.2d,v9.2s,v1.2s
+ umlal v21.2d,v9.2s,v3.2s
+
+ add v12.2s,v12.2s,v27.2s
+ umlal v22.2d,v10.2s,v3.2s
+ umlal v19.2d,v10.2s,v8.2s
+ umlal v23.2d,v10.2s,v5.2s
+ umlal v20.2d,v10.2s,v0.2s
+ umlal v21.2d,v10.2s,v1.2s
+
+ add v13.2s,v13.2s,v28.2s
+ umlal v22.2d,v12.2s,v0.2s
+ umlal v19.2d,v12.2s,v4.2s
+ umlal v23.2d,v12.2s,v1.2s
+ umlal v20.2d,v12.2s,v6.2s
+ umlal v21.2d,v12.2s,v8.2s
+
+ umlal v22.2d,v13.2s,v8.2s
+ umlal v19.2d,v13.2s,v2.2s
+ umlal v23.2d,v13.2s,v0.2s
+ umlal v20.2d,v13.2s,v4.2s
+ umlal v21.2d,v13.2s,v6.2s
+
+.Lshort_tail:
+ ////////////////////////////////////////////////////////////////
+ // horizontal add
+
+ addp v22.2d,v22.2d,v22.2d
+ ldp d8,d9,[sp,#16] // meet ABI requirements
+ addp v19.2d,v19.2d,v19.2d
+ ldp d10,d11,[sp,#32]
+ addp v23.2d,v23.2d,v23.2d
+ ldp d12,d13,[sp,#48]
+ addp v20.2d,v20.2d,v20.2d
+ ldp d14,d15,[sp,#64]
+ addp v21.2d,v21.2d,v21.2d
+ ldr x30,[sp,#8]
+ .inst 0xd50323bf // autiasp
+
+ ////////////////////////////////////////////////////////////////
+ // lazy reduction, but without narrowing
+
+ ushr v29.2d,v22.2d,#26
+ and v22.16b,v22.16b,v31.16b
+ ushr v30.2d,v19.2d,#26
+ and v19.16b,v19.16b,v31.16b
+
+ add v23.2d,v23.2d,v29.2d // h3 -> h4
+ add v20.2d,v20.2d,v30.2d // h0 -> h1
+
+ ushr v29.2d,v23.2d,#26
+ and v23.16b,v23.16b,v31.16b
+ ushr v30.2d,v20.2d,#26
+ and v20.16b,v20.16b,v31.16b
+ add v21.2d,v21.2d,v30.2d // h1 -> h2
+
+ add v19.2d,v19.2d,v29.2d
+ shl v29.2d,v29.2d,#2
+ ushr v30.2d,v21.2d,#26
+ and v21.16b,v21.16b,v31.16b
+ add v19.2d,v19.2d,v29.2d // h4 -> h0
+ add v22.2d,v22.2d,v30.2d // h2 -> h3
+
+ ushr v29.2d,v19.2d,#26
+ and v19.16b,v19.16b,v31.16b
+ ushr v30.2d,v22.2d,#26
+ and v22.16b,v22.16b,v31.16b
+ add v20.2d,v20.2d,v29.2d // h0 -> h1
+ add v23.2d,v23.2d,v30.2d // h3 -> h4
+
+ ////////////////////////////////////////////////////////////////
+ // write the result, can be partially reduced
+
+ st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
+ mov x4,#1
+ st1 {v23.s}[0],[x0]
+ str x4,[x0,#8] // set is_base2_26
+
+ ldr x29,[sp],#80
+ ret
+.size poly1305_blocks_neon,.-poly1305_blocks_neon
+
+.align 5
+.Lzeros:
+.long 0,0,0,0,0,0,0,0
+.asciz "Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm"
+.align 2
+#if !defined(__KERNEL__) && !defined(_WIN64)
+.comm OPENSSL_armcap_P,4,4
+.hidden OPENSSL_armcap_P
+#endif
diff --git a/arch/arm64/crypto/poly1305-glue.c b/arch/arm64/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..dd843d0ee83a
--- /dev/null
+++ b/arch/arm64/crypto/poly1305-glue.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/hwcap.h>
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <crypto/internal/simd.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/jump_label.h>
+#include <linux/module.h>
+
+asmlinkage void poly1305_init_arm64(void *state, const u8 *key);
+asmlinkage void poly1305_blocks(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit(void *state, __le32 *digest, const u32 *nonce);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+ poly1305_init_arm64(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(key + 16);
+ dctx->s[1] = get_unaligned_le32(key + 20);
+ dctx->s[2] = get_unaligned_le32(key + 24);
+ dctx->s[3] = get_unaligned_le32(key + 28);
+ dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int neon_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static void neon_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ u32 len, u32 hibit, bool do_neon)
+{
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+ poly1305_init_arch(dctx, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ if (len < POLY1305_BLOCK_SIZE)
+ return;
+ }
+
+ len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+ if (static_branch_likely(&have_neon) && likely(do_neon))
+ poly1305_blocks_neon(&dctx->h, src, len, hibit);
+ else
+ poly1305_blocks(&dctx->h, src, len, hibit);
+}
+
+static void neon_poly1305_do_update(struct poly1305_desc_ctx *dctx,
+ const u8 *src, u32 len, bool do_neon)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ len -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ neon_poly1305_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1, false);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(len >= POLY1305_BLOCK_SIZE)) {
+ neon_poly1305_blocks(dctx, src, len, 1, do_neon);
+ src += round_down(len, POLY1305_BLOCK_SIZE);
+ len %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(len)) {
+ dctx->buflen = len;
+ memcpy(dctx->buf, src, len);
+ }
+}
+
+static int neon_poly1305_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ bool do_neon = crypto_simd_usable() && srclen > 128;
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_begin();
+ neon_poly1305_do_update(dctx, src, srclen, do_neon);
+ if (static_branch_likely(&have_neon) && do_neon)
+ kernel_neon_end();
+ return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int nbytes)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+ if (static_branch_likely(&have_neon) && crypto_simd_usable()) {
+ kernel_neon_begin();
+ poly1305_blocks_neon(&dctx->h, src, len, 1);
+ kernel_neon_end();
+ } else {
+ poly1305_blocks(&dctx->h, src, len, 1);
+ }
+ src += len;
+ nbytes %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(nbytes)) {
+ dctx->buflen = nbytes;
+ memcpy(dctx->buf, src, nbytes);
+ }
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_blocks(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ }
+
+ poly1305_emit(&dctx->h, digest, dctx->s);
+
+ /* mac = (h + s) % (2^128) */
+ f = (f >> 32) + le32_to_cpu(digest[0]);
+ put_unaligned_le32(f, dst);
+ f = (f >> 32) + le32_to_cpu(digest[1]);
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]);
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]);
+ put_unaligned_le32(f, dst + 12);
+
+ *dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int neon_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_arch(dctx, dst);
+ return 0;
+}
+
+static struct shash_alg neon_poly1305_alg = {
+ .init = neon_poly1305_init,
+ .update = neon_poly1305_update,
+ .final = neon_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-neon",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+};
+
+static int __init neon_poly1305_mod_init(void)
+{
+ if (!cpu_have_named_feature(ASIMD))
+ return 0;
+
+ static_branch_enable(&have_neon);
+
+ return crypto_register_shash(&neon_poly1305_alg);
+}
+
+static void __exit neon_poly1305_mod_exit(void)
+{
+ if (cpu_have_named_feature(ASIMD))
+ crypto_unregister_shash(&neon_poly1305_alg);
+}
+
+module_init(neon_poly1305_mod_init);
+module_exit(neon_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-neon");
diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 38ee1514cd9c..0b727edf4104 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -51,7 +51,7 @@ int hw_breakpoint_slots(int type)
case TYPE_DATA:
return get_num_wrps();
default:
- pr_warning("unknown slot type: %d\n", type);
+ pr_warn("unknown slot type: %d\n", type);
return 0;
}
}
@@ -112,7 +112,7 @@ static u64 read_wb_reg(int reg, int n)
GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
GEN_READ_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
default:
- pr_warning("attempt to read from unknown breakpoint register %d\n", n);
+ pr_warn("attempt to read from unknown breakpoint register %d\n", n);
}
return val;
@@ -127,7 +127,7 @@ static void write_wb_reg(int reg, int n, u64 val)
GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WVR, AARCH64_DBG_REG_NAME_WVR, val);
GEN_WRITE_WB_REG_CASES(AARCH64_DBG_REG_WCR, AARCH64_DBG_REG_NAME_WCR, val);
default:
- pr_warning("attempt to write to unknown breakpoint register %d\n", n);
+ pr_warn("attempt to write to unknown breakpoint register %d\n", n);
}
isb();
}
@@ -145,7 +145,7 @@ static enum dbg_active_el debug_exception_level(int privilege)
case AARCH64_BREAKPOINT_EL1:
return DBG_ACTIVE_EL1;
default:
- pr_warning("invalid breakpoint privilege level %d\n", privilege);
+ pr_warn("invalid breakpoint privilege level %d\n", privilege);
return -EINVAL;
}
}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dc9fe879c279..ab149bcc3dc7 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -345,8 +345,7 @@ void __cpu_die(unsigned int cpu)
*/
err = op_cpu_kill(cpu);
if (err)
- pr_warn("CPU%d may not have shut down cleanly: %d\n",
- cpu, err);
+ pr_warn("CPU%d may not have shut down cleanly: %d\n", cpu, err);
}
/*
@@ -976,8 +975,8 @@ void smp_send_stop(void)
udelay(1);
if (num_online_cpus() > 1)
- pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(cpu_online_mask));
+ pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(cpu_online_mask));
sdei_mask_local_cpu();
}
@@ -1017,8 +1016,8 @@ void crash_smp_send_stop(void)
udelay(1);
if (atomic_read(&waiting_for_crash_ipi) > 0)
- pr_warning("SMP: failed to stop secondary CPUs %*pbl\n",
- cpumask_pr_args(&mask));
+ pr_warn("SMP: failed to stop secondary CPUs %*pbl\n",
+ cpumask_pr_args(&mask));
sdei_mask_local_cpu();
}
diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S
index 009057517bdd..841a8b4ce114 100644
--- a/arch/arm64/kernel/vmlinux.lds.S
+++ b/arch/arm64/kernel/vmlinux.lds.S
@@ -5,6 +5,8 @@
* Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>
*/
+#define RO_EXCEPTION_TABLE_ALIGN 8
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#include <asm/kernel-pgtable.h>
@@ -132,11 +134,9 @@ SECTIONS
. = ALIGN(SEGMENT_ALIGN);
_etext = .; /* End of text section */
- RO_DATA(PAGE_SIZE) /* everything from this point to */
- EXCEPTION_TABLE(8) /* __init_begin will be marked RO NX */
- NOTES
+ /* everything from this point to __init_begin will be marked RO NX */
+ RO_DATA(PAGE_SIZE)
- . = ALIGN(PAGE_SIZE);
idmap_pg_dir = .;
. += IDMAP_DIR_SIZE;
@@ -212,7 +212,7 @@ SECTIONS
_data = .;
_sdata = .;
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_ALIGN)
/*
* Data written with the MMU off but read with the MMU on requires
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index a9f541912289..5a3b15a14a7f 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1060,6 +1060,8 @@ int arch_add_memory(int nid, u64 start, u64 size,
__create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
size, PAGE_KERNEL, __pgd_pgtable_alloc, flags);
+ memblock_clear_nomap(start, size);
+
return __add_pages(nid, start >> PAGE_SHIFT, size >> PAGE_SHIFT,
restrictions);
}
diff --git a/arch/c6x/kernel/vmlinux.lds.S b/arch/c6x/kernel/vmlinux.lds.S
index 584bab2bace6..ac99ba0864bf 100644
--- a/arch/c6x/kernel/vmlinux.lds.S
+++ b/arch/c6x/kernel/vmlinux.lds.S
@@ -5,6 +5,9 @@
* Copyright (C) 2010, 2011 Texas Instruments Incorporated
* Mark Salter <msalter@redhat.com>
*/
+
+#define RO_EXCEPTION_TABLE_ALIGN 16
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/thread_info.h>
#include <asm/page.h>
@@ -80,10 +83,7 @@ SECTIONS
*(.gnu.warning)
}
- EXCEPTION_TABLE(16)
- NOTES
-
- RO_DATA_SECTION(PAGE_SIZE)
+ RO_DATA(PAGE_SIZE)
.const :
{
*(.const .const.* .gnu.linkonce.r.*)
diff --git a/arch/csky/kernel/vmlinux.lds.S b/arch/csky/kernel/vmlinux.lds.S
index ae7961b973f2..2ff37beaf2bf 100644
--- a/arch/csky/kernel/vmlinux.lds.S
+++ b/arch/csky/kernel/vmlinux.lds.S
@@ -49,11 +49,10 @@ SECTIONS
_sdata = .;
- RO_DATA_SECTION(PAGE_SIZE)
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RO_DATA(PAGE_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
_edata = .;
- NOTES
EXCEPTION_TABLE(L1_CACHE_BYTES)
BSS_SECTION(L1_CACHE_BYTES, PAGE_SIZE, L1_CACHE_BYTES)
VBR_BASE
diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S
index 49f716c0a1df..6b1afc2f9b68 100644
--- a/arch/h8300/kernel/vmlinux.lds.S
+++ b/arch/h8300/kernel/vmlinux.lds.S
@@ -1,4 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
+
+#define RO_EXCEPTION_TABLE_ALIGN 16
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/page.h>
#include <asm/thread_info.h>
@@ -37,9 +40,7 @@ SECTIONS
#endif
_etext = . ;
}
- EXCEPTION_TABLE(16)
- NOTES
- RO_DATA_SECTION(4)
+ RO_DATA(4)
ROMEND = .;
#if defined(CONFIG_ROMKERNEL)
. = RAMTOP;
@@ -48,7 +49,7 @@ SECTIONS
#endif
_sdata = . ;
__data_start = . ;
- RW_DATA_SECTION(0, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(0, PAGE_SIZE, THREAD_SIZE)
#if defined(CONFIG_ROMKERNEL)
#undef ADDR
#endif
diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S
index 78f2418e97c8..0ca2471ddb9f 100644
--- a/arch/hexagon/kernel/vmlinux.lds.S
+++ b/arch/hexagon/kernel/vmlinux.lds.S
@@ -49,12 +49,11 @@ SECTIONS
INIT_DATA_SECTION(PAGE_SIZE)
_sdata = .;
- RW_DATA_SECTION(32,PAGE_SIZE,_THREAD_SIZE)
- RO_DATA_SECTION(PAGE_SIZE)
+ RW_DATA(32,PAGE_SIZE,_THREAD_SIZE)
+ RO_DATA(PAGE_SIZE)
_edata = .;
EXCEPTION_TABLE(16)
- NOTES
BSS_SECTION(_PAGE_SIZE, _PAGE_SIZE, _PAGE_SIZE)
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index bb320c6d0cc9..c49fcef754de 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -289,7 +289,7 @@ static void __init setup_crashkernel(unsigned long total, int *n)
}
if (!check_crashkernel_memory(base, size)) {
- pr_warning("crashkernel: There would be kdump memory "
+ pr_warn("crashkernel: There would be kdump memory "
"at %ld GB but this is unusable because it "
"must\nbe below 4 GB. Change the memory "
"configuration of the machine.\n",
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index 1e95d32c8877..91b4024c9351 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -132,7 +132,7 @@ static __u64 vtime_delta(struct task_struct *tsk)
return delta_stime;
}
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
{
struct thread_info *ti = task_thread_info(tsk);
__u64 stime = vtime_delta(tsk);
@@ -146,7 +146,7 @@ void vtime_account_system(struct task_struct *tsk)
else
ti->stime += stime;
}
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_idle(struct task_struct *tsk)
{
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index d9d4e21107cd..1ec6b703c5b4 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -5,6 +5,9 @@
#include <asm/pgtable.h>
#include <asm/thread_info.h>
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN 16
+
#include <asm-generic/vmlinux.lds.h>
OUTPUT_FORMAT("elf64-ia64-little")
@@ -13,7 +16,7 @@ ENTRY(phys_start)
jiffies = jiffies_64;
PHDRS {
- code PT_LOAD;
+ text PT_LOAD;
percpu PT_LOAD;
data PT_LOAD;
note PT_NOTE;
@@ -36,7 +39,7 @@ SECTIONS {
phys_start = _start - LOAD_OFFSET;
code : {
- } :code
+ } :text
. = KERNEL_START;
_text = .;
@@ -68,11 +71,6 @@ SECTIONS {
/*
* Read-only data
*/
- NOTES :code :note /* put .notes in text and mark in PT_NOTE */
- code_continues : {
- } : code /* switch back to regular program... */
-
- EXCEPTION_TABLE(16)
/* MCA table */
. = ALIGN(16);
@@ -102,11 +100,11 @@ SECTIONS {
__start_unwind = .;
*(.IA_64.unwind*)
__end_unwind = .;
- } :code :unwind
+ } :text :unwind
code_continues2 : {
- } : code
+ } :text
- RODATA
+ RO_DATA(4096)
.opd : AT(ADDR(.opd) - LOAD_OFFSET) {
__start_opd = .;
@@ -214,7 +212,7 @@ SECTIONS {
_end = .;
code : {
- } :code
+ } :text
STABS_DEBUG
DWARF_DEBUG
diff --git a/arch/m68k/kernel/vmlinux-nommu.lds b/arch/m68k/kernel/vmlinux-nommu.lds
index cf6edda38971..7b975420c3d9 100644
--- a/arch/m68k/kernel/vmlinux-nommu.lds
+++ b/arch/m68k/kernel/vmlinux-nommu.lds
@@ -60,8 +60,8 @@ SECTIONS {
#endif
_sdata = .;
- RO_DATA_SECTION(PAGE_SIZE)
- RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE)
+ RO_DATA(PAGE_SIZE)
+ RW_DATA(16, PAGE_SIZE, THREAD_SIZE)
_edata = .;
EXCEPTION_TABLE(16)
diff --git a/arch/m68k/kernel/vmlinux-std.lds b/arch/m68k/kernel/vmlinux-std.lds
index 625a5785804f..4d33da4e7106 100644
--- a/arch/m68k/kernel/vmlinux-std.lds
+++ b/arch/m68k/kernel/vmlinux-std.lds
@@ -31,9 +31,9 @@ SECTIONS
_sdata = .; /* Start of data section */
- RODATA
+ RO_DATA(4096)
- RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(16, PAGE_SIZE, THREAD_SIZE)
BSS_SECTION(0, 0, 0)
diff --git a/arch/m68k/kernel/vmlinux-sun3.lds b/arch/m68k/kernel/vmlinux-sun3.lds
index 9868270b0984..87d9f4d08f65 100644
--- a/arch/m68k/kernel/vmlinux-sun3.lds
+++ b/arch/m68k/kernel/vmlinux-sun3.lds
@@ -24,13 +24,13 @@ SECTIONS
*(.fixup)
*(.gnu.warning)
} :text = 0x4e75
- RODATA
+ RO_DATA(4096)
_etext = .; /* End of text section */
EXCEPTION_TABLE(16) :data
_sdata = .; /* Start of rw data section */
- RW_DATA_SECTION(16, PAGE_SIZE, THREAD_SIZE) :data
+ RW_DATA(16, PAGE_SIZE, THREAD_SIZE) :data
/* End of data goes *here* so that freeing init code works properly. */
_edata = .;
NOTES
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index c9c4be822456..a75896f18e58 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -46,6 +46,7 @@ config MICROBLAZE
select VIRT_TO_BUS
select CPU_NO_EFFICIENT_FFS
select MMU_GATHER_NO_RANGE if MMU
+ select SPARSE_IRQ
# Endianness selection
choice
diff --git a/arch/microblaze/configs/mmu_defconfig b/arch/microblaze/configs/mmu_defconfig
index 654edfdc7867..b3b433db89d8 100644
--- a/arch/microblaze/configs/mmu_defconfig
+++ b/arch/microblaze/configs/mmu_defconfig
@@ -33,6 +33,8 @@ CONFIG_INET=y
# CONFIG_IPV6 is not set
CONFIG_BRIDGE=m
CONFIG_PCI=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
CONFIG_MTD=y
CONFIG_MTD_CFI=y
CONFIG_MTD_CFI_INTELEXT=y
@@ -73,6 +75,7 @@ CONFIG_UIO_PDRV_GENIRQ=y
CONFIG_UIO_DMEM_GENIRQ=y
CONFIG_EXT2_FS=y
# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
CONFIG_CRAMFS=y
CONFIG_ROMFS_FS=y
CONFIG_NFS_FS=y
diff --git a/arch/microblaze/include/asm/irq.h b/arch/microblaze/include/asm/irq.h
index d785defeeed5..eac2fb4b3fb9 100644
--- a/arch/microblaze/include/asm/irq.h
+++ b/arch/microblaze/include/asm/irq.h
@@ -9,7 +9,6 @@
#ifndef _ASM_MICROBLAZE_IRQ_H
#define _ASM_MICROBLAZE_IRQ_H
-#define NR_IRQS (32 + 1)
#include <asm-generic/irq.h>
struct pt_regs;
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index 4e1b567becd6..de7083bd1d24 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -738,14 +738,9 @@ no_intr_resched:
andi r5, r5, _TIF_NEED_RESCHED;
beqi r5, restore /* if zero jump over */
-preempt:
/* interrupts are off that's why I am calling preempt_chedule_irq */
bralid r15, preempt_schedule_irq
nop
- lwi r11, CURRENT_TASK, TS_THREAD_INFO; /* get thread info */
- lwi r5, r11, TI_FLAGS; /* get flags in thread info */
- andi r5, r5, _TIF_NEED_RESCHED;
- bnei r5, preempt /* if non zero jump to resched */
restore:
#endif
VM_OFF /* MS: turn off MMU */
diff --git a/arch/microblaze/kernel/head.S b/arch/microblaze/kernel/head.S
index f264fdcf152a..7d2894418691 100644
--- a/arch/microblaze/kernel/head.S
+++ b/arch/microblaze/kernel/head.S
@@ -99,7 +99,7 @@ big_endian:
_prepare_copy_fdt:
or r11, r0, r0 /* incremment */
ori r4, r0, TOPHYS(_fdt_start)
- ori r3, r0, (0x8000 - 4)
+ ori r3, r0, (0x10000 - 4)
_copy_fdt:
lw r12, r7, r11 /* r12 = r7 + r11 */
sw r12, r4, r11 /* addr[r4 + r11] = r12 */
diff --git a/arch/microblaze/kernel/vmlinux.lds.S b/arch/microblaze/kernel/vmlinux.lds.S
index e1f3e8741292..2c09fa3a8a01 100644
--- a/arch/microblaze/kernel/vmlinux.lds.S
+++ b/arch/microblaze/kernel/vmlinux.lds.S
@@ -11,6 +11,8 @@
OUTPUT_ARCH(microblaze)
ENTRY(microblaze_start)
+#define RO_EXCEPTION_TABLE_ALIGN 16
+
#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
#include <asm/thread_info.h>
@@ -46,14 +48,12 @@ SECTIONS {
__fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET) {
_fdt_start = . ; /* place for fdt blob */
*(__fdt_blob) ; /* Any link-placed DTB */
- . = _fdt_start + 0x8000; /* Pad up to 32kbyte */
+ . = _fdt_start + 0x10000; /* Pad up to 64kbyte */
_fdt_end = . ;
}
. = ALIGN(16);
- RODATA
- EXCEPTION_TABLE(16)
- NOTES
+ RO_DATA(4096)
/*
* sdata2 section can go anywhere, but must be word aligned
@@ -70,7 +70,7 @@ SECTIONS {
}
_sdata = . ;
- RW_DATA_SECTION(32, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(32, PAGE_SIZE, THREAD_SIZE)
_edata = . ;
/* Under the microblaze ABI, .sdata and .sbss must be contiguous */
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index c86be02b6d89..61b9269cdd3e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -74,6 +74,7 @@ config MIPS
select HAVE_PERF_EVENTS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RSEQ
+ select HAVE_SPARSE_SYSCALL_NR
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 0a5eab626260..e1c44aed8156 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -326,7 +326,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/mips/math-emu/
# See arch/mips/Kbuild for content of core part of the kernel
core-y += arch/mips/
-drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/
+drivers-y += arch/mips/crypto/
drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/
# suspend and hibernation support
diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile
index e07aca572c2e..8e1deaf00e0c 100644
--- a/arch/mips/crypto/Makefile
+++ b/arch/mips/crypto/Makefile
@@ -4,3 +4,21 @@
#
obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
+
+obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
+chacha-mips-y := chacha-core.o chacha-glue.o
+AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
+
+obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
+poly1305-mips-y := poly1305-core.o poly1305-glue.o
+
+perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32
+perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64
+
+quiet_cmd_perlasm = PERLASM $@
+ cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
+
+$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE
+ $(call if_changed,perlasm)
+
+targets += poly1305-core.S
diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S
new file mode 100644
index 000000000000..5755f69cfe00
--- /dev/null
+++ b/arch/mips/crypto/chacha-core.S
@@ -0,0 +1,497 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. All Rights Reserved.
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#define MASK_U32 0x3c
+#define CHACHA20_BLOCK_SIZE 64
+#define STACK_SIZE 32
+
+#define X0 $t0
+#define X1 $t1
+#define X2 $t2
+#define X3 $t3
+#define X4 $t4
+#define X5 $t5
+#define X6 $t6
+#define X7 $t7
+#define X8 $t8
+#define X9 $t9
+#define X10 $v1
+#define X11 $s6
+#define X12 $s5
+#define X13 $s4
+#define X14 $s3
+#define X15 $s2
+/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */
+#define T0 $s1
+#define T1 $s0
+#define T(n) T ## n
+#define X(n) X ## n
+
+/* Input arguments */
+#define STATE $a0
+#define OUT $a1
+#define IN $a2
+#define BYTES $a3
+
+/* Output argument */
+/* NONCE[0] is kept in a register and not in memory.
+ * We don't want to touch original value in memory.
+ * Must be incremented every loop iteration.
+ */
+#define NONCE_0 $v0
+
+/* SAVED_X and SAVED_CA are set in the jump table.
+ * Use regs which are overwritten on exit else we don't leak clear data.
+ * They are used to handling the last bytes which are not multiple of 4.
+ */
+#define SAVED_X X15
+#define SAVED_CA $s7
+
+#define IS_UNALIGNED $s7
+
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define MSB 0
+#define LSB 3
+#define ROTx rotl
+#define ROTR(n) rotr n, 24
+#define CPU_TO_LE32(n) \
+ wsbh n; \
+ rotr n, 16;
+#else
+#define MSB 3
+#define LSB 0
+#define ROTx rotr
+#define CPU_TO_LE32(n)
+#define ROTR(n)
+#endif
+
+#define FOR_EACH_WORD(x) \
+ x( 0); \
+ x( 1); \
+ x( 2); \
+ x( 3); \
+ x( 4); \
+ x( 5); \
+ x( 6); \
+ x( 7); \
+ x( 8); \
+ x( 9); \
+ x(10); \
+ x(11); \
+ x(12); \
+ x(13); \
+ x(14); \
+ x(15);
+
+#define FOR_EACH_WORD_REV(x) \
+ x(15); \
+ x(14); \
+ x(13); \
+ x(12); \
+ x(11); \
+ x(10); \
+ x( 9); \
+ x( 8); \
+ x( 7); \
+ x( 6); \
+ x( 5); \
+ x( 4); \
+ x( 3); \
+ x( 2); \
+ x( 1); \
+ x( 0);
+
+#define PLUS_ONE_0 1
+#define PLUS_ONE_1 2
+#define PLUS_ONE_2 3
+#define PLUS_ONE_3 4
+#define PLUS_ONE_4 5
+#define PLUS_ONE_5 6
+#define PLUS_ONE_6 7
+#define PLUS_ONE_7 8
+#define PLUS_ONE_8 9
+#define PLUS_ONE_9 10
+#define PLUS_ONE_10 11
+#define PLUS_ONE_11 12
+#define PLUS_ONE_12 13
+#define PLUS_ONE_13 14
+#define PLUS_ONE_14 15
+#define PLUS_ONE_15 16
+#define PLUS_ONE(x) PLUS_ONE_ ## x
+#define _CONCAT3(a,b,c) a ## b ## c
+#define CONCAT3(a,b,c) _CONCAT3(a,b,c)
+
+#define STORE_UNALIGNED(x) \
+CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \
+ .if (x != 12); \
+ lw T0, (x*4)(STATE); \
+ .endif; \
+ lwl T1, (x*4)+MSB ## (IN); \
+ lwr T1, (x*4)+LSB ## (IN); \
+ .if (x == 12); \
+ addu X ## x, NONCE_0; \
+ .else; \
+ addu X ## x, T0; \
+ .endif; \
+ CPU_TO_LE32(X ## x); \
+ xor X ## x, T1; \
+ swl X ## x, (x*4)+MSB ## (OUT); \
+ swr X ## x, (x*4)+LSB ## (OUT);
+
+#define STORE_ALIGNED(x) \
+CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \
+ .if (x != 12); \
+ lw T0, (x*4)(STATE); \
+ .endif; \
+ lw T1, (x*4) ## (IN); \
+ .if (x == 12); \
+ addu X ## x, NONCE_0; \
+ .else; \
+ addu X ## x, T0; \
+ .endif; \
+ CPU_TO_LE32(X ## x); \
+ xor X ## x, T1; \
+ sw X ## x, (x*4) ## (OUT);
+
+/* Jump table macro.
+ * Used for setup and handling the last bytes, which are not multiple of 4.
+ * X15 is free to store Xn
+ * Every jumptable entry must be equal in size.
+ */
+#define JMPTBL_ALIGNED(x) \
+.Lchacha_mips_jmptbl_aligned_ ## x: ; \
+ .set noreorder; \
+ b .Lchacha_mips_xor_aligned_ ## x ## _b; \
+ .if (x == 12); \
+ addu SAVED_X, X ## x, NONCE_0; \
+ .else; \
+ addu SAVED_X, X ## x, SAVED_CA; \
+ .endif; \
+ .set reorder
+
+#define JMPTBL_UNALIGNED(x) \
+.Lchacha_mips_jmptbl_unaligned_ ## x: ; \
+ .set noreorder; \
+ b .Lchacha_mips_xor_unaligned_ ## x ## _b; \
+ .if (x == 12); \
+ addu SAVED_X, X ## x, NONCE_0; \
+ .else; \
+ addu SAVED_X, X ## x, SAVED_CA; \
+ .endif; \
+ .set reorder
+
+#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \
+ addu X(A), X(K); \
+ addu X(B), X(L); \
+ addu X(C), X(M); \
+ addu X(D), X(N); \
+ xor X(V), X(A); \
+ xor X(W), X(B); \
+ xor X(Y), X(C); \
+ xor X(Z), X(D); \
+ rotl X(V), S; \
+ rotl X(W), S; \
+ rotl X(Y), S; \
+ rotl X(Z), S;
+
+.text
+.set reorder
+.set noat
+.globl chacha_crypt_arch
+.ent chacha_crypt_arch
+chacha_crypt_arch:
+ .frame $sp, STACK_SIZE, $ra
+
+ /* Load number of rounds */
+ lw $at, 16($sp)
+
+ addiu $sp, -STACK_SIZE
+
+ /* Return bytes = 0. */
+ beqz BYTES, .Lchacha_mips_end
+
+ lw NONCE_0, 48(STATE)
+
+ /* Save s0-s7 */
+ sw $s0, 0($sp)
+ sw $s1, 4($sp)
+ sw $s2, 8($sp)
+ sw $s3, 12($sp)
+ sw $s4, 16($sp)
+ sw $s5, 20($sp)
+ sw $s6, 24($sp)
+ sw $s7, 28($sp)
+
+ /* Test IN or OUT is unaligned.
+ * IS_UNALIGNED = ( IN | OUT ) & 0x00000003
+ */
+ or IS_UNALIGNED, IN, OUT
+ andi IS_UNALIGNED, 0x3
+
+ b .Lchacha_rounds_start
+
+.align 4
+.Loop_chacha_rounds:
+ addiu IN, CHACHA20_BLOCK_SIZE
+ addiu OUT, CHACHA20_BLOCK_SIZE
+ addiu NONCE_0, 1
+
+.Lchacha_rounds_start:
+ lw X0, 0(STATE)
+ lw X1, 4(STATE)
+ lw X2, 8(STATE)
+ lw X3, 12(STATE)
+
+ lw X4, 16(STATE)
+ lw X5, 20(STATE)
+ lw X6, 24(STATE)
+ lw X7, 28(STATE)
+ lw X8, 32(STATE)
+ lw X9, 36(STATE)
+ lw X10, 40(STATE)
+ lw X11, 44(STATE)
+
+ move X12, NONCE_0
+ lw X13, 52(STATE)
+ lw X14, 56(STATE)
+ lw X15, 60(STATE)
+
+.Loop_chacha_xor_rounds:
+ addiu $at, -2
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
+ bnez $at, .Loop_chacha_xor_rounds
+
+ addiu BYTES, -(CHACHA20_BLOCK_SIZE)
+
+ /* Is data src/dst unaligned? Jump */
+ bnez IS_UNALIGNED, .Loop_chacha_unaligned
+
+ /* Set number rounds here to fill delayslot. */
+ lw $at, (STACK_SIZE+16)($sp)
+
+ /* BYTES < 0, it has no full block. */
+ bltz BYTES, .Lchacha_mips_no_full_block_aligned
+
+ FOR_EACH_WORD_REV(STORE_ALIGNED)
+
+ /* BYTES > 0? Loop again. */
+ bgtz BYTES, .Loop_chacha_rounds
+
+ /* Place this here to fill delay slot */
+ addiu NONCE_0, 1
+
+ /* BYTES < 0? Handle last bytes */
+ bltz BYTES, .Lchacha_mips_xor_bytes
+
+.Lchacha_mips_xor_done:
+ /* Restore used registers */
+ lw $s0, 0($sp)
+ lw $s1, 4($sp)
+ lw $s2, 8($sp)
+ lw $s3, 12($sp)
+ lw $s4, 16($sp)
+ lw $s5, 20($sp)
+ lw $s6, 24($sp)
+ lw $s7, 28($sp)
+
+ /* Write NONCE_0 back to right location in state */
+ sw NONCE_0, 48(STATE)
+
+.Lchacha_mips_end:
+ addiu $sp, STACK_SIZE
+ jr $ra
+
+.Lchacha_mips_no_full_block_aligned:
+ /* Restore the offset on BYTES */
+ addiu BYTES, CHACHA20_BLOCK_SIZE
+
+ /* Get number of full WORDS */
+ andi $at, BYTES, MASK_U32
+
+ /* Load upper half of jump table addr */
+ lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0)
+
+ /* Calculate lower half jump table offset */
+ ins T0, $at, 1, 6
+
+ /* Add offset to STATE */
+ addu T1, STATE, $at
+
+ /* Add lower half jump table addr */
+ addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0)
+
+ /* Read value from STATE */
+ lw SAVED_CA, 0(T1)
+
+ /* Store remaining bytecounter as negative value */
+ subu BYTES, $at, BYTES
+
+ jr T0
+
+ /* Jump table */
+ FOR_EACH_WORD(JMPTBL_ALIGNED)
+
+
+.Loop_chacha_unaligned:
+ /* Set number rounds here to fill delayslot. */
+ lw $at, (STACK_SIZE+16)($sp)
+
+ /* BYTES > 0, it has no full block. */
+ bltz BYTES, .Lchacha_mips_no_full_block_unaligned
+
+ FOR_EACH_WORD_REV(STORE_UNALIGNED)
+
+ /* BYTES > 0? Loop again. */
+ bgtz BYTES, .Loop_chacha_rounds
+
+ /* Write NONCE_0 back to right location in state */
+ sw NONCE_0, 48(STATE)
+
+ .set noreorder
+ /* Fall through to byte handling */
+ bgez BYTES, .Lchacha_mips_xor_done
+.Lchacha_mips_xor_unaligned_0_b:
+.Lchacha_mips_xor_aligned_0_b:
+ /* Place this here to fill delay slot */
+ addiu NONCE_0, 1
+ .set reorder
+
+.Lchacha_mips_xor_bytes:
+ addu IN, $at
+ addu OUT, $at
+ /* First byte */
+ lbu T1, 0(IN)
+ addiu $at, BYTES, 1
+ CPU_TO_LE32(SAVED_X)
+ ROTR(SAVED_X)
+ xor T1, SAVED_X
+ sb T1, 0(OUT)
+ beqz $at, .Lchacha_mips_xor_done
+ /* Second byte */
+ lbu T1, 1(IN)
+ addiu $at, BYTES, 2
+ ROTx SAVED_X, 8
+ xor T1, SAVED_X
+ sb T1, 1(OUT)
+ beqz $at, .Lchacha_mips_xor_done
+ /* Third byte */
+ lbu T1, 2(IN)
+ ROTx SAVED_X, 8
+ xor T1, SAVED_X
+ sb T1, 2(OUT)
+ b .Lchacha_mips_xor_done
+
+.Lchacha_mips_no_full_block_unaligned:
+ /* Restore the offset on BYTES */
+ addiu BYTES, CHACHA20_BLOCK_SIZE
+
+ /* Get number of full WORDS */
+ andi $at, BYTES, MASK_U32
+
+ /* Load upper half of jump table addr */
+ lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0)
+
+ /* Calculate lower half jump table offset */
+ ins T0, $at, 1, 6
+
+ /* Add offset to STATE */
+ addu T1, STATE, $at
+
+ /* Add lower half jump table addr */
+ addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0)
+
+ /* Read value from STATE */
+ lw SAVED_CA, 0(T1)
+
+ /* Store remaining bytecounter as negative value */
+ subu BYTES, $at, BYTES
+
+ jr T0
+
+ /* Jump table */
+ FOR_EACH_WORD(JMPTBL_UNALIGNED)
+.end chacha_crypt_arch
+.set at
+
+/* Input arguments
+ * STATE $a0
+ * OUT $a1
+ * NROUND $a2
+ */
+
+#undef X12
+#undef X13
+#undef X14
+#undef X15
+
+#define X12 $a3
+#define X13 $at
+#define X14 $v0
+#define X15 STATE
+
+.set noat
+.globl hchacha_block_arch
+.ent hchacha_block_arch
+hchacha_block_arch:
+ .frame $sp, STACK_SIZE, $ra
+
+ addiu $sp, -STACK_SIZE
+
+ /* Save X11(s6) */
+ sw X11, 0($sp)
+
+ lw X0, 0(STATE)
+ lw X1, 4(STATE)
+ lw X2, 8(STATE)
+ lw X3, 12(STATE)
+ lw X4, 16(STATE)
+ lw X5, 20(STATE)
+ lw X6, 24(STATE)
+ lw X7, 28(STATE)
+ lw X8, 32(STATE)
+ lw X9, 36(STATE)
+ lw X10, 40(STATE)
+ lw X11, 44(STATE)
+ lw X12, 48(STATE)
+ lw X13, 52(STATE)
+ lw X14, 56(STATE)
+ lw X15, 60(STATE)
+
+.Loop_hchacha_xor_rounds:
+ addiu $a2, -2
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12);
+ AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8);
+ AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12);
+ AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8);
+ AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7);
+ bnez $a2, .Loop_hchacha_xor_rounds
+
+ /* Restore used register */
+ lw X11, 0($sp)
+
+ sw X0, 0(OUT)
+ sw X1, 4(OUT)
+ sw X2, 8(OUT)
+ sw X3, 12(OUT)
+ sw X12, 16(OUT)
+ sw X13, 20(OUT)
+ sw X14, 24(OUT)
+ sw X15, 28(OUT)
+
+ addiu $sp, STACK_SIZE
+ jr $ra
+.end hchacha_block_arch
+.set at
diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c
new file mode 100644
index 000000000000..779e399c9bef
--- /dev/null
+++ b/arch/mips/crypto/chacha-glue.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MIPS accelerated ChaCha and XChaCha stream ciphers,
+ * including ChaCha20 (RFC7539)
+ *
+ * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/byteorder.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/chacha.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
+ unsigned int bytes, int nrounds);
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds);
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+static int chacha_mips_stream_xor(struct skcipher_request *req,
+ const struct chacha_ctx *ctx, const u8 *iv)
+{
+ struct skcipher_walk walk;
+ u32 state[16];
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
+
+ chacha_init_generic(state, ctx->key, iv);
+
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
+
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
+
+ chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr,
+ nbytes, ctx->nrounds);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+ }
+
+ return err;
+}
+
+static int chacha_mips(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+ return chacha_mips_stream_xor(req, ctx, req->iv);
+}
+
+static int xchacha_mips(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct chacha_ctx subctx;
+ u32 state[16];
+ u8 real_iv[16];
+
+ chacha_init_generic(state, ctx->key, req->iv);
+
+ hchacha_block(state, subctx.key, ctx->nrounds);
+ subctx.nrounds = ctx->nrounds;
+
+ memcpy(&real_iv[0], req->iv + 24, 8);
+ memcpy(&real_iv[8], req->iv + 16, 8);
+ return chacha_mips_stream_xor(req, &subctx, real_iv);
+}
+
+static struct skcipher_alg algs[] = {
+ {
+ .base.cra_name = "chacha20",
+ .base.cra_driver_name = "chacha20-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = CHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = chacha_mips,
+ .decrypt = chacha_mips,
+ }, {
+ .base.cra_name = "xchacha20",
+ .base.cra_driver_name = "xchacha20-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha20_setkey,
+ .encrypt = xchacha_mips,
+ .decrypt = xchacha_mips,
+ }, {
+ .base.cra_name = "xchacha12",
+ .base.cra_driver_name = "xchacha12-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct chacha_ctx),
+ .base.cra_module = THIS_MODULE,
+
+ .min_keysize = CHACHA_KEY_SIZE,
+ .max_keysize = CHACHA_KEY_SIZE,
+ .ivsize = XCHACHA_IV_SIZE,
+ .chunksize = CHACHA_BLOCK_SIZE,
+ .setkey = chacha12_setkey,
+ .encrypt = xchacha_mips,
+ .decrypt = xchacha_mips,
+ }
+};
+
+static int __init chacha_simd_mod_init(void)
+{
+ return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+static void __exit chacha_simd_mod_fini(void)
+{
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+}
+
+module_init(chacha_simd_mod_init);
+module_exit(chacha_simd_mod_fini);
+
+MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("chacha20");
+MODULE_ALIAS_CRYPTO("chacha20-mips");
+MODULE_ALIAS_CRYPTO("xchacha20");
+MODULE_ALIAS_CRYPTO("xchacha20-mips");
+MODULE_ALIAS_CRYPTO("xchacha12");
+MODULE_ALIAS_CRYPTO("xchacha12-mips");
diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c
new file mode 100644
index 000000000000..b759b6ccc361
--- /dev/null
+++ b/arch/mips/crypto/poly1305-glue.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS
+ *
+ * Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/unaligned.h>
+#include <crypto/algapi.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+asmlinkage void poly1305_init_mips(void *state, const u8 *key);
+asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
+asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce);
+
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+{
+ poly1305_init_mips(&dctx->h, key);
+ dctx->s[0] = get_unaligned_le32(key + 16);
+ dctx->s[1] = get_unaligned_le32(key + 20);
+ dctx->s[2] = get_unaligned_le32(key + 24);
+ dctx->s[3] = get_unaligned_le32(key + 28);
+ dctx->buflen = 0;
+}
+EXPORT_SYMBOL(poly1305_init_arch);
+
+static int mips_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
+ u32 len, u32 hibit)
+{
+ if (unlikely(!dctx->sset)) {
+ if (!dctx->rset) {
+ poly1305_init_mips(&dctx->h, src);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->rset = 1;
+ }
+ if (len >= POLY1305_BLOCK_SIZE) {
+ dctx->s[0] = get_unaligned_le32(src + 0);
+ dctx->s[1] = get_unaligned_le32(src + 4);
+ dctx->s[2] = get_unaligned_le32(src + 8);
+ dctx->s[3] = get_unaligned_le32(src + 12);
+ src += POLY1305_BLOCK_SIZE;
+ len -= POLY1305_BLOCK_SIZE;
+ dctx->sset = true;
+ }
+ if (len < POLY1305_BLOCK_SIZE)
+ return;
+ }
+
+ len &= ~(POLY1305_BLOCK_SIZE - 1);
+
+ poly1305_blocks_mips(&dctx->h, src, len, hibit);
+}
+
+static int mips_poly1305_update(struct shash_desc *desc, const u8 *src,
+ unsigned int len)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(len, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ len -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(len >= POLY1305_BLOCK_SIZE)) {
+ mips_poly1305_blocks(dctx, src, len, 1);
+ src += round_down(len, POLY1305_BLOCK_SIZE);
+ len %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(len)) {
+ dctx->buflen = len;
+ memcpy(dctx->buf, src, len);
+ }
+ return 0;
+}
+
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int nbytes)
+{
+ if (unlikely(dctx->buflen)) {
+ u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen);
+
+ memcpy(dctx->buf + dctx->buflen, src, bytes);
+ src += bytes;
+ nbytes -= bytes;
+ dctx->buflen += bytes;
+
+ if (dctx->buflen == POLY1305_BLOCK_SIZE) {
+ poly1305_blocks_mips(&dctx->h, dctx->buf,
+ POLY1305_BLOCK_SIZE, 1);
+ dctx->buflen = 0;
+ }
+ }
+
+ if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
+ unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE);
+
+ poly1305_blocks_mips(&dctx->h, src, len, 1);
+ src += len;
+ nbytes %= POLY1305_BLOCK_SIZE;
+ }
+
+ if (unlikely(nbytes)) {
+ dctx->buflen = nbytes;
+ memcpy(dctx->buf, src, nbytes);
+ }
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
+{
+ __le32 digest[4];
+ u64 f = 0;
+
+ if (unlikely(dctx->buflen)) {
+ dctx->buf[dctx->buflen++] = 1;
+ memset(dctx->buf + dctx->buflen, 0,
+ POLY1305_BLOCK_SIZE - dctx->buflen);
+ poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
+ }
+
+ poly1305_emit_mips(&dctx->h, digest, dctx->s);
+
+ /* mac = (h + s) % (2^128) */
+ f = (f >> 32) + le32_to_cpu(digest[0]);
+ put_unaligned_le32(f, dst);
+ f = (f >> 32) + le32_to_cpu(digest[1]);
+ put_unaligned_le32(f, dst + 4);
+ f = (f >> 32) + le32_to_cpu(digest[2]);
+ put_unaligned_le32(f, dst + 8);
+ f = (f >> 32) + le32_to_cpu(digest[3]);
+ put_unaligned_le32(f, dst + 12);
+
+ *dctx = (struct poly1305_desc_ctx){};
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int mips_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_arch(dctx, dst);
+ return 0;
+}
+
+static struct shash_alg mips_poly1305_alg = {
+ .init = mips_poly1305_init,
+ .update = mips_poly1305_update,
+ .final = mips_poly1305_final,
+ .digestsize = POLY1305_DIGEST_SIZE,
+ .descsize = sizeof(struct poly1305_desc_ctx),
+
+ .base.cra_name = "poly1305",
+ .base.cra_driver_name = "poly1305-mips",
+ .base.cra_priority = 200,
+ .base.cra_blocksize = POLY1305_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+};
+
+static int __init mips_poly1305_mod_init(void)
+{
+ return crypto_register_shash(&mips_poly1305_alg);
+}
+
+static void __exit mips_poly1305_mod_exit(void)
+{
+ crypto_unregister_shash(&mips_poly1305_alg);
+}
+
+module_init(mips_poly1305_mod_init);
+module_exit(mips_poly1305_mod_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_CRYPTO("poly1305");
+MODULE_ALIAS_CRYPTO("poly1305-mips");
diff --git a/arch/mips/crypto/poly1305-mips.pl b/arch/mips/crypto/poly1305-mips.pl
new file mode 100644
index 000000000000..b05bab884ed2
--- /dev/null
+++ b/arch/mips/crypto/poly1305-mips.pl
@@ -0,0 +1,1273 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
+#
+# ====================================================================
+# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL
+# project.
+# ====================================================================
+
+# Poly1305 hash for MIPS.
+#
+# May 2016
+#
+# Numbers are cycles per processed byte with poly1305_blocks alone.
+#
+# IALU/gcc
+# R1x000 ~5.5/+130% (big-endian)
+# Octeon II 2.50/+70% (little-endian)
+#
+# March 2019
+#
+# Add 32-bit code path.
+#
+# October 2019
+#
+# Modulo-scheduling reduction allows to omit dependency chain at the
+# end of inner loop and improve performance. Also optimize MIPS32R2
+# code path for MIPS 1004K core. Per René von Dorst's suggestions.
+#
+# IALU/gcc
+# R1x000 ~9.8/? (big-endian)
+# Octeon II 3.65/+140% (little-endian)
+# MT7621/1004K 4.75/? (little-endian)
+#
+######################################################################
+# There is a number of MIPS ABI in use, O32 and N32/64 are most
+# widely used. Then there is a new contender: NUBI. It appears that if
+# one picks the latter, it's possible to arrange code in ABI neutral
+# manner. Therefore let's stick to NUBI register layout:
+#
+($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25));
+($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
+($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23));
+($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31));
+#
+# The return value is placed in $a0. Following coding rules facilitate
+# interoperability:
+#
+# - never ever touch $tp, "thread pointer", former $gp [o32 can be
+# excluded from the rule, because it's specified volatile];
+# - copy return value to $t0, former $v0 [or to $a0 if you're adapting
+# old code];
+# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
+#
+# For reference here is register layout for N32/64 MIPS ABIs:
+#
+# ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
+# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
+# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
+# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
+# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
+#
+# <appro@openssl.org>
+#
+######################################################################
+
+$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64
+
+$v0 = ($flavour =~ /nubi/i) ? $a0 : $t0;
+
+if ($flavour =~ /64|n32/i) {{{
+######################################################################
+# 64-bit code path
+#
+
+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
+my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1);
+
+$code.=<<___;
+#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
+ defined(_MIPS_ARCH_MIPS64R6)) \\
+ && !defined(_MIPS_ARCH_MIPS64R2)
+# define _MIPS_ARCH_MIPS64R2
+#endif
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+# define dmultu(rs,rt)
+# define mflo(rd,rs,rt) dmulu rd,rs,rt
+# define mfhi(rd,rs,rt) dmuhu rd,rs,rt
+#else
+# define dmultu(rs,rt) dmultu rs,rt
+# define mflo(rd,rs,rt) mflo rd
+# define mfhi(rd,rs,rt) mfhi rd
+#endif
+
+#ifdef __KERNEL__
+# define poly1305_init poly1305_init_mips
+# define poly1305_blocks poly1305_blocks_mips
+# define poly1305_emit poly1305_emit_mips
+#endif
+
+#if defined(__MIPSEB__) && !defined(MIPSEB)
+# define MIPSEB
+#endif
+
+#ifdef MIPSEB
+# define MSB 0
+# define LSB 7
+#else
+# define MSB 7
+# define LSB 0
+#endif
+
+.text
+.set noat
+.set noreorder
+
+.align 5
+.globl poly1305_init
+.ent poly1305_init
+poly1305_init:
+ .frame $sp,0,$ra
+ .set reorder
+
+ sd $zero,0($ctx)
+ sd $zero,8($ctx)
+ sd $zero,16($ctx)
+
+ beqz $inp,.Lno_key
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+ andi $tmp0,$inp,7 # $inp % 8
+ dsubu $inp,$inp,$tmp0 # align $inp
+ sll $tmp0,$tmp0,3 # byte to bit offset
+ ld $in0,0($inp)
+ ld $in1,8($inp)
+ beqz $tmp0,.Laligned_key
+ ld $tmp2,16($inp)
+
+ subu $tmp1,$zero,$tmp0
+# ifdef MIPSEB
+ dsllv $in0,$in0,$tmp0
+ dsrlv $tmp3,$in1,$tmp1
+ dsllv $in1,$in1,$tmp0
+ dsrlv $tmp2,$tmp2,$tmp1
+# else
+ dsrlv $in0,$in0,$tmp0
+ dsllv $tmp3,$in1,$tmp1
+ dsrlv $in1,$in1,$tmp0
+ dsllv $tmp2,$tmp2,$tmp1
+# endif
+ or $in0,$in0,$tmp3
+ or $in1,$in1,$tmp2
+.Laligned_key:
+#else
+ ldl $in0,0+MSB($inp)
+ ldl $in1,8+MSB($inp)
+ ldr $in0,0+LSB($inp)
+ ldr $in1,8+LSB($inp)
+#endif
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS64R2)
+ dsbh $in0,$in0 # byte swap
+ dsbh $in1,$in1
+ dshd $in0,$in0
+ dshd $in1,$in1
+# else
+ ori $tmp0,$zero,0xFF
+ dsll $tmp2,$tmp0,32
+ or $tmp0,$tmp2 # 0x000000FF000000FF
+
+ and $tmp1,$in0,$tmp0 # byte swap
+ and $tmp3,$in1,$tmp0
+ dsrl $tmp2,$in0,24
+ dsrl $tmp4,$in1,24
+ dsll $tmp1,24
+ dsll $tmp3,24
+ and $tmp2,$tmp0
+ and $tmp4,$tmp0
+ dsll $tmp0,8 # 0x0000FF000000FF00
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ and $tmp2,$in0,$tmp0
+ and $tmp4,$in1,$tmp0
+ dsrl $in0,8
+ dsrl $in1,8
+ dsll $tmp2,8
+ dsll $tmp4,8
+ and $in0,$tmp0
+ and $in1,$tmp0
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ or $in0,$tmp1
+ or $in1,$tmp3
+ dsrl $tmp1,$in0,32
+ dsrl $tmp3,$in1,32
+ dsll $in0,32
+ dsll $in1,32
+ or $in0,$tmp1
+ or $in1,$tmp3
+# endif
+#endif
+ li $tmp0,1
+ dsll $tmp0,32 # 0x0000000100000000
+ daddiu $tmp0,-63 # 0x00000000ffffffc1
+ dsll $tmp0,28 # 0x0ffffffc10000000
+ daddiu $tmp0,-1 # 0x0ffffffc0fffffff
+
+ and $in0,$tmp0
+ daddiu $tmp0,-3 # 0x0ffffffc0ffffffc
+ and $in1,$tmp0
+
+ sd $in0,24($ctx)
+ dsrl $tmp0,$in1,2
+ sd $in1,32($ctx)
+ daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
+ sd $tmp0,40($ctx)
+
+.Lno_key:
+ li $v0,0 # return 0
+ jr $ra
+.end poly1305_init
+___
+{
+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
+
+my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
+ ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
+my ($shr,$shl) = ($s6,$s7); # used on R6
+
+$code.=<<___;
+.align 5
+.globl poly1305_blocks
+.ent poly1305_blocks
+poly1305_blocks:
+ .set noreorder
+ dsrl $len,4 # number of complete blocks
+ bnez $len,poly1305_blocks_internal
+ nop
+ jr $ra
+ nop
+.end poly1305_blocks
+
+.align 5
+.ent poly1305_blocks_internal
+poly1305_blocks_internal:
+ .set noreorder
+#if defined(_MIPS_ARCH_MIPS64R6)
+ .frame $sp,8*8,$ra
+ .mask $SAVED_REGS_MASK|0x000c0000,-8
+ dsubu $sp,8*8
+ sd $s7,56($sp)
+ sd $s6,48($sp)
+#else
+ .frame $sp,6*8,$ra
+ .mask $SAVED_REGS_MASK,-8
+ dsubu $sp,6*8
+#endif
+ sd $s5,40($sp)
+ sd $s4,32($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
+ sd $s3,24($sp)
+ sd $s2,16($sp)
+ sd $s1,8($sp)
+ sd $s0,0($sp)
+___
+$code.=<<___;
+ .set reorder
+
+#if defined(_MIPS_ARCH_MIPS64R6)
+ andi $shr,$inp,7
+ dsubu $inp,$inp,$shr # align $inp
+ sll $shr,$shr,3 # byte to bit offset
+ subu $shl,$zero,$shr
+#endif
+
+ ld $h0,0($ctx) # load hash value
+ ld $h1,8($ctx)
+ ld $h2,16($ctx)
+
+ ld $r0,24($ctx) # load key
+ ld $r1,32($ctx)
+ ld $rs1,40($ctx)
+
+ dsll $len,4
+ daddu $len,$inp # end of buffer
+ b .Loop
+
+.align 4
+.Loop:
+#if defined(_MIPS_ARCH_MIPS64R6)
+ ld $in0,0($inp) # load input
+ ld $in1,8($inp)
+ beqz $shr,.Laligned_inp
+
+ ld $tmp2,16($inp)
+# ifdef MIPSEB
+ dsllv $in0,$in0,$shr
+ dsrlv $tmp3,$in1,$shl
+ dsllv $in1,$in1,$shr
+ dsrlv $tmp2,$tmp2,$shl
+# else
+ dsrlv $in0,$in0,$shr
+ dsllv $tmp3,$in1,$shl
+ dsrlv $in1,$in1,$shr
+ dsllv $tmp2,$tmp2,$shl
+# endif
+ or $in0,$in0,$tmp3
+ or $in1,$in1,$tmp2
+.Laligned_inp:
+#else
+ ldl $in0,0+MSB($inp) # load input
+ ldl $in1,8+MSB($inp)
+ ldr $in0,0+LSB($inp)
+ ldr $in1,8+LSB($inp)
+#endif
+ daddiu $inp,16
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS64R2)
+ dsbh $in0,$in0 # byte swap
+ dsbh $in1,$in1
+ dshd $in0,$in0
+ dshd $in1,$in1
+# else
+ ori $tmp0,$zero,0xFF
+ dsll $tmp2,$tmp0,32
+ or $tmp0,$tmp2 # 0x000000FF000000FF
+
+ and $tmp1,$in0,$tmp0 # byte swap
+ and $tmp3,$in1,$tmp0
+ dsrl $tmp2,$in0,24
+ dsrl $tmp4,$in1,24
+ dsll $tmp1,24
+ dsll $tmp3,24
+ and $tmp2,$tmp0
+ and $tmp4,$tmp0
+ dsll $tmp0,8 # 0x0000FF000000FF00
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ and $tmp2,$in0,$tmp0
+ and $tmp4,$in1,$tmp0
+ dsrl $in0,8
+ dsrl $in1,8
+ dsll $tmp2,8
+ dsll $tmp4,8
+ and $in0,$tmp0
+ and $in1,$tmp0
+ or $tmp1,$tmp2
+ or $tmp3,$tmp4
+ or $in0,$tmp1
+ or $in1,$tmp3
+ dsrl $tmp1,$in0,32
+ dsrl $tmp3,$in1,32
+ dsll $in0,32
+ dsll $in1,32
+ or $in0,$tmp1
+ or $in1,$tmp3
+# endif
+#endif
+ dsrl $tmp1,$h2,2 # modulo-scheduled reduction
+ andi $h2,$h2,3
+ dsll $tmp0,$tmp1,2
+
+ daddu $d0,$h0,$in0 # accumulate input
+ daddu $tmp1,$tmp0
+ sltu $tmp0,$d0,$h0
+ daddu $d0,$d0,$tmp1 # ... and residue
+ sltu $tmp1,$d0,$tmp1
+ daddu $d1,$h1,$in1
+ daddu $tmp0,$tmp1
+ sltu $tmp1,$d1,$h1
+ daddu $d1,$tmp0
+
+ dmultu ($r0,$d0) # h0*r0
+ daddu $d2,$h2,$padbit
+ sltu $tmp0,$d1,$tmp0
+ mflo ($h0,$r0,$d0)
+ mfhi ($h1,$r0,$d0)
+
+ dmultu ($rs1,$d1) # h1*5*r1
+ daddu $d2,$tmp1
+ daddu $d2,$tmp0
+ mflo ($tmp0,$rs1,$d1)
+ mfhi ($tmp1,$rs1,$d1)
+
+ dmultu ($r1,$d0) # h0*r1
+ mflo ($tmp2,$r1,$d0)
+ mfhi ($h2,$r1,$d0)
+ daddu $h0,$tmp0
+ daddu $h1,$tmp1
+ sltu $tmp0,$h0,$tmp0
+
+ dmultu ($r0,$d1) # h1*r0
+ daddu $h1,$tmp0
+ daddu $h1,$tmp2
+ mflo ($tmp0,$r0,$d1)
+ mfhi ($tmp1,$r0,$d1)
+
+ dmultu ($rs1,$d2) # h2*5*r1
+ sltu $tmp2,$h1,$tmp2
+ daddu $h2,$tmp2
+ mflo ($tmp2,$rs1,$d2)
+
+ dmultu ($r0,$d2) # h2*r0
+ daddu $h1,$tmp0
+ daddu $h2,$tmp1
+ mflo ($tmp3,$r0,$d2)
+ sltu $tmp0,$h1,$tmp0
+ daddu $h2,$tmp0
+
+ daddu $h1,$tmp2
+ sltu $tmp2,$h1,$tmp2
+ daddu $h2,$tmp2
+ daddu $h2,$tmp3
+
+ bne $inp,$len,.Loop
+
+ sd $h0,0($ctx) # store hash value
+ sd $h1,8($ctx)
+ sd $h2,16($ctx)
+
+ .set noreorder
+#if defined(_MIPS_ARCH_MIPS64R6)
+ ld $s7,56($sp)
+ ld $s6,48($sp)
+#endif
+ ld $s5,40($sp) # epilogue
+ ld $s4,32($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
+ ld $s3,24($sp)
+ ld $s2,16($sp)
+ ld $s1,8($sp)
+ ld $s0,0($sp)
+___
+$code.=<<___;
+ jr $ra
+#if defined(_MIPS_ARCH_MIPS64R6)
+ daddu $sp,8*8
+#else
+ daddu $sp,6*8
+#endif
+.end poly1305_blocks_internal
+___
+}
+{
+my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
+
+$code.=<<___;
+.align 5
+.globl poly1305_emit
+.ent poly1305_emit
+poly1305_emit:
+ .frame $sp,0,$ra
+ .set reorder
+
+ ld $tmp2,16($ctx)
+ ld $tmp0,0($ctx)
+ ld $tmp1,8($ctx)
+
+ li $in0,-4 # final reduction
+ dsrl $in1,$tmp2,2
+ and $in0,$tmp2
+ andi $tmp2,$tmp2,3
+ daddu $in0,$in1
+
+ daddu $tmp0,$tmp0,$in0
+ sltu $in1,$tmp0,$in0
+ daddiu $in0,$tmp0,5 # compare to modulus
+ daddu $tmp1,$tmp1,$in1
+ sltiu $tmp3,$in0,5
+ sltu $tmp4,$tmp1,$in1
+ daddu $in1,$tmp1,$tmp3
+ daddu $tmp2,$tmp2,$tmp4
+ sltu $tmp3,$in1,$tmp3
+ daddu $tmp2,$tmp2,$tmp3
+
+ dsrl $tmp2,2 # see if it carried/borrowed
+ dsubu $tmp2,$zero,$tmp2
+
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+ and $in0,$tmp2
+ and $in1,$tmp2
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+
+ lwu $tmp0,0($nonce) # load nonce
+ lwu $tmp1,4($nonce)
+ lwu $tmp2,8($nonce)
+ lwu $tmp3,12($nonce)
+ dsll $tmp1,32
+ dsll $tmp3,32
+ or $tmp0,$tmp1
+ or $tmp2,$tmp3
+
+ daddu $in0,$tmp0 # accumulate nonce
+ daddu $in1,$tmp2
+ sltu $tmp0,$in0,$tmp0
+ daddu $in1,$tmp0
+
+ dsrl $tmp0,$in0,8 # write mac value
+ dsrl $tmp1,$in0,16
+ dsrl $tmp2,$in0,24
+ sb $in0,0($mac)
+ dsrl $tmp3,$in0,32
+ sb $tmp0,1($mac)
+ dsrl $tmp0,$in0,40
+ sb $tmp1,2($mac)
+ dsrl $tmp1,$in0,48
+ sb $tmp2,3($mac)
+ dsrl $tmp2,$in0,56
+ sb $tmp3,4($mac)
+ dsrl $tmp3,$in1,8
+ sb $tmp0,5($mac)
+ dsrl $tmp0,$in1,16
+ sb $tmp1,6($mac)
+ dsrl $tmp1,$in1,24
+ sb $tmp2,7($mac)
+
+ sb $in1,8($mac)
+ dsrl $tmp2,$in1,32
+ sb $tmp3,9($mac)
+ dsrl $tmp3,$in1,40
+ sb $tmp0,10($mac)
+ dsrl $tmp0,$in1,48
+ sb $tmp1,11($mac)
+ dsrl $tmp1,$in1,56
+ sb $tmp2,12($mac)
+ sb $tmp3,13($mac)
+ sb $tmp0,14($mac)
+ sb $tmp1,15($mac)
+
+ jr $ra
+.end poly1305_emit
+.rdata
+.asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm"
+.align 2
+___
+}
+}}} else {{{
+######################################################################
+# 32-bit code path
+#
+
+my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
+my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) =
+ ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2);
+
+$code.=<<___;
+#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\
+ defined(_MIPS_ARCH_MIPS32R6)) \\
+ && !defined(_MIPS_ARCH_MIPS32R2)
+# define _MIPS_ARCH_MIPS32R2
+#endif
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+# define multu(rs,rt)
+# define mflo(rd,rs,rt) mulu rd,rs,rt
+# define mfhi(rd,rs,rt) muhu rd,rs,rt
+#else
+# define multu(rs,rt) multu rs,rt
+# define mflo(rd,rs,rt) mflo rd
+# define mfhi(rd,rs,rt) mfhi rd
+#endif
+
+#ifdef __KERNEL__
+# define poly1305_init poly1305_init_mips
+# define poly1305_blocks poly1305_blocks_mips
+# define poly1305_emit poly1305_emit_mips
+#endif
+
+#if defined(__MIPSEB__) && !defined(MIPSEB)
+# define MIPSEB
+#endif
+
+#ifdef MIPSEB
+# define MSB 0
+# define LSB 3
+#else
+# define MSB 3
+# define LSB 0
+#endif
+
+.text
+.set noat
+.set noreorder
+
+.align 5
+.globl poly1305_init
+.ent poly1305_init
+poly1305_init:
+ .frame $sp,0,$ra
+ .set reorder
+
+ sw $zero,0($ctx)
+ sw $zero,4($ctx)
+ sw $zero,8($ctx)
+ sw $zero,12($ctx)
+ sw $zero,16($ctx)
+
+ beqz $inp,.Lno_key
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+ andi $tmp0,$inp,3 # $inp % 4
+ subu $inp,$inp,$tmp0 # align $inp
+ sll $tmp0,$tmp0,3 # byte to bit offset
+ lw $in0,0($inp)
+ lw $in1,4($inp)
+ lw $in2,8($inp)
+ lw $in3,12($inp)
+ beqz $tmp0,.Laligned_key
+
+ lw $tmp2,16($inp)
+ subu $tmp1,$zero,$tmp0
+# ifdef MIPSEB
+ sllv $in0,$in0,$tmp0
+ srlv $tmp3,$in1,$tmp1
+ sllv $in1,$in1,$tmp0
+ or $in0,$in0,$tmp3
+ srlv $tmp3,$in2,$tmp1
+ sllv $in2,$in2,$tmp0
+ or $in1,$in1,$tmp3
+ srlv $tmp3,$in3,$tmp1
+ sllv $in3,$in3,$tmp0
+ or $in2,$in2,$tmp3
+ srlv $tmp2,$tmp2,$tmp1
+ or $in3,$in3,$tmp2
+# else
+ srlv $in0,$in0,$tmp0
+ sllv $tmp3,$in1,$tmp1
+ srlv $in1,$in1,$tmp0
+ or $in0,$in0,$tmp3
+ sllv $tmp3,$in2,$tmp1
+ srlv $in2,$in2,$tmp0
+ or $in1,$in1,$tmp3
+ sllv $tmp3,$in3,$tmp1
+ srlv $in3,$in3,$tmp0
+ or $in2,$in2,$tmp3
+ sllv $tmp2,$tmp2,$tmp1
+ or $in3,$in3,$tmp2
+# endif
+.Laligned_key:
+#else
+ lwl $in0,0+MSB($inp)
+ lwl $in1,4+MSB($inp)
+ lwl $in2,8+MSB($inp)
+ lwl $in3,12+MSB($inp)
+ lwr $in0,0+LSB($inp)
+ lwr $in1,4+LSB($inp)
+ lwr $in2,8+LSB($inp)
+ lwr $in3,12+LSB($inp)
+#endif
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS32R2)
+ wsbh $in0,$in0 # byte swap
+ wsbh $in1,$in1
+ wsbh $in2,$in2
+ wsbh $in3,$in3
+ rotr $in0,$in0,16
+ rotr $in1,$in1,16
+ rotr $in2,$in2,16
+ rotr $in3,$in3,16
+# else
+ srl $tmp0,$in0,24 # byte swap
+ srl $tmp1,$in0,8
+ andi $tmp2,$in0,0xFF00
+ sll $in0,$in0,24
+ andi $tmp1,0xFF00
+ sll $tmp2,$tmp2,8
+ or $in0,$tmp0
+ srl $tmp0,$in1,24
+ or $tmp1,$tmp2
+ srl $tmp2,$in1,8
+ or $in0,$tmp1
+ andi $tmp1,$in1,0xFF00
+ sll $in1,$in1,24
+ andi $tmp2,0xFF00
+ sll $tmp1,$tmp1,8
+ or $in1,$tmp0
+ srl $tmp0,$in2,24
+ or $tmp2,$tmp1
+ srl $tmp1,$in2,8
+ or $in1,$tmp2
+ andi $tmp2,$in2,0xFF00
+ sll $in2,$in2,24
+ andi $tmp1,0xFF00
+ sll $tmp2,$tmp2,8
+ or $in2,$tmp0
+ srl $tmp0,$in3,24
+ or $tmp1,$tmp2
+ srl $tmp2,$in3,8
+ or $in2,$tmp1
+ andi $tmp1,$in3,0xFF00
+ sll $in3,$in3,24
+ andi $tmp2,0xFF00
+ sll $tmp1,$tmp1,8
+ or $in3,$tmp0
+ or $tmp2,$tmp1
+ or $in3,$tmp2
+# endif
+#endif
+ lui $tmp0,0x0fff
+ ori $tmp0,0xffff # 0x0fffffff
+ and $in0,$in0,$tmp0
+ subu $tmp0,3 # 0x0ffffffc
+ and $in1,$in1,$tmp0
+ and $in2,$in2,$tmp0
+ and $in3,$in3,$tmp0
+
+ sw $in0,20($ctx)
+ sw $in1,24($ctx)
+ sw $in2,28($ctx)
+ sw $in3,32($ctx)
+
+ srl $tmp1,$in1,2
+ srl $tmp2,$in2,2
+ srl $tmp3,$in3,2
+ addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2)
+ addu $in2,$in2,$tmp2
+ addu $in3,$in3,$tmp3
+ sw $in1,36($ctx)
+ sw $in2,40($ctx)
+ sw $in3,44($ctx)
+.Lno_key:
+ li $v0,0
+ jr $ra
+.end poly1305_init
+___
+{
+my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000";
+
+my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) =
+ ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11);
+my ($d0,$d1,$d2,$d3) =
+ ($a4,$a5,$a6,$a7);
+my $shr = $t2; # used on R6
+my $one = $t2; # used on R2
+
+$code.=<<___;
+.globl poly1305_blocks
+.align 5
+.ent poly1305_blocks
+poly1305_blocks:
+ .frame $sp,16*4,$ra
+ .mask $SAVED_REGS_MASK,-4
+ .set noreorder
+ subu $sp, $sp,4*12
+ sw $s11,4*11($sp)
+ sw $s10,4*10($sp)
+ sw $s9, 4*9($sp)
+ sw $s8, 4*8($sp)
+ sw $s7, 4*7($sp)
+ sw $s6, 4*6($sp)
+ sw $s5, 4*5($sp)
+ sw $s4, 4*4($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
+ sw $s3, 4*3($sp)
+ sw $s2, 4*2($sp)
+ sw $s1, 4*1($sp)
+ sw $s0, 4*0($sp)
+___
+$code.=<<___;
+ .set reorder
+
+ srl $len,4 # number of complete blocks
+ li $one,1
+ beqz $len,.Labort
+
+#if defined(_MIPS_ARCH_MIPS32R6)
+ andi $shr,$inp,3
+ subu $inp,$inp,$shr # align $inp
+ sll $shr,$shr,3 # byte to bit offset
+#endif
+
+ lw $h0,0($ctx) # load hash value
+ lw $h1,4($ctx)
+ lw $h2,8($ctx)
+ lw $h3,12($ctx)
+ lw $h4,16($ctx)
+
+ lw $r0,20($ctx) # load key
+ lw $r1,24($ctx)
+ lw $r2,28($ctx)
+ lw $r3,32($ctx)
+ lw $rs1,36($ctx)
+ lw $rs2,40($ctx)
+ lw $rs3,44($ctx)
+
+ sll $len,4
+ addu $len,$len,$inp # end of buffer
+ b .Loop
+
+.align 4
+.Loop:
+#if defined(_MIPS_ARCH_MIPS32R6)
+ lw $d0,0($inp) # load input
+ lw $d1,4($inp)
+ lw $d2,8($inp)
+ lw $d3,12($inp)
+ beqz $shr,.Laligned_inp
+
+ lw $t0,16($inp)
+ subu $t1,$zero,$shr
+# ifdef MIPSEB
+ sllv $d0,$d0,$shr
+ srlv $at,$d1,$t1
+ sllv $d1,$d1,$shr
+ or $d0,$d0,$at
+ srlv $at,$d2,$t1
+ sllv $d2,$d2,$shr
+ or $d1,$d1,$at
+ srlv $at,$d3,$t1
+ sllv $d3,$d3,$shr
+ or $d2,$d2,$at
+ srlv $t0,$t0,$t1
+ or $d3,$d3,$t0
+# else
+ srlv $d0,$d0,$shr
+ sllv $at,$d1,$t1
+ srlv $d1,$d1,$shr
+ or $d0,$d0,$at
+ sllv $at,$d2,$t1
+ srlv $d2,$d2,$shr
+ or $d1,$d1,$at
+ sllv $at,$d3,$t1
+ srlv $d3,$d3,$shr
+ or $d2,$d2,$at
+ sllv $t0,$t0,$t1
+ or $d3,$d3,$t0
+# endif
+.Laligned_inp:
+#else
+ lwl $d0,0+MSB($inp) # load input
+ lwl $d1,4+MSB($inp)
+ lwl $d2,8+MSB($inp)
+ lwl $d3,12+MSB($inp)
+ lwr $d0,0+LSB($inp)
+ lwr $d1,4+LSB($inp)
+ lwr $d2,8+LSB($inp)
+ lwr $d3,12+LSB($inp)
+#endif
+#ifdef MIPSEB
+# if defined(_MIPS_ARCH_MIPS32R2)
+ wsbh $d0,$d0 # byte swap
+ wsbh $d1,$d1
+ wsbh $d2,$d2
+ wsbh $d3,$d3
+ rotr $d0,$d0,16
+ rotr $d1,$d1,16
+ rotr $d2,$d2,16
+ rotr $d3,$d3,16
+# else
+ srl $at,$d0,24 # byte swap
+ srl $t0,$d0,8
+ andi $t1,$d0,0xFF00
+ sll $d0,$d0,24
+ andi $t0,0xFF00
+ sll $t1,$t1,8
+ or $d0,$at
+ srl $at,$d1,24
+ or $t0,$t1
+ srl $t1,$d1,8
+ or $d0,$t0
+ andi $t0,$d1,0xFF00
+ sll $d1,$d1,24
+ andi $t1,0xFF00
+ sll $t0,$t0,8
+ or $d1,$at
+ srl $at,$d2,24
+ or $t1,$t0
+ srl $t0,$d2,8
+ or $d1,$t1
+ andi $t1,$d2,0xFF00
+ sll $d2,$d2,24
+ andi $t0,0xFF00
+ sll $t1,$t1,8
+ or $d2,$at
+ srl $at,$d3,24
+ or $t0,$t1
+ srl $t1,$d3,8
+ or $d2,$t0
+ andi $t0,$d3,0xFF00
+ sll $d3,$d3,24
+ andi $t1,0xFF00
+ sll $t0,$t0,8
+ or $d3,$at
+ or $t1,$t0
+ or $d3,$t1
+# endif
+#endif
+ srl $t0,$h4,2 # modulo-scheduled reduction
+ andi $h4,$h4,3
+ sll $at,$t0,2
+
+ addu $d0,$d0,$h0 # accumulate input
+ addu $t0,$t0,$at
+ sltu $h0,$d0,$h0
+ addu $d0,$d0,$t0 # ... and residue
+ sltu $at,$d0,$t0
+
+ addu $d1,$d1,$h1
+ addu $h0,$h0,$at # carry
+ sltu $h1,$d1,$h1
+ addu $d1,$d1,$h0
+ sltu $h0,$d1,$h0
+
+ addu $d2,$d2,$h2
+ addu $h1,$h1,$h0 # carry
+ sltu $h2,$d2,$h2
+ addu $d2,$d2,$h1
+ sltu $h1,$d2,$h1
+
+ addu $d3,$d3,$h3
+ addu $h2,$h2,$h1 # carry
+ sltu $h3,$d3,$h3
+ addu $d3,$d3,$h2
+
+#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6)
+ multu $r0,$d0 # d0*r0
+ sltu $h2,$d3,$h2
+ maddu $rs3,$d1 # d1*s3
+ addu $h3,$h3,$h2 # carry
+ maddu $rs2,$d2 # d2*s2
+ addu $h4,$h4,$padbit
+ maddu $rs1,$d3 # d3*s1
+ addu $h4,$h4,$h3
+ mfhi $at
+ mflo $h0
+
+ multu $r1,$d0 # d0*r1
+ maddu $r0,$d1 # d1*r0
+ maddu $rs3,$d2 # d2*s3
+ maddu $rs2,$d3 # d3*s2
+ maddu $rs1,$h4 # h4*s1
+ maddu $at,$one # hi*1
+ mfhi $at
+ mflo $h1
+
+ multu $r2,$d0 # d0*r2
+ maddu $r1,$d1 # d1*r1
+ maddu $r0,$d2 # d2*r0
+ maddu $rs3,$d3 # d3*s3
+ maddu $rs2,$h4 # h4*s2
+ maddu $at,$one # hi*1
+ mfhi $at
+ mflo $h2
+
+ mul $t0,$r0,$h4 # h4*r0
+
+ multu $r3,$d0 # d0*r3
+ maddu $r2,$d1 # d1*r2
+ maddu $r1,$d2 # d2*r1
+ maddu $r0,$d3 # d3*r0
+ maddu $rs3,$h4 # h4*s3
+ maddu $at,$one # hi*1
+ mfhi $at
+ mflo $h3
+
+ addiu $inp,$inp,16
+
+ addu $h4,$t0,$at
+#else
+ multu ($r0,$d0) # d0*r0
+ mflo ($h0,$r0,$d0)
+ mfhi ($h1,$r0,$d0)
+
+ sltu $h2,$d3,$h2
+ addu $h3,$h3,$h2 # carry
+
+ multu ($rs3,$d1) # d1*s3
+ mflo ($at,$rs3,$d1)
+ mfhi ($t0,$rs3,$d1)
+
+ addu $h4,$h4,$padbit
+ addiu $inp,$inp,16
+ addu $h4,$h4,$h3
+
+ multu ($rs2,$d2) # d2*s2
+ mflo ($a3,$rs2,$d2)
+ mfhi ($t1,$rs2,$d2)
+ addu $h0,$h0,$at
+ addu $h1,$h1,$t0
+ multu ($rs1,$d3) # d3*s1
+ sltu $at,$h0,$at
+ addu $h1,$h1,$at
+
+ mflo ($at,$rs1,$d3)
+ mfhi ($t0,$rs1,$d3)
+ addu $h0,$h0,$a3
+ addu $h1,$h1,$t1
+ multu ($r1,$d0) # d0*r1
+ sltu $a3,$h0,$a3
+ addu $h1,$h1,$a3
+
+
+ mflo ($a3,$r1,$d0)
+ mfhi ($h2,$r1,$d0)
+ addu $h0,$h0,$at
+ addu $h1,$h1,$t0
+ multu ($r0,$d1) # d1*r0
+ sltu $at,$h0,$at
+ addu $h1,$h1,$at
+
+ mflo ($at,$r0,$d1)
+ mfhi ($t0,$r0,$d1)
+ addu $h1,$h1,$a3
+ sltu $a3,$h1,$a3
+ multu ($rs3,$d2) # d2*s3
+ addu $h2,$h2,$a3
+
+ mflo ($a3,$rs3,$d2)
+ mfhi ($t1,$rs3,$d2)
+ addu $h1,$h1,$at
+ addu $h2,$h2,$t0
+ multu ($rs2,$d3) # d3*s2
+ sltu $at,$h1,$at
+ addu $h2,$h2,$at
+
+ mflo ($at,$rs2,$d3)
+ mfhi ($t0,$rs2,$d3)
+ addu $h1,$h1,$a3
+ addu $h2,$h2,$t1
+ multu ($rs1,$h4) # h4*s1
+ sltu $a3,$h1,$a3
+ addu $h2,$h2,$a3
+
+ mflo ($a3,$rs1,$h4)
+ addu $h1,$h1,$at
+ addu $h2,$h2,$t0
+ multu ($r2,$d0) # d0*r2
+ sltu $at,$h1,$at
+ addu $h2,$h2,$at
+
+
+ mflo ($at,$r2,$d0)
+ mfhi ($h3,$r2,$d0)
+ addu $h1,$h1,$a3
+ sltu $a3,$h1,$a3
+ multu ($r1,$d1) # d1*r1
+ addu $h2,$h2,$a3
+
+ mflo ($a3,$r1,$d1)
+ mfhi ($t1,$r1,$d1)
+ addu $h2,$h2,$at
+ sltu $at,$h2,$at
+ multu ($r0,$d2) # d2*r0
+ addu $h3,$h3,$at
+
+ mflo ($at,$r0,$d2)
+ mfhi ($t0,$r0,$d2)
+ addu $h2,$h2,$a3
+ addu $h3,$h3,$t1
+ multu ($rs3,$d3) # d3*s3
+ sltu $a3,$h2,$a3
+ addu $h3,$h3,$a3
+
+ mflo ($a3,$rs3,$d3)
+ mfhi ($t1,$rs3,$d3)
+ addu $h2,$h2,$at
+ addu $h3,$h3,$t0
+ multu ($rs2,$h4) # h4*s2
+ sltu $at,$h2,$at
+ addu $h3,$h3,$at
+
+ mflo ($at,$rs2,$h4)
+ addu $h2,$h2,$a3
+ addu $h3,$h3,$t1
+ multu ($r3,$d0) # d0*r3
+ sltu $a3,$h2,$a3
+ addu $h3,$h3,$a3
+
+
+ mflo ($a3,$r3,$d0)
+ mfhi ($t1,$r3,$d0)
+ addu $h2,$h2,$at
+ sltu $at,$h2,$at
+ multu ($r2,$d1) # d1*r2
+ addu $h3,$h3,$at
+
+ mflo ($at,$r2,$d1)
+ mfhi ($t0,$r2,$d1)
+ addu $h3,$h3,$a3
+ sltu $a3,$h3,$a3
+ multu ($r0,$d3) # d3*r0
+ addu $t1,$t1,$a3
+
+ mflo ($a3,$r0,$d3)
+ mfhi ($d3,$r0,$d3)
+ addu $h3,$h3,$at
+ addu $t1,$t1,$t0
+ multu ($r1,$d2) # d2*r1
+ sltu $at,$h3,$at
+ addu $t1,$t1,$at
+
+ mflo ($at,$r1,$d2)
+ mfhi ($t0,$r1,$d2)
+ addu $h3,$h3,$a3
+ addu $t1,$t1,$d3
+ multu ($rs3,$h4) # h4*s3
+ sltu $a3,$h3,$a3
+ addu $t1,$t1,$a3
+
+ mflo ($a3,$rs3,$h4)
+ addu $h3,$h3,$at
+ addu $t1,$t1,$t0
+ multu ($r0,$h4) # h4*r0
+ sltu $at,$h3,$at
+ addu $t1,$t1,$at
+
+
+ mflo ($h4,$r0,$h4)
+ addu $h3,$h3,$a3
+ sltu $a3,$h3,$a3
+ addu $t1,$t1,$a3
+ addu $h4,$h4,$t1
+
+ li $padbit,1 # if we loop, padbit is 1
+#endif
+ bne $inp,$len,.Loop
+
+ sw $h0,0($ctx) # store hash value
+ sw $h1,4($ctx)
+ sw $h2,8($ctx)
+ sw $h3,12($ctx)
+ sw $h4,16($ctx)
+
+ .set noreorder
+.Labort:
+ lw $s11,4*11($sp)
+ lw $s10,4*10($sp)
+ lw $s9, 4*9($sp)
+ lw $s8, 4*8($sp)
+ lw $s7, 4*7($sp)
+ lw $s6, 4*6($sp)
+ lw $s5, 4*5($sp)
+ lw $s4, 4*4($sp)
+___
+$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
+ lw $s3, 4*3($sp)
+ lw $s2, 4*2($sp)
+ lw $s1, 4*1($sp)
+ lw $s0, 4*0($sp)
+___
+$code.=<<___;
+ jr $ra
+ addu $sp,$sp,4*12
+.end poly1305_blocks
+___
+}
+{
+my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3);
+
+$code.=<<___;
+.align 5
+.globl poly1305_emit
+.ent poly1305_emit
+poly1305_emit:
+ .frame $sp,0,$ra
+ .set reorder
+
+ lw $tmp4,16($ctx)
+ lw $tmp0,0($ctx)
+ lw $tmp1,4($ctx)
+ lw $tmp2,8($ctx)
+ lw $tmp3,12($ctx)
+
+ li $in0,-4 # final reduction
+ srl $ctx,$tmp4,2
+ and $in0,$in0,$tmp4
+ andi $tmp4,$tmp4,3
+ addu $ctx,$ctx,$in0
+
+ addu $tmp0,$tmp0,$ctx
+ sltu $ctx,$tmp0,$ctx
+ addiu $in0,$tmp0,5 # compare to modulus
+ addu $tmp1,$tmp1,$ctx
+ sltiu $in1,$in0,5
+ sltu $ctx,$tmp1,$ctx
+ addu $in1,$in1,$tmp1
+ addu $tmp2,$tmp2,$ctx
+ sltu $in2,$in1,$tmp1
+ sltu $ctx,$tmp2,$ctx
+ addu $in2,$in2,$tmp2
+ addu $tmp3,$tmp3,$ctx
+ sltu $in3,$in2,$tmp2
+ sltu $ctx,$tmp3,$ctx
+ addu $in3,$in3,$tmp3
+ addu $tmp4,$tmp4,$ctx
+ sltu $ctx,$in3,$tmp3
+ addu $ctx,$tmp4
+
+ srl $ctx,2 # see if it carried/borrowed
+ subu $ctx,$zero,$ctx
+
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+ xor $in2,$tmp2
+ xor $in3,$tmp3
+ and $in0,$ctx
+ and $in1,$ctx
+ and $in2,$ctx
+ and $in3,$ctx
+ xor $in0,$tmp0
+ xor $in1,$tmp1
+ xor $in2,$tmp2
+ xor $in3,$tmp3
+
+ lw $tmp0,0($nonce) # load nonce
+ lw $tmp1,4($nonce)
+ lw $tmp2,8($nonce)
+ lw $tmp3,12($nonce)
+
+ addu $in0,$tmp0 # accumulate nonce
+ sltu $ctx,$in0,$tmp0
+
+ addu $in1,$tmp1
+ sltu $tmp1,$in1,$tmp1
+ addu $in1,$ctx
+ sltu $ctx,$in1,$ctx
+ addu $ctx,$tmp1
+
+ addu $in2,$tmp2
+ sltu $tmp2,$in2,$tmp2
+ addu $in2,$ctx
+ sltu $ctx,$in2,$ctx
+ addu $ctx,$tmp2
+
+ addu $in3,$tmp3
+ addu $in3,$ctx
+
+ srl $tmp0,$in0,8 # write mac value
+ srl $tmp1,$in0,16
+ srl $tmp2,$in0,24
+ sb $in0, 0($mac)
+ sb $tmp0,1($mac)
+ srl $tmp0,$in1,8
+ sb $tmp1,2($mac)
+ srl $tmp1,$in1,16
+ sb $tmp2,3($mac)
+ srl $tmp2,$in1,24
+ sb $in1, 4($mac)
+ sb $tmp0,5($mac)
+ srl $tmp0,$in2,8
+ sb $tmp1,6($mac)
+ srl $tmp1,$in2,16
+ sb $tmp2,7($mac)
+ srl $tmp2,$in2,24
+ sb $in2, 8($mac)
+ sb $tmp0,9($mac)
+ srl $tmp0,$in3,8
+ sb $tmp1,10($mac)
+ srl $tmp1,$in3,16
+ sb $tmp2,11($mac)
+ srl $tmp2,$in3,24
+ sb $in3, 12($mac)
+ sb $tmp0,13($mac)
+ sb $tmp1,14($mac)
+ sb $tmp2,15($mac)
+
+ jr $ra
+.end poly1305_emit
+.rdata
+.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm"
+.align 2
+___
+}
+}}}
+
+$output=pop and open STDOUT,">$output";
+print $code;
+close STDOUT;
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index 33ee0d18fb0a..a5f00ec73ea6 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -10,6 +10,11 @@
*/
#define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir)
+/* Cavium Octeon should not have a separate PT_NOTE Program Header. */
+#ifndef CONFIG_CAVIUM_OCTEON_SOC
+#define EMITS_PT_NOTE
+#endif
+
#include <asm-generic/vmlinux.lds.h>
#undef mips
@@ -76,16 +81,8 @@ SECTIONS
__stop___dbe_table = .;
}
-#ifdef CONFIG_CAVIUM_OCTEON_SOC
-#define NOTES_HEADER
-#else /* CONFIG_CAVIUM_OCTEON_SOC */
-#define NOTES_HEADER :note
-#endif /* CONFIG_CAVIUM_OCTEON_SOC */
- NOTES :text NOTES_HEADER
- .dummy : { *(.dummy) } :text
-
_sdata = .; /* Start of data section */
- RODATA
+ RO_DATA(4096)
/* writeable */
.data : { /* Data */
diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig
index 1434fa60f3db..94e9ce994494 100644
--- a/arch/mips/ralink/Kconfig
+++ b/arch/mips/ralink/Kconfig
@@ -51,6 +51,7 @@ choice
select MIPS_GIC
select COMMON_CLK
select CLKSRC_MIPS_GIC
+ select HAVE_PCI if PCI_MT7621
endchoice
choice
diff --git a/arch/nds32/kernel/vmlinux.lds.S b/arch/nds32/kernel/vmlinux.lds.S
index 9e90f30a181d..f679d3397436 100644
--- a/arch/nds32/kernel/vmlinux.lds.S
+++ b/arch/nds32/kernel/vmlinux.lds.S
@@ -53,12 +53,11 @@ SECTIONS
_etext = .; /* End of text and rodata section */
_sdata = .;
- RO_DATA_SECTION(PAGE_SIZE)
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RO_DATA(PAGE_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
_edata = .;
EXCEPTION_TABLE(16)
- NOTES
BSS_SECTION(4, 4, 4)
_end = .;
diff --git a/arch/nios2/kernel/vmlinux.lds.S b/arch/nios2/kernel/vmlinux.lds.S
index 6ad64f14617d..c55a7cfa1075 100644
--- a/arch/nios2/kernel/vmlinux.lds.S
+++ b/arch/nios2/kernel/vmlinux.lds.S
@@ -49,8 +49,8 @@ SECTIONS
__init_end = .;
_sdata = .;
- RO_DATA_SECTION(PAGE_SIZE)
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RO_DATA(PAGE_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
_edata = .;
BSS_SECTION(0, 0, 0)
@@ -58,7 +58,6 @@ SECTIONS
STABS_DEBUG
DWARF_DEBUG
- NOTES
DISCARDS
}
diff --git a/arch/openrisc/kernel/vmlinux.lds.S b/arch/openrisc/kernel/vmlinux.lds.S
index 2e2c72c157f3..60449fd7f16f 100644
--- a/arch/openrisc/kernel/vmlinux.lds.S
+++ b/arch/openrisc/kernel/vmlinux.lds.S
@@ -67,19 +67,18 @@ SECTIONS
_sdata = .;
- /* Page alignment required for RO_DATA_SECTION */
- RO_DATA_SECTION(PAGE_SIZE)
+ /* Page alignment required for RO_DATA */
+ RO_DATA(PAGE_SIZE)
_e_kernel_ro = .;
/* Whatever comes after _e_kernel_ro had better be page-aligend, too */
/* 32 here is cacheline size... recheck this */
- RW_DATA_SECTION(32, PAGE_SIZE, PAGE_SIZE)
+ RW_DATA(32, PAGE_SIZE, PAGE_SIZE)
_edata = .;
EXCEPTION_TABLE(4)
- NOTES
/* Init code and data */
. = ALIGN(PAGE_SIZE);
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S
index 99cd24f2ea01..53e29d88f99c 100644
--- a/arch/parisc/kernel/vmlinux.lds.S
+++ b/arch/parisc/kernel/vmlinux.lds.S
@@ -19,6 +19,7 @@
*(.data..vm0.pte)
#define CC_USING_PATCHABLE_FUNCTION_ENTRY
+#define RO_EXCEPTION_TABLE_ALIGN 8
#include <asm-generic/vmlinux.lds.h>
@@ -109,7 +110,7 @@ SECTIONS
_sdata = .;
/* Architecturally we need to keep __gp below 0x1000000 and thus
- * in front of RO_DATA_SECTION() which stores lots of tracepoint
+ * in front of RO_DATA() which stores lots of tracepoint
* and ftrace symbols. */
#ifdef CONFIG_64BIT
. = ALIGN(16);
@@ -127,11 +128,7 @@ SECTIONS
}
#endif
- RO_DATA_SECTION(8)
-
- /* RO because of BUILDTIME_EXTABLE_SORT */
- EXCEPTION_TABLE(8)
- NOTES
+ RO_DATA(8)
/* unwind info */
.PARISC.unwind : {
@@ -149,7 +146,7 @@ SECTIONS
data_start = .;
/* Data */
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, PAGE_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, PAGE_SIZE)
/* PA-RISC locks requires 16-byte alignment */
. = ALIGN(16);
diff --git a/arch/powerpc/crypto/aes-spe-glue.c b/arch/powerpc/crypto/aes-spe-glue.c
index 3a4ca7d32477..1fad5d4c658d 100644
--- a/arch/powerpc/crypto/aes-spe-glue.c
+++ b/arch/powerpc/crypto/aes-spe-glue.c
@@ -17,7 +17,10 @@
#include <asm/byteorder.h>
#include <asm/switch_to.h>
#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
+#include <crypto/gf128mul.h>
+#include <crypto/scatterwalk.h>
/*
* MAX_BYTES defines the number of bytes that are allowed to be processed
@@ -118,13 +121,19 @@ static int ppc_aes_setkey(struct crypto_tfm *tfm, const u8 *in_key,
return 0;
}
-static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
+static int ppc_aes_setkey_skcipher(struct crypto_skcipher *tfm,
+ const u8 *in_key, unsigned int key_len)
+{
+ return ppc_aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
+static int ppc_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct ppc_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
int err;
- err = xts_check_key(tfm, in_key, key_len);
+ err = xts_verify_key(tfm, in_key, key_len);
if (err)
return err;
@@ -133,7 +142,7 @@ static int ppc_xts_setkey(struct crypto_tfm *tfm, const u8 *in_key,
if (key_len != AES_KEYSIZE_128 &&
key_len != AES_KEYSIZE_192 &&
key_len != AES_KEYSIZE_256) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
@@ -178,208 +187,229 @@ static void ppc_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
spe_end();
}
-static int ppc_ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_ecb_crypt(struct skcipher_request *req, bool enc)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
- ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, nbytes);
+ if (enc)
+ ppc_encrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes);
+ else
+ ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes);
spe_end();
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_ecb_encrypt(struct skcipher_request *req)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
- int err;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
-
- spe_begin();
- ppc_decrypt_ecb(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, ctx->rounds, nbytes);
- spe_end();
-
- err = blkcipher_walk_done(desc, &walk, ubytes);
- }
+ return ppc_ecb_crypt(req, true);
+}
- return err;
+static int ppc_ecb_decrypt(struct skcipher_request *req)
+{
+ return ppc_ecb_crypt(req, false);
}
-static int ppc_cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_cbc_crypt(struct skcipher_request *req, bool enc)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
- ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, nbytes, walk.iv);
+ if (enc)
+ ppc_encrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes,
+ walk.iv);
+ else
+ ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes,
+ walk.iv);
spe_end();
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_cbc_encrypt(struct skcipher_request *req)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
- int err;
-
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
-
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
-
- spe_begin();
- ppc_decrypt_cbc(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, ctx->rounds, nbytes, walk.iv);
- spe_end();
-
- err = blkcipher_walk_done(desc, &walk, ubytes);
- }
+ return ppc_cbc_crypt(req, true);
+}
- return err;
+static int ppc_cbc_decrypt(struct skcipher_request *req)
+{
+ return ppc_cbc_crypt(req, false);
}
-static int ppc_ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_ctr_crypt(struct skcipher_request *req)
{
- struct ppc_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int pbytes, ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
+ err = skcipher_walk_virt(&walk, req, false);
- while ((pbytes = walk.nbytes)) {
- pbytes = pbytes > MAX_BYTES ? MAX_BYTES : pbytes;
- pbytes = pbytes == nbytes ?
- nbytes : pbytes & ~(AES_BLOCK_SIZE - 1);
- ubytes = walk.nbytes - pbytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
ppc_crypt_ctr(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, pbytes , walk.iv);
+ ctx->key_enc, ctx->rounds, nbytes, walk.iv);
spe_end();
- nbytes -= pbytes;
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_xts_crypt(struct skcipher_request *req, bool enc)
{
- struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
u32 *twk;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
+ err = skcipher_walk_virt(&walk, req, false);
twk = ctx->key_twk;
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ while ((nbytes = walk.nbytes) != 0) {
+ nbytes = min_t(unsigned int, nbytes, MAX_BYTES);
+ nbytes = round_down(nbytes, AES_BLOCK_SIZE);
spe_begin();
- ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_enc, ctx->rounds, nbytes, walk.iv, twk);
+ if (enc)
+ ppc_encrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_enc, ctx->rounds, nbytes,
+ walk.iv, twk);
+ else
+ ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
+ ctx->key_dec, ctx->rounds, nbytes,
+ walk.iv, twk);
spe_end();
twk = NULL;
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
}
-static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
- struct scatterlist *src, unsigned int nbytes)
+static int ppc_xts_encrypt(struct skcipher_request *req)
{
- struct ppc_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- unsigned int ubytes;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
+ struct skcipher_request subreq;
+ u8 b[2][AES_BLOCK_SIZE];
int err;
- u32 *twk;
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- twk = ctx->key_twk;
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (tail) {
+ subreq = *req;
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ req->cryptlen - tail, req->iv);
+ req = &subreq;
+ }
- while ((nbytes = walk.nbytes)) {
- ubytes = nbytes > MAX_BYTES ?
- nbytes - MAX_BYTES : nbytes & (AES_BLOCK_SIZE - 1);
- nbytes -= ubytes;
+ err = ppc_xts_crypt(req, true);
+ if (err || !tail)
+ return err;
- spe_begin();
- ppc_decrypt_xts(walk.dst.virt.addr, walk.src.virt.addr,
- ctx->key_dec, ctx->rounds, nbytes, walk.iv, twk);
- spe_end();
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE, 0);
+ memcpy(b[1], b[0], tail);
+ scatterwalk_map_and_copy(b[0], req->src, offset + AES_BLOCK_SIZE, tail, 0);
- twk = NULL;
- err = blkcipher_walk_done(desc, &walk, ubytes);
+ spe_begin();
+ ppc_encrypt_xts(b[0], b[0], ctx->key_enc, ctx->rounds, AES_BLOCK_SIZE,
+ req->iv, NULL);
+ spe_end();
+
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+ return 0;
+}
+
+static int ppc_xts_decrypt(struct skcipher_request *req)
+{
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct ppc_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int tail = req->cryptlen % AES_BLOCK_SIZE;
+ int offset = req->cryptlen - tail - AES_BLOCK_SIZE;
+ struct skcipher_request subreq;
+ u8 b[3][AES_BLOCK_SIZE];
+ le128 twk;
+ int err;
+
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (tail) {
+ subreq = *req;
+ skcipher_request_set_crypt(&subreq, req->src, req->dst,
+ offset, req->iv);
+ req = &subreq;
}
- return err;
+ err = ppc_xts_crypt(req, false);
+ if (err || !tail)
+ return err;
+
+ scatterwalk_map_and_copy(b[1], req->src, offset, AES_BLOCK_SIZE + tail, 0);
+
+ spe_begin();
+ if (!offset)
+ ppc_encrypt_ecb(req->iv, req->iv, ctx->key_twk, ctx->rounds,
+ AES_BLOCK_SIZE);
+
+ gf128mul_x_ble(&twk, (le128 *)req->iv);
+
+ ppc_decrypt_xts(b[1], b[1], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+ (u8 *)&twk, NULL);
+ memcpy(b[0], b[2], tail);
+ memcpy(b[0] + tail, b[1] + tail, AES_BLOCK_SIZE - tail);
+ ppc_decrypt_xts(b[0], b[0], ctx->key_dec, ctx->rounds, AES_BLOCK_SIZE,
+ req->iv, NULL);
+ spe_end();
+
+ scatterwalk_map_and_copy(b[0], req->dst, offset, AES_BLOCK_SIZE + tail, 1);
+
+ return 0;
}
/*
@@ -388,9 +418,9 @@ static int ppc_xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
* This improves IPsec thoughput by another few percent. Additionally we assume
* that AES context is always aligned to at least 8 bytes because it is created
* with kmalloc() in the crypto infrastructure
- *
*/
-static struct crypto_alg aes_algs[] = { {
+
+static struct crypto_alg aes_cipher_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-ppc-spe",
.cra_priority = 300,
@@ -408,96 +438,84 @@ static struct crypto_alg aes_algs[] = { {
.cia_decrypt = ppc_aes_decrypt
}
}
-}, {
- .cra_name = "ecb(aes)",
- .cra_driver_name = "ecb-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ppc_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_aes_setkey,
- .encrypt = ppc_ecb_encrypt,
- .decrypt = ppc_ecb_decrypt,
- }
- }
-}, {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "cbc-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ppc_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_aes_setkey,
- .encrypt = ppc_cbc_encrypt,
- .decrypt = ppc_cbc_decrypt,
- }
- }
-}, {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "ctr-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct ppc_aes_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_aes_setkey,
- .encrypt = ppc_ctr_crypt,
- .decrypt = ppc_ctr_crypt,
- }
- }
-}, {
- .cra_name = "xts(aes)",
- .cra_driver_name = "xts-ppc-spe",
- .cra_priority = 300,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct ppc_xts_ctx),
- .cra_alignmask = 0,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE * 2,
- .max_keysize = AES_MAX_KEY_SIZE * 2,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ppc_xts_setkey,
- .encrypt = ppc_xts_encrypt,
- .decrypt = ppc_xts_decrypt,
- }
+};
+
+static struct skcipher_alg aes_skcipher_algs[] = {
+ {
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_ecb_encrypt,
+ .decrypt = ppc_ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_cbc_encrypt,
+ .decrypt = ppc_cbc_decrypt,
+ }, {
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct ppc_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_aes_setkey_skcipher,
+ .encrypt = ppc_ctr_crypt,
+ .decrypt = ppc_ctr_crypt,
+ .chunksize = AES_BLOCK_SIZE,
+ }, {
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "xts-ppc-spe",
+ .base.cra_priority = 300,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct ppc_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE * 2,
+ .max_keysize = AES_MAX_KEY_SIZE * 2,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ppc_xts_setkey,
+ .encrypt = ppc_xts_encrypt,
+ .decrypt = ppc_xts_decrypt,
}
-} };
+};
static int __init ppc_aes_mod_init(void)
{
- return crypto_register_algs(aes_algs, ARRAY_SIZE(aes_algs));
+ int err;
+
+ err = crypto_register_alg(&aes_cipher_alg);
+ if (err)
+ return err;
+
+ err = crypto_register_skciphers(aes_skcipher_algs,
+ ARRAY_SIZE(aes_skcipher_algs));
+ if (err)
+ crypto_unregister_alg(&aes_cipher_alg);
+ return err;
}
static void __exit ppc_aes_mod_fini(void)
{
- crypto_unregister_algs(aes_algs, ARRAY_SIZE(aes_algs));
+ crypto_unregister_alg(&aes_cipher_alg);
+ crypto_unregister_skciphers(aes_skcipher_algs,
+ ARRAY_SIZE(aes_skcipher_algs));
}
module_init(ppc_aes_mod_init);
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 8561498e653c..d84d1417ddb6 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -152,9 +152,12 @@ void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
/* Patch sites */
extern s32 patch__call_flush_count_cache;
extern s32 patch__flush_count_cache_return;
+extern s32 patch__flush_link_stack_return;
+extern s32 patch__call_kvm_flush_link_stack;
extern s32 patch__memset_nocache, patch__memcpy_nocache;
extern long flush_count_cache;
+extern long kvm_flush_link_stack;
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
void kvmppc_save_tm_hv(struct kvm_vcpu *vcpu, u64 msr, bool preserve_nv);
diff --git a/arch/powerpc/include/asm/local.h b/arch/powerpc/include/asm/local.h
index fdd00939270b..bc4bd19b7fc2 100644
--- a/arch/powerpc/include/asm/local.h
+++ b/arch/powerpc/include/asm/local.h
@@ -17,7 +17,7 @@ typedef struct
#define LOCAL_INIT(i) { (i) }
-static __inline__ long local_read(local_t *l)
+static __inline__ long local_read(const local_t *l)
{
return READ_ONCE(l->v);
}
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index 759597bf0fd8..ccf44c135389 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -81,6 +81,9 @@ static inline bool security_ftr_enabled(unsigned long feature)
// Software required to flush count cache on context switch
#define SEC_FTR_FLUSH_COUNT_CACHE 0x0000000000000400ull
+// Software required to flush link stack on context switch
+#define SEC_FTR_FLUSH_LINK_STACK 0x0000000000001000ull
+
// Features enabled by default
#define SEC_FTR_DEFAULT \
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6467bdab8d40..3fd3ef352e3f 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -537,6 +537,7 @@ flush_count_cache:
/* Save LR into r9 */
mflr r9
+ // Flush the link stack
.rept 64
bl .+4
.endr
@@ -546,6 +547,11 @@ flush_count_cache:
.balign 32
/* Restore LR */
1: mtlr r9
+
+ // If we're just flushing the link stack, return here
+3: nop
+ patch_site 3b patch__flush_link_stack_return
+
li r9,0x7fff
mtctr r9
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index 7cfcb294b11c..bd91dceb7010 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -24,6 +24,7 @@ enum count_cache_flush_type {
COUNT_CACHE_FLUSH_HW = 0x4,
};
static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
+static bool link_stack_flush_enabled;
bool barrier_nospec_enabled;
static bool no_nospec;
@@ -212,11 +213,19 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c
if (ccd)
seq_buf_printf(&s, "Indirect branch cache disabled");
+
+ if (link_stack_flush_enabled)
+ seq_buf_printf(&s, ", Software link stack flush");
+
} else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
seq_buf_printf(&s, "Mitigation: Software count cache flush");
if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW)
seq_buf_printf(&s, " (hardware accelerated)");
+
+ if (link_stack_flush_enabled)
+ seq_buf_printf(&s, ", Software link stack flush");
+
} else if (btb_flush_enabled) {
seq_buf_printf(&s, "Mitigation: Branch predictor state flush");
} else {
@@ -377,18 +386,49 @@ static __init int stf_barrier_debugfs_init(void)
device_initcall(stf_barrier_debugfs_init);
#endif /* CONFIG_DEBUG_FS */
+static void no_count_cache_flush(void)
+{
+ count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
+ pr_info("count-cache-flush: software flush disabled.\n");
+}
+
static void toggle_count_cache_flush(bool enable)
{
- if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+ if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) &&
+ !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK))
+ enable = false;
+
+ if (!enable) {
patch_instruction_site(&patch__call_flush_count_cache, PPC_INST_NOP);
- count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
- pr_info("count-cache-flush: software flush disabled.\n");
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ patch_instruction_site(&patch__call_kvm_flush_link_stack, PPC_INST_NOP);
+#endif
+ pr_info("link-stack-flush: software flush disabled.\n");
+ link_stack_flush_enabled = false;
+ no_count_cache_flush();
return;
}
+ // This enables the branch from _switch to flush_count_cache
patch_branch_site(&patch__call_flush_count_cache,
(u64)&flush_count_cache, BRANCH_SET_LINK);
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ // This enables the branch from guest_exit_cont to kvm_flush_link_stack
+ patch_branch_site(&patch__call_kvm_flush_link_stack,
+ (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
+#endif
+
+ pr_info("link-stack-flush: software flush enabled.\n");
+ link_stack_flush_enabled = true;
+
+ // If we just need to flush the link stack, patch an early return
+ if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+ patch_instruction_site(&patch__flush_link_stack_return, PPC_INST_BLR);
+ no_count_cache_flush();
+ return;
+ }
+
if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
count_cache_flush_type = COUNT_CACHE_FLUSH_SW;
pr_info("count-cache-flush: full software flush sequence enabled.\n");
@@ -407,11 +447,20 @@ void setup_count_cache_flush(void)
if (no_spectrev2 || cpu_mitigations_off()) {
if (security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED) ||
security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED))
- pr_warn("Spectre v2 mitigations not under software control, can't disable\n");
+ pr_warn("Spectre v2 mitigations not fully under software control, can't disable\n");
enable = false;
}
+ /*
+ * There's no firmware feature flag/hypervisor bit to tell us we need to
+ * flush the link stack on context switch. So we set it here if we see
+ * either of the Spectre v2 mitigations that aim to protect userspace.
+ */
+ if (security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED) ||
+ security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE))
+ security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
+
toggle_count_cache_flush(enable);
}
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 694522308cd5..84827da01d45 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -338,7 +338,7 @@ static unsigned long vtime_delta(struct task_struct *tsk,
return stime;
}
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
{
unsigned long stime, stime_scaled, steal_time;
struct cpu_accounting_data *acct = get_accounting(tsk);
@@ -366,7 +366,7 @@ void vtime_account_system(struct task_struct *tsk)
#endif
}
}
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
void vtime_account_idle(struct task_struct *tsk)
{
@@ -395,7 +395,7 @@ static void vtime_flush_scaled(struct task_struct *tsk,
/*
* Account the whole cputime accumulated in the paca
* Must be called with interrupts disabled.
- * Assumes that vtime_account_system/idle() has been called
+ * Assumes that vtime_account_kernel/idle() has been called
* recently (i.e. since the last entry from usermode) so that
* get_paca()->user_time_scaled is up to date.
*/
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 060a1acd7c6d..8834220036a5 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -6,6 +6,8 @@
#endif
#define BSS_FIRST_SECTIONS *(.bss.prominit)
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN 0
#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
@@ -18,22 +20,8 @@
ENTRY(_stext)
PHDRS {
- kernel PT_LOAD FLAGS(7); /* RWX */
- notes PT_NOTE FLAGS(0);
- dummy PT_NOTE FLAGS(0);
-
- /* binutils < 2.18 has a bug that makes it misbehave when taking an
- ELF file with all segments at load address 0 as input. This
- happens when running "strip" on vmlinux, because of the AT() magic
- in this linker script. People using GCC >= 4.2 won't run into
- this problem, because the "build-id" support will put some data
- into the "notes" segment (at a non-zero load address).
-
- To work around this, we force some data into both the "dummy"
- segment and the kernel segment, so the dummy segment will get a
- non-zero load address. It's not enough to always create the
- "notes" segment, since if nothing gets assigned to it, its load
- address will be zero. */
+ text PT_LOAD FLAGS(7); /* RWX */
+ note PT_NOTE FLAGS(0);
}
#ifdef CONFIG_PPC64
@@ -77,7 +65,7 @@ SECTIONS
#else /* !CONFIG_PPC64 */
HEAD_TEXT
#endif
- } :kernel
+ } :text
__head_end = .;
@@ -126,7 +114,7 @@ SECTIONS
__got2_end = .;
#endif /* CONFIG_PPC32 */
- } :kernel
+ } :text
. = ALIGN(ETEXT_ALIGN_SIZE);
_etext = .;
@@ -175,17 +163,6 @@ SECTIONS
__stop__btb_flush_fixup = .;
}
#endif
- EXCEPTION_TABLE(0)
-
- NOTES :kernel :notes
-
- /* The dummy segment contents for the bug workaround mentioned above
- near PHDRS. */
- .dummy : AT(ADDR(.dummy) - LOAD_OFFSET) {
- LONG(0)
- LONG(0)
- LONG(0)
- } :kernel :dummy
/*
* Init sections discarded at runtime
@@ -200,7 +177,7 @@ SECTIONS
#ifdef CONFIG_PPC64
*(.tramp.ftrace.init);
#endif
- } :kernel
+ } :text
/* .exit.text is discarded at runtime, not link time,
* to deal with references from __bug_table
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index faebcbb8c4db..0496e66aaa56 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -11,6 +11,7 @@
*/
#include <asm/ppc_asm.h>
+#include <asm/code-patching-asm.h>
#include <asm/kvm_asm.h>
#include <asm/reg.h>
#include <asm/mmu.h>
@@ -1487,6 +1488,13 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
1:
#endif /* CONFIG_KVM_XICS */
+ /*
+ * Possibly flush the link stack here, before we do a blr in
+ * guest_exit_short_path.
+ */
+1: nop
+ patch_site 1b patch__call_kvm_flush_link_stack
+
/* If we came in through the P9 short path, go back out to C now */
lwz r0, STACK_SLOT_SHORT_PATH(r1)
cmpwi r0, 0
@@ -1963,6 +1971,28 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtlr r0
blr
+.balign 32
+.global kvm_flush_link_stack
+kvm_flush_link_stack:
+ /* Save LR into r0 */
+ mflr r0
+
+ /* Flush the link stack. On Power8 it's up to 32 entries in size. */
+ .rept 32
+ bl .+4
+ .endr
+
+ /* And on Power9 it's up to 64. */
+BEGIN_FTR_SECTION
+ .rept 32
+ bl .+4
+ .endr
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
+ /* Restore LR */
+ mtlr r0
+ blr
+
kvmppc_guest_external:
/* External interrupt, first check for host_ipi. If this is
* set, we know the host wants us out so let's do it now
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index ca92e01d0bd1..48604625ab31 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -96,7 +96,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
{
return 0;
}
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { }
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp) { }
static inline u32 perf_get_misc_flags(struct pt_regs *regs)
{
return 0;
@@ -127,7 +127,7 @@ static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
static inline void power_pmu_bhrb_disable(struct perf_event *event) {}
static void power_pmu_sched_task(struct perf_event_context *ctx, bool sched_in) {}
-static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {}
+static inline void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw) {}
static void pmao_restore_workaround(bool ebb) { }
#endif /* CONFIG_PPC32 */
@@ -179,7 +179,7 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
* pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the
* [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER.
*/
-static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
+static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *regs, u64 *addrp)
{
unsigned long mmcra = regs->dsisr;
bool sdar_valid;
@@ -204,8 +204,7 @@ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp)
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
*addrp = mfspr(SPRN_SDAR);
- if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
- is_kernel_addr(mfspr(SPRN_SDAR)))
+ if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
*addrp = 0;
}
@@ -444,7 +443,7 @@ static __u64 power_pmu_bhrb_to(u64 addr)
}
/* Processing BHRB entries */
-static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
+static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *cpuhw)
{
u64 val;
u64 addr;
@@ -472,8 +471,7 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
* exporting it to userspace (avoid exposure of regions
* where we could have speculative execution)
*/
- if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
- is_kernel_addr(addr))
+ if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
continue;
/* Branches are read most recent first (ie. mfbhrb 0 is
@@ -2087,12 +2085,12 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
if (event->attr.sample_type &
(PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR))
- perf_get_data_addr(regs, &data.addr);
+ perf_get_data_addr(event, regs, &data.addr);
if (event->attr.sample_type & PERF_SAMPLE_BRANCH_STACK) {
struct cpu_hw_events *cpuhw;
cpuhw = this_cpu_ptr(&cpu_hw_events);
- power_pmu_bhrb_read(cpuhw);
+ power_pmu_bhrb_read(event, cpuhw);
data.br_stack = &cpuhw->bhrb_stack;
}
diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c
index 2b87480f2837..eab8aa293743 100644
--- a/arch/powerpc/platforms/pseries/dtl.c
+++ b/arch/powerpc/platforms/pseries/dtl.c
@@ -19,7 +19,6 @@
struct dtl {
struct dtl_entry *buf;
- struct dentry *file;
int cpu;
int buf_entries;
u64 last_idx;
@@ -320,46 +319,28 @@ static const struct file_operations dtl_fops = {
static struct dentry *dtl_dir;
-static int dtl_setup_file(struct dtl *dtl)
+static void dtl_setup_file(struct dtl *dtl)
{
char name[10];
sprintf(name, "cpu-%d", dtl->cpu);
- dtl->file = debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
- if (!dtl->file)
- return -ENOMEM;
-
- return 0;
+ debugfs_create_file(name, 0400, dtl_dir, dtl, &dtl_fops);
}
static int dtl_init(void)
{
- struct dentry *event_mask_file, *buf_entries_file;
- int rc, i;
+ int i;
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return -ENODEV;
/* set up common debugfs structure */
- rc = -ENOMEM;
dtl_dir = debugfs_create_dir("dtl", powerpc_debugfs_root);
- if (!dtl_dir) {
- printk(KERN_WARNING "%s: can't create dtl root dir\n",
- __func__);
- goto err;
- }
- event_mask_file = debugfs_create_x8("dtl_event_mask", 0600,
- dtl_dir, &dtl_event_mask);
- buf_entries_file = debugfs_create_u32("dtl_buf_entries", 0400,
- dtl_dir, &dtl_buf_entries);
-
- if (!event_mask_file || !buf_entries_file) {
- printk(KERN_WARNING "%s: can't create dtl files\n", __func__);
- goto err_remove_dir;
- }
+ debugfs_create_x8("dtl_event_mask", 0600, dtl_dir, &dtl_event_mask);
+ debugfs_create_u32("dtl_buf_entries", 0400, dtl_dir, &dtl_buf_entries);
/* set up the per-cpu log structures */
for_each_possible_cpu(i) {
@@ -367,16 +348,9 @@ static int dtl_init(void)
spin_lock_init(&dtl->lock);
dtl->cpu = i;
- rc = dtl_setup_file(dtl);
- if (rc)
- goto err_remove_dir;
+ dtl_setup_file(dtl);
}
return 0;
-
-err_remove_dir:
- debugfs_remove_recursive(dtl_dir);
-err:
- return rc;
}
machine_arch_initcall(pseries, dtl_init);
diff --git a/arch/powerpc/platforms/pseries/hvCall_inst.c b/arch/powerpc/platforms/pseries/hvCall_inst.c
index bcc1b67417a8..c40c62ec432e 100644
--- a/arch/powerpc/platforms/pseries/hvCall_inst.c
+++ b/arch/powerpc/platforms/pseries/hvCall_inst.c
@@ -129,7 +129,6 @@ static void probe_hcall_exit(void *ignored, unsigned long opcode, long retval,
static int __init hcall_inst_init(void)
{
struct dentry *hcall_root;
- struct dentry *hcall_file;
char cpu_name_buf[CPU_NAME_BUF_SIZE];
int cpu;
@@ -145,17 +144,12 @@ static int __init hcall_inst_init(void)
}
hcall_root = debugfs_create_dir(HCALL_ROOT_DIR, NULL);
- if (!hcall_root)
- return -ENOMEM;
for_each_possible_cpu(cpu) {
snprintf(cpu_name_buf, CPU_NAME_BUF_SIZE, "cpu%d", cpu);
- hcall_file = debugfs_create_file(cpu_name_buf, 0444,
- hcall_root,
- per_cpu(hcall_stats, cpu),
- &hcall_inst_seq_fops);
- if (!hcall_file)
- return -ENOMEM;
+ debugfs_create_file(cpu_name_buf, 0444, hcall_root,
+ per_cpu(hcall_stats, cpu),
+ &hcall_inst_seq_fops);
}
return 0;
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f87a5c64e24d..f9f57c55655e 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1998,24 +1998,11 @@ static int __init vpa_debugfs_init(void)
return 0;
vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
- if (!vpa_dir) {
- pr_warn("%s: can't create vpa root dir\n", __func__);
- return -ENOMEM;
- }
/* set up the per-cpu vpa file*/
for_each_possible_cpu(i) {
- struct dentry *d;
-
sprintf(name, "cpu-%ld", i);
-
- d = debugfs_create_file(name, 0400, vpa_dir, (void *)i,
- &vpa_fops);
- if (!d) {
- pr_warn("%s: can't create per-cpu vpa file\n",
- __func__);
- return -ENOMEM;
- }
+ debugfs_create_file(name, 0400, vpa_dir, (void *)i, &vpa_fops);
}
return 0;
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 8eebbc8860bb..9f7f5dce2dc4 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -26,14 +26,15 @@ config RISCV
select GENERIC_IRQ_SHOW
select GENERIC_PCI_IOMAP
select GENERIC_SCHED_CLOCK
- select GENERIC_STRNCPY_FROM_USER
- select GENERIC_STRNLEN_USER
+ select GENERIC_STRNCPY_FROM_USER if MMU
+ select GENERIC_STRNLEN_USER if MMU
select GENERIC_SMP_IDLE_THREAD
select GENERIC_ATOMIC64 if !64BIT
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ASM_MODVERSIONS
select HAVE_MEMBLOCK_NODE_MAP
- select HAVE_DMA_CONTIGUOUS
+ select HAVE_DMA_CONTIGUOUS if MMU
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_PERF_EVENTS
select HAVE_PERF_REGS
@@ -50,6 +51,7 @@ config RISCV
select PCI_DOMAINS_GENERIC if PCI
select PCI_MSI if PCI
select RISCV_TIMER
+ select UACCESS_MEMCPY if !MMU
select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_ARCH_TOPOLOGY if SMP
select ARCH_HAS_PTE_SPECIAL
@@ -60,7 +62,7 @@ config RISCV
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select SPARSEMEM_STATIC if 32BIT
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
- select HAVE_ARCH_MMAP_RND_BITS
+ select HAVE_ARCH_MMAP_RND_BITS if MMU
config ARCH_MMAP_RND_BITS_MIN
default 18 if 64BIT
@@ -72,8 +74,23 @@ config ARCH_MMAP_RND_BITS_MAX
default 24 if 64BIT # SV39 based
default 17
+# set if we run in machine mode, cleared if we run in supervisor mode
+config RISCV_M_MODE
+ bool
+ default !MMU
+
+# set if we are running in S-mode and can use SBI calls
+config RISCV_SBI
+ bool
+ depends on !RISCV_M_MODE
+ default y
+
config MMU
- def_bool y
+ bool "MMU-based Paged Memory Management Support"
+ default y
+ help
+ Select if you want MMU-based virtualised addressing space
+ support by paged memory management. If unsure, say 'Y'.
config ZONE_DMA32
bool
@@ -92,6 +109,7 @@ config PA_BITS
config PAGE_OFFSET
hex
default 0xC0000000 if 32BIT && MAXPHYSMEM_2GB
+ default 0x80000000 if 64BIT && !MMU
default 0xffffffff80000000 if 64BIT && MAXPHYSMEM_2GB
default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
@@ -135,7 +153,7 @@ config GENERIC_HWEIGHT
def_bool y
config FIX_EARLYCON_MEM
- def_bool y
+ def_bool CONFIG_MMU
config PGTABLE_LEVELS
int
@@ -160,17 +178,18 @@ config ARCH_RV32I
select GENERIC_LIB_ASHRDI3
select GENERIC_LIB_LSHRDI3
select GENERIC_LIB_UCMPDI2
+ select MMU
config ARCH_RV64I
bool "RV64I"
select 64BIT
- select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FTRACE_MCOUNT_RECORD
- select HAVE_DYNAMIC_FTRACE
- select HAVE_DYNAMIC_FTRACE_WITH_REGS
- select SWIOTLB
+ select HAVE_DYNAMIC_FTRACE if MMU
+ select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
+ select SWIOTLB if MMU
endchoice
@@ -272,6 +291,19 @@ menu "Kernel features"
source "kernel/Kconfig.hz"
+config SECCOMP
+ bool "Enable seccomp to safely compute untrusted bytecode"
+ help
+ This kernel feature is useful for number crunching applications
+ that may need to compute untrusted bytecode during their
+ execution. By using pipes or other transports made available to
+ the process as file descriptors supporting the read/write
+ syscalls, it's possible to isolate those applications in
+ their own address space using seccomp. Once seccomp is
+ enabled via prctl(PR_SET_SECCOMP), it cannot be disabled
+ and the task is only allowed to execute a few safe syscalls
+ defined by each seccomp mode.
+
endmenu
menu "Boot options"
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index f5e914210245..b9009a2fbaf5 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -83,13 +83,18 @@ PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
-all: Image.gz
+ifeq ($(CONFIG_RISCV_M_MODE),y)
+KBUILD_IMAGE := $(boot)/loader
+else
+KBUILD_IMAGE := $(boot)/Image.gz
+endif
+BOOT_TARGETS := Image Image.gz loader
-Image: vmlinux
- $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+all: $(notdir $(KBUILD_IMAGE))
-Image.%: Image
+$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+ @$(kecho) ' Kernel: $(boot)/$@ is ready'
zinstall install:
$(Q)$(MAKE) $(build)=$(boot) $@
diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
index 0990a9fdbe5d..a474f98ce4fa 100644
--- a/arch/riscv/boot/Makefile
+++ b/arch/riscv/boot/Makefile
@@ -16,7 +16,7 @@
OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
-targets := Image
+targets := Image loader
$(obj)/Image: vmlinux FORCE
$(call if_changed,objcopy)
@@ -24,6 +24,23 @@ $(obj)/Image: vmlinux FORCE
$(obj)/Image.gz: $(obj)/Image FORCE
$(call if_changed,gzip)
+loader.o: $(src)/loader.S $(obj)/Image
+
+$(obj)/loader: $(obj)/loader.o $(obj)/Image $(obj)/loader.lds FORCE
+ $(Q)$(LD) -T $(obj)/loader.lds -o $@ $(obj)/loader.o
+
+$(obj)/Image.bz2: $(obj)/Image FORCE
+ $(call if_changed,bzip2)
+
+$(obj)/Image.lz4: $(obj)/Image FORCE
+ $(call if_changed,lz4)
+
+$(obj)/Image.lzma: $(obj)/Image FORCE
+ $(call if_changed,lzma)
+
+$(obj)/Image.lzo: $(obj)/Image FORCE
+ $(call if_changed,lzo)
+
install:
$(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
$(obj)/Image System.map "$(INSTALL_PATH)"
diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
index afa43c7ea369..70a1891e7cd0 100644
--- a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
+++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi
@@ -162,6 +162,13 @@
clocks = <&prci PRCI_CLK_TLCLK>;
status = "disabled";
};
+ dma: dma@3000000 {
+ compatible = "sifive,fu540-c000-pdma";
+ reg = <0x0 0x3000000 0x0 0x8000>;
+ interrupt-parent = <&plic0>;
+ interrupts = <23 24 25 26 27 28 29 30>;
+ #dma-cells = <1>;
+ };
uart1: serial@10011000 {
compatible = "sifive,fu540-c000-uart", "sifive,uart0";
reg = <0x0 0x10011000 0x0 0x1000>;
diff --git a/arch/riscv/boot/loader.S b/arch/riscv/boot/loader.S
new file mode 100644
index 000000000000..dcf88cf44dc1
--- /dev/null
+++ b/arch/riscv/boot/loader.S
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+ .align 4
+ .section .payload, "ax", %progbits
+ .globl _start
+_start:
+ .incbin "arch/riscv/boot/Image"
+
diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S
new file mode 100644
index 000000000000..47a5003c2e28
--- /dev/null
+++ b/arch/riscv/boot/loader.lds.S
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/page.h>
+
+OUTPUT_ARCH(riscv)
+ENTRY(_start)
+
+SECTIONS
+{
+ . = PAGE_OFFSET;
+
+ .payload : {
+ *(.payload)
+ . = ALIGN(8);
+ }
+}
diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig
new file mode 100644
index 000000000000..cf74e179bf90
--- /dev/null
+++ b/arch/riscv/configs/nommu_virt_defconfig
@@ -0,0 +1,78 @@
+# CONFIG_CPU_ISOLATION is not set
+CONFIG_LOG_BUF_SHIFT=16
+CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_FHANDLE is not set
+# CONFIG_BASE_FULL is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_AIO is not set
+# CONFIG_IO_URING is not set
+# CONFIG_ADVISE_SYSCALLS is not set
+# CONFIG_MEMBARRIER is not set
+# CONFIG_KALLSYMS is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLOB=y
+# CONFIG_SLAB_MERGE_DEFAULT is not set
+# CONFIG_MMU is not set
+CONFIG_MAXPHYSMEM_2GB=y
+CONFIG_SMP=y
+CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0"
+CONFIG_CMDLINE_FORCE=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_MSDOS_PARTITION is not set
+# CONFIG_EFI_PARTITION is not set
+# CONFIG_MQ_IOSCHED_DEADLINE is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+CONFIG_BINFMT_FLAT=y
+# CONFIG_COREDUMP is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+CONFIG_VIRTIO_BLK=y
+# CONFIG_INPUT_KEYBOARD is not set
+# CONFIG_INPUT_MOUSE is not set
+# CONFIG_SERIO is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_LDISC_AUTOLOAD is not set
+# CONFIG_DEVMEM is not set
+CONFIG_SERIAL_8250=y
+# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_NR_UARTS=1
+CONFIG_SERIAL_8250_RUNTIME_UARTS=1
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_LCD_CLASS_DEVICE is not set
+# CONFIG_BACKLIGHT_CLASS_DEVICE is not set
+# CONFIG_VGA_CONSOLE is not set
+# CONFIG_HID is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_SIFIVE_PLIC=y
+# CONFIG_VALIDATE_FS_PARSER is not set
+CONFIG_EXT2_FS=y
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_LSM="[]"
+CONFIG_PRINTK_TIME=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_RCU_TRACE is not set
+# CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index c9fecd120d18..dd62b691c443 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_RISCV_PROTOTYPES_H
+#define _ASM_RISCV_PROTOTYPES_H
#include <linux/ftrace.h>
#include <asm-generic/asm-prototypes.h>
diff --git a/arch/riscv/include/asm/cache.h b/arch/riscv/include/asm/cache.h
index bfd523e8f0b2..9b58b104559e 100644
--- a/arch/riscv/include/asm/cache.h
+++ b/arch/riscv/include/asm/cache.h
@@ -11,4 +11,12 @@
#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT)
+/*
+ * RISC-V requires the stack pointer to be 16-byte aligned, so ensure that
+ * the flat loader aligns it accordingly.
+ */
+#ifndef CONFIG_MMU
+#define ARCH_SLAB_MINALIGN 16
+#endif
+
#endif /* _ASM_RISCV_CACHE_H */
diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h
new file mode 100644
index 000000000000..6eaa2eedd694
--- /dev/null
+++ b/arch/riscv/include/asm/clint.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_CLINT_H
+#define _ASM_RISCV_CLINT_H 1
+
+#include <linux/io.h>
+#include <linux/smp.h>
+
+#ifdef CONFIG_RISCV_M_MODE
+extern u32 __iomem *clint_ipi_base;
+
+void clint_init_boot_cpu(void);
+
+static inline void clint_send_ipi_single(unsigned long hartid)
+{
+ writel(1, clint_ipi_base + hartid);
+}
+
+static inline void clint_send_ipi_mask(const struct cpumask *hartid_mask)
+{
+ int hartid;
+
+ for_each_cpu(hartid, hartid_mask)
+ clint_send_ipi_single(hartid);
+}
+
+static inline void clint_clear_ipi(unsigned long hartid)
+{
+ writel(0, clint_ipi_base + hartid);
+}
+#else /* CONFIG_RISCV_M_MODE */
+#define clint_init_boot_cpu() do { } while (0)
+
+/* stubs to for code is only reachable under IS_ENABLED(CONFIG_RISCV_M_MODE): */
+void clint_send_ipi_single(unsigned long hartid);
+void clint_send_ipi_mask(const struct cpumask *hartid_mask);
+void clint_clear_ipi(unsigned long hartid);
+#endif /* CONFIG_RISCV_M_MODE */
+
+#endif /* _ASM_RISCV_CLINT_H */
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index a18923fa23c8..0a62d2d68455 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -11,8 +11,11 @@
/* Status register flags */
#define SR_SIE _AC(0x00000002, UL) /* Supervisor Interrupt Enable */
+#define SR_MIE _AC(0x00000008, UL) /* Machine Interrupt Enable */
#define SR_SPIE _AC(0x00000020, UL) /* Previous Supervisor IE */
+#define SR_MPIE _AC(0x00000080, UL) /* Previous Machine IE */
#define SR_SPP _AC(0x00000100, UL) /* Previously Supervisor */
+#define SR_MPP _AC(0x00001800, UL) /* Previously Machine */
#define SR_SUM _AC(0x00040000, UL) /* Supervisor User Memory Access */
#define SR_FS _AC(0x00006000, UL) /* Floating-point Status */
@@ -44,9 +47,10 @@
#define SATP_MODE SATP_MODE_39
#endif
-/* SCAUSE */
-#define SCAUSE_IRQ_FLAG (_AC(1, UL) << (__riscv_xlen - 1))
+/* Exception cause high bit - is an interrupt if set */
+#define CAUSE_IRQ_FLAG (_AC(1, UL) << (__riscv_xlen - 1))
+/* Interrupt causes (minus the high bit) */
#define IRQ_U_SOFT 0
#define IRQ_S_SOFT 1
#define IRQ_M_SOFT 3
@@ -57,6 +61,7 @@
#define IRQ_S_EXT 9
#define IRQ_M_EXT 11
+/* Exception causes */
#define EXC_INST_MISALIGNED 0
#define EXC_INST_ACCESS 1
#define EXC_BREAKPOINT 3
@@ -67,14 +72,14 @@
#define EXC_LOAD_PAGE_FAULT 13
#define EXC_STORE_PAGE_FAULT 15
-/* SIE (Interrupt Enable) and SIP (Interrupt Pending) flags */
-#define SIE_SSIE (_AC(0x1, UL) << IRQ_S_SOFT)
-#define SIE_STIE (_AC(0x1, UL) << IRQ_S_TIMER)
-#define SIE_SEIE (_AC(0x1, UL) << IRQ_S_EXT)
-
+/* symbolic CSR names: */
#define CSR_CYCLE 0xc00
#define CSR_TIME 0xc01
#define CSR_INSTRET 0xc02
+#define CSR_CYCLEH 0xc80
+#define CSR_TIMEH 0xc81
+#define CSR_INSTRETH 0xc82
+
#define CSR_SSTATUS 0x100
#define CSR_SIE 0x104
#define CSR_STVEC 0x105
@@ -85,9 +90,58 @@
#define CSR_STVAL 0x143
#define CSR_SIP 0x144
#define CSR_SATP 0x180
-#define CSR_CYCLEH 0xc80
-#define CSR_TIMEH 0xc81
-#define CSR_INSTRETH 0xc82
+
+#define CSR_MSTATUS 0x300
+#define CSR_MISA 0x301
+#define CSR_MIE 0x304
+#define CSR_MTVEC 0x305
+#define CSR_MSCRATCH 0x340
+#define CSR_MEPC 0x341
+#define CSR_MCAUSE 0x342
+#define CSR_MTVAL 0x343
+#define CSR_MIP 0x344
+#define CSR_MHARTID 0xf14
+
+#ifdef CONFIG_RISCV_M_MODE
+# define CSR_STATUS CSR_MSTATUS
+# define CSR_IE CSR_MIE
+# define CSR_TVEC CSR_MTVEC
+# define CSR_SCRATCH CSR_MSCRATCH
+# define CSR_EPC CSR_MEPC
+# define CSR_CAUSE CSR_MCAUSE
+# define CSR_TVAL CSR_MTVAL
+# define CSR_IP CSR_MIP
+
+# define SR_IE SR_MIE
+# define SR_PIE SR_MPIE
+# define SR_PP SR_MPP
+
+# define IRQ_SOFT IRQ_M_SOFT
+# define IRQ_TIMER IRQ_M_TIMER
+# define IRQ_EXT IRQ_M_EXT
+#else /* CONFIG_RISCV_M_MODE */
+# define CSR_STATUS CSR_SSTATUS
+# define CSR_IE CSR_SIE
+# define CSR_TVEC CSR_STVEC
+# define CSR_SCRATCH CSR_SSCRATCH
+# define CSR_EPC CSR_SEPC
+# define CSR_CAUSE CSR_SCAUSE
+# define CSR_TVAL CSR_STVAL
+# define CSR_IP CSR_SIP
+
+# define SR_IE SR_SIE
+# define SR_PIE SR_SPIE
+# define SR_PP SR_SPP
+
+# define IRQ_SOFT IRQ_S_SOFT
+# define IRQ_TIMER IRQ_S_TIMER
+# define IRQ_EXT IRQ_S_EXT
+#endif /* CONFIG_RISCV_M_MODE */
+
+/* IE/IP (Supervisor/Machine Interrupt Enable/Pending) flags */
+#define IE_SIE (_AC(0x1, UL) << IRQ_SOFT)
+#define IE_TIE (_AC(0x1, UL) << IRQ_TIMER)
+#define IE_EIE (_AC(0x1, UL) << IRQ_EXT)
#ifndef __ASSEMBLY__
diff --git a/arch/riscv/include/asm/current.h b/arch/riscv/include/asm/current.h
index 44dcf7fc15ee..dd973efe5d7c 100644
--- a/arch/riscv/include/asm/current.h
+++ b/arch/riscv/include/asm/current.h
@@ -7,8 +7,8 @@
*/
-#ifndef __ASM_CURRENT_H
-#define __ASM_CURRENT_H
+#ifndef _ASM_RISCV_CURRENT_H
+#define _ASM_RISCV_CURRENT_H
#include <linux/bug.h>
#include <linux/compiler.h>
@@ -34,4 +34,4 @@ static __always_inline struct task_struct *get_current(void)
#endif /* __ASSEMBLY__ */
-#endif /* __ASM_CURRENT_H */
+#endif /* _ASM_RISCV_CURRENT_H */
diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h
index ef04084bf0de..d83a4efd052b 100644
--- a/arch/riscv/include/asm/elf.h
+++ b/arch/riscv/include/asm/elf.h
@@ -56,16 +56,16 @@ extern unsigned long elf_hwcap;
*/
#define ELF_PLATFORM (NULL)
+#ifdef CONFIG_MMU
#define ARCH_DLINFO \
do { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, \
(elf_addr_t)current->mm->context.vdso); \
} while (0)
-
-
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
struct linux_binprm;
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int uses_interp);
+#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_ELF_H */
diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h
index 161f28d04a07..42d2c42f3cc9 100644
--- a/arch/riscv/include/asm/fixmap.h
+++ b/arch/riscv/include/asm/fixmap.h
@@ -11,6 +11,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
+#ifdef CONFIG_MMU
/*
* Here we define all the compile-time 'special' virtual addresses.
* The point is to have a constant address at compile time, but to
@@ -42,4 +43,5 @@ extern void __set_fixmap(enum fixed_addresses idx,
#include <asm-generic/fixmap.h>
+#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_FIXMAP_H */
diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h
index c6dcc5291f97..ace8a6e2d11d 100644
--- a/arch/riscv/include/asm/ftrace.h
+++ b/arch/riscv/include/asm/ftrace.h
@@ -1,6 +1,9 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2017 Andes Technology Corporation */
+#ifndef _ASM_RISCV_FTRACE_H
+#define _ASM_RISCV_FTRACE_H
+
/*
* The graph frame test is not possible if CONFIG_FRAME_POINTER is not enabled.
* Check arch/riscv/kernel/mcount.S for detail.
@@ -64,3 +67,5 @@ do { \
*/
#define MCOUNT_INSN_SIZE 8
#endif
+
+#endif /* _ASM_RISCV_FTRACE_H */
diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h
index 4ad6409c4647..fdfaf7f3df7c 100644
--- a/arch/riscv/include/asm/futex.h
+++ b/arch/riscv/include/asm/futex.h
@@ -4,14 +4,20 @@
* Copyright (c) 2018 Jim Wilson (jimw@sifive.com)
*/
-#ifndef _ASM_FUTEX_H
-#define _ASM_FUTEX_H
+#ifndef _ASM_RISCV_FUTEX_H
+#define _ASM_RISCV_FUTEX_H
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <linux/errno.h>
#include <asm/asm.h>
+/* We don't even really need the extable code, but for now keep it simple */
+#ifndef CONFIG_MMU
+#define __enable_user_access() do { } while (0)
+#define __disable_user_access() do { } while (0)
+#endif
+
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
{ \
uintptr_t tmp; \
@@ -112,4 +118,4 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
return ret;
}
-#endif /* _ASM_FUTEX_H */
+#endif /* _ASM_RISCV_FUTEX_H */
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 7ecb7c6a57b1..1bb0cd04aec3 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -5,8 +5,8 @@
* Copyright (C) 2012 ARM Ltd.
* Copyright (C) 2017 SiFive
*/
-#ifndef __ASM_HWCAP_H
-#define __ASM_HWCAP_H
+#ifndef _ASM_RISCV_HWCAP_H
+#define _ASM_RISCV_HWCAP_H
#include <uapi/asm/hwcap.h>
@@ -23,4 +23,5 @@ enum {
extern unsigned long elf_hwcap;
#endif
-#endif
+
+#endif /* _ASM_RISCV_HWCAP_H */
diff --git a/arch/riscv/include/asm/image.h b/arch/riscv/include/asm/image.h
index 344db5244547..7b0f92ba0acc 100644
--- a/arch/riscv/include/asm/image.h
+++ b/arch/riscv/include/asm/image.h
@@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_IMAGE_H
-#define __ASM_IMAGE_H
+#ifndef _ASM_RISCV_IMAGE_H
+#define _ASM_RISCV_IMAGE_H
#define RISCV_IMAGE_MAGIC "RISCV\0\0\0"
#define RISCV_IMAGE_MAGIC2 "RSC\x05"
@@ -62,4 +62,4 @@ struct riscv_image_header {
u32 res4;
};
#endif /* __ASSEMBLY__ */
-#endif /* __ASM_IMAGE_H */
+#endif /* _ASM_RISCV_IMAGE_H */
diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index 3ba4d93721d3..0f477206a4ed 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -15,158 +15,19 @@
#include <asm/mmiowb.h>
#include <asm/pgtable.h>
-extern void __iomem *ioremap(phys_addr_t offset, unsigned long size);
-
-/*
- * The RISC-V ISA doesn't yet specify how to query or modify PMAs, so we can't
- * change the properties of memory regions. This should be fixed by the
- * upcoming platform spec.
- */
-#define ioremap_nocache(addr, size) ioremap((addr), (size))
-#define ioremap_wc(addr, size) ioremap((addr), (size))
-#define ioremap_wt(addr, size) ioremap((addr), (size))
-
-extern void iounmap(volatile void __iomem *addr);
-
-/* Generic IO read/write. These perform native-endian accesses. */
-#define __raw_writeb __raw_writeb
-static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
-{
- asm volatile("sb %0, 0(%1)" : : "r" (val), "r" (addr));
-}
-
-#define __raw_writew __raw_writew
-static inline void __raw_writew(u16 val, volatile void __iomem *addr)
-{
- asm volatile("sh %0, 0(%1)" : : "r" (val), "r" (addr));
-}
-
-#define __raw_writel __raw_writel
-static inline void __raw_writel(u32 val, volatile void __iomem *addr)
-{
- asm volatile("sw %0, 0(%1)" : : "r" (val), "r" (addr));
-}
-
-#ifdef CONFIG_64BIT
-#define __raw_writeq __raw_writeq
-static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
-{
- asm volatile("sd %0, 0(%1)" : : "r" (val), "r" (addr));
-}
-#endif
-
-#define __raw_readb __raw_readb
-static inline u8 __raw_readb(const volatile void __iomem *addr)
-{
- u8 val;
-
- asm volatile("lb %0, 0(%1)" : "=r" (val) : "r" (addr));
- return val;
-}
-
-#define __raw_readw __raw_readw
-static inline u16 __raw_readw(const volatile void __iomem *addr)
-{
- u16 val;
-
- asm volatile("lh %0, 0(%1)" : "=r" (val) : "r" (addr));
- return val;
-}
-
-#define __raw_readl __raw_readl
-static inline u32 __raw_readl(const volatile void __iomem *addr)
-{
- u32 val;
-
- asm volatile("lw %0, 0(%1)" : "=r" (val) : "r" (addr));
- return val;
-}
-
-#ifdef CONFIG_64BIT
-#define __raw_readq __raw_readq
-static inline u64 __raw_readq(const volatile void __iomem *addr)
-{
- u64 val;
-
- asm volatile("ld %0, 0(%1)" : "=r" (val) : "r" (addr));
- return val;
-}
-#endif
-
/*
- * Unordered I/O memory access primitives. These are even more relaxed than
- * the relaxed versions, as they don't even order accesses between successive
- * operations to the I/O regions.
+ * MMIO access functions are separated out to break dependency cycles
+ * when using {read,write}* fns in low-level headers
*/
-#define readb_cpu(c) ({ u8 __r = __raw_readb(c); __r; })
-#define readw_cpu(c) ({ u16 __r = le16_to_cpu((__force __le16)__raw_readw(c)); __r; })
-#define readl_cpu(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
-
-#define writeb_cpu(v,c) ((void)__raw_writeb((v),(c)))
-#define writew_cpu(v,c) ((void)__raw_writew((__force u16)cpu_to_le16(v),(c)))
-#define writel_cpu(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
-
-#ifdef CONFIG_64BIT
-#define readq_cpu(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
-#define writeq_cpu(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
-#endif
-
-/*
- * Relaxed I/O memory access primitives. These follow the Device memory
- * ordering rules but do not guarantee any ordering relative to Normal memory
- * accesses. These are defined to order the indicated access (either a read or
- * write) with all other I/O memory accesses. Since the platform specification
- * defines that all I/O regions are strongly ordered on channel 2, no explicit
- * fences are required to enforce this ordering.
- */
-/* FIXME: These are now the same as asm-generic */
-#define __io_rbr() do {} while (0)
-#define __io_rar() do {} while (0)
-#define __io_rbw() do {} while (0)
-#define __io_raw() do {} while (0)
-
-#define readb_relaxed(c) ({ u8 __v; __io_rbr(); __v = readb_cpu(c); __io_rar(); __v; })
-#define readw_relaxed(c) ({ u16 __v; __io_rbr(); __v = readw_cpu(c); __io_rar(); __v; })
-#define readl_relaxed(c) ({ u32 __v; __io_rbr(); __v = readl_cpu(c); __io_rar(); __v; })
-
-#define writeb_relaxed(v,c) ({ __io_rbw(); writeb_cpu((v),(c)); __io_raw(); })
-#define writew_relaxed(v,c) ({ __io_rbw(); writew_cpu((v),(c)); __io_raw(); })
-#define writel_relaxed(v,c) ({ __io_rbw(); writel_cpu((v),(c)); __io_raw(); })
-
-#ifdef CONFIG_64BIT
-#define readq_relaxed(c) ({ u64 __v; __io_rbr(); __v = readq_cpu(c); __io_rar(); __v; })
-#define writeq_relaxed(v,c) ({ __io_rbw(); writeq_cpu((v),(c)); __io_raw(); })
-#endif
-
-/*
- * I/O memory access primitives. Reads are ordered relative to any
- * following Normal memory access. Writes are ordered relative to any prior
- * Normal memory access. The memory barriers here are necessary as RISC-V
- * doesn't define any ordering between the memory space and the I/O space.
- */
-#define __io_br() do {} while (0)
-#define __io_ar(v) __asm__ __volatile__ ("fence i,r" : : : "memory");
-#define __io_bw() __asm__ __volatile__ ("fence w,o" : : : "memory");
-#define __io_aw() mmiowb_set_pending()
-
-#define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
-#define readw(c) ({ u16 __v; __io_br(); __v = readw_cpu(c); __io_ar(__v); __v; })
-#define readl(c) ({ u32 __v; __io_br(); __v = readl_cpu(c); __io_ar(__v); __v; })
-
-#define writeb(v,c) ({ __io_bw(); writeb_cpu((v),(c)); __io_aw(); })
-#define writew(v,c) ({ __io_bw(); writew_cpu((v),(c)); __io_aw(); })
-#define writel(v,c) ({ __io_bw(); writel_cpu((v),(c)); __io_aw(); })
-
-#ifdef CONFIG_64BIT
-#define readq(c) ({ u64 __v; __io_br(); __v = readq_cpu(c); __io_ar(__v); __v; })
-#define writeq(v,c) ({ __io_bw(); writeq_cpu((v),(c)); __io_aw(); })
-#endif
+#include <asm/mmio.h>
/*
* I/O port access constants.
*/
+#ifdef CONFIG_MMU
#define IO_SPACE_LIMIT (PCI_IO_SIZE - 1)
#define PCI_IOBASE ((void __iomem *)PCI_IO_START)
+#endif /* CONFIG_MMU */
/*
* Emulation routines for the port-mapped IO space used by some PCI drivers.
diff --git a/arch/riscv/include/asm/irqflags.h b/arch/riscv/include/asm/irqflags.h
index e70f647ce3b7..08d4d6a5b7e9 100644
--- a/arch/riscv/include/asm/irqflags.h
+++ b/arch/riscv/include/asm/irqflags.h
@@ -13,31 +13,31 @@
/* read interrupt enabled status */
static inline unsigned long arch_local_save_flags(void)
{
- return csr_read(CSR_SSTATUS);
+ return csr_read(CSR_STATUS);
}
/* unconditionally enable interrupts */
static inline void arch_local_irq_enable(void)
{
- csr_set(CSR_SSTATUS, SR_SIE);
+ csr_set(CSR_STATUS, SR_IE);
}
/* unconditionally disable interrupts */
static inline void arch_local_irq_disable(void)
{
- csr_clear(CSR_SSTATUS, SR_SIE);
+ csr_clear(CSR_STATUS, SR_IE);
}
/* get status and disable interrupts */
static inline unsigned long arch_local_irq_save(void)
{
- return csr_read_clear(CSR_SSTATUS, SR_SIE);
+ return csr_read_clear(CSR_STATUS, SR_IE);
}
/* test flags */
static inline int arch_irqs_disabled_flags(unsigned long flags)
{
- return !(flags & SR_SIE);
+ return !(flags & SR_IE);
}
/* test hardware interrupt enable bit */
@@ -49,7 +49,7 @@ static inline int arch_irqs_disabled(void)
/* set interrupt enabled status */
static inline void arch_local_irq_restore(unsigned long flags)
{
- csr_set(CSR_SSTATUS, flags & SR_SIE);
+ csr_set(CSR_STATUS, flags & SR_IE);
}
#endif /* _ASM_RISCV_IRQFLAGS_H */
diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h
index 96e30ef637e8..56a98ea30731 100644
--- a/arch/riscv/include/asm/kprobes.h
+++ b/arch/riscv/include/asm/kprobes.h
@@ -6,9 +6,9 @@
* Copyright (C) 2017 SiFive
*/
-#ifndef _RISCV_KPROBES_H
-#define _RISCV_KPROBES_H
+#ifndef _ASM_RISCV_KPROBES_H
+#define _ASM_RISCV_KPROBES_H
#include <asm-generic/kprobes.h>
-#endif /* _RISCV_KPROBES_H */
+#endif /* _ASM_RISCV_KPROBES_H */
diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h
new file mode 100644
index 000000000000..a297a835e402
--- /dev/null
+++ b/arch/riscv/include/asm/mmio.h
@@ -0,0 +1,168 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * {read,write}{b,w,l,q} based on arch/arm64/include/asm/io.h
+ * which was based on arch/arm/include/io.h
+ *
+ * Copyright (C) 1996-2000 Russell King
+ * Copyright (C) 2012 ARM Ltd.
+ * Copyright (C) 2014 Regents of the University of California
+ */
+
+#ifndef _ASM_RISCV_MMIO_H
+#define _ASM_RISCV_MMIO_H
+
+#include <linux/types.h>
+#include <asm/mmiowb.h>
+
+#ifdef CONFIG_MMU
+void __iomem *ioremap(phys_addr_t offset, unsigned long size);
+
+/*
+ * The RISC-V ISA doesn't yet specify how to query or modify PMAs, so we can't
+ * change the properties of memory regions. This should be fixed by the
+ * upcoming platform spec.
+ */
+#define ioremap_nocache(addr, size) ioremap((addr), (size))
+#define ioremap_wc(addr, size) ioremap((addr), (size))
+#define ioremap_wt(addr, size) ioremap((addr), (size))
+
+void iounmap(volatile void __iomem *addr);
+#else
+#define pgprot_noncached(x) (x)
+#endif /* CONFIG_MMU */
+
+/* Generic IO read/write. These perform native-endian accesses. */
+#define __raw_writeb __raw_writeb
+static inline void __raw_writeb(u8 val, volatile void __iomem *addr)
+{
+ asm volatile("sb %0, 0(%1)" : : "r" (val), "r" (addr));
+}
+
+#define __raw_writew __raw_writew
+static inline void __raw_writew(u16 val, volatile void __iomem *addr)
+{
+ asm volatile("sh %0, 0(%1)" : : "r" (val), "r" (addr));
+}
+
+#define __raw_writel __raw_writel
+static inline void __raw_writel(u32 val, volatile void __iomem *addr)
+{
+ asm volatile("sw %0, 0(%1)" : : "r" (val), "r" (addr));
+}
+
+#ifdef CONFIG_64BIT
+#define __raw_writeq __raw_writeq
+static inline void __raw_writeq(u64 val, volatile void __iomem *addr)
+{
+ asm volatile("sd %0, 0(%1)" : : "r" (val), "r" (addr));
+}
+#endif
+
+#define __raw_readb __raw_readb
+static inline u8 __raw_readb(const volatile void __iomem *addr)
+{
+ u8 val;
+
+ asm volatile("lb %0, 0(%1)" : "=r" (val) : "r" (addr));
+ return val;
+}
+
+#define __raw_readw __raw_readw
+static inline u16 __raw_readw(const volatile void __iomem *addr)
+{
+ u16 val;
+
+ asm volatile("lh %0, 0(%1)" : "=r" (val) : "r" (addr));
+ return val;
+}
+
+#define __raw_readl __raw_readl
+static inline u32 __raw_readl(const volatile void __iomem *addr)
+{
+ u32 val;
+
+ asm volatile("lw %0, 0(%1)" : "=r" (val) : "r" (addr));
+ return val;
+}
+
+#ifdef CONFIG_64BIT
+#define __raw_readq __raw_readq
+static inline u64 __raw_readq(const volatile void __iomem *addr)
+{
+ u64 val;
+
+ asm volatile("ld %0, 0(%1)" : "=r" (val) : "r" (addr));
+ return val;
+}
+#endif
+
+/*
+ * Unordered I/O memory access primitives. These are even more relaxed than
+ * the relaxed versions, as they don't even order accesses between successive
+ * operations to the I/O regions.
+ */
+#define readb_cpu(c) ({ u8 __r = __raw_readb(c); __r; })
+#define readw_cpu(c) ({ u16 __r = le16_to_cpu((__force __le16)__raw_readw(c)); __r; })
+#define readl_cpu(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
+
+#define writeb_cpu(v, c) ((void)__raw_writeb((v), (c)))
+#define writew_cpu(v, c) ((void)__raw_writew((__force u16)cpu_to_le16(v), (c)))
+#define writel_cpu(v, c) ((void)__raw_writel((__force u32)cpu_to_le32(v), (c)))
+
+#ifdef CONFIG_64BIT
+#define readq_cpu(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
+#define writeq_cpu(v, c) ((void)__raw_writeq((__force u64)cpu_to_le64(v), (c)))
+#endif
+
+/*
+ * Relaxed I/O memory access primitives. These follow the Device memory
+ * ordering rules but do not guarantee any ordering relative to Normal memory
+ * accesses. These are defined to order the indicated access (either a read or
+ * write) with all other I/O memory accesses. Since the platform specification
+ * defines that all I/O regions are strongly ordered on channel 2, no explicit
+ * fences are required to enforce this ordering.
+ */
+/* FIXME: These are now the same as asm-generic */
+#define __io_rbr() do {} while (0)
+#define __io_rar() do {} while (0)
+#define __io_rbw() do {} while (0)
+#define __io_raw() do {} while (0)
+
+#define readb_relaxed(c) ({ u8 __v; __io_rbr(); __v = readb_cpu(c); __io_rar(); __v; })
+#define readw_relaxed(c) ({ u16 __v; __io_rbr(); __v = readw_cpu(c); __io_rar(); __v; })
+#define readl_relaxed(c) ({ u32 __v; __io_rbr(); __v = readl_cpu(c); __io_rar(); __v; })
+
+#define writeb_relaxed(v, c) ({ __io_rbw(); writeb_cpu((v), (c)); __io_raw(); })
+#define writew_relaxed(v, c) ({ __io_rbw(); writew_cpu((v), (c)); __io_raw(); })
+#define writel_relaxed(v, c) ({ __io_rbw(); writel_cpu((v), (c)); __io_raw(); })
+
+#ifdef CONFIG_64BIT
+#define readq_relaxed(c) ({ u64 __v; __io_rbr(); __v = readq_cpu(c); __io_rar(); __v; })
+#define writeq_relaxed(v, c) ({ __io_rbw(); writeq_cpu((v), (c)); __io_raw(); })
+#endif
+
+/*
+ * I/O memory access primitives. Reads are ordered relative to any
+ * following Normal memory access. Writes are ordered relative to any prior
+ * Normal memory access. The memory barriers here are necessary as RISC-V
+ * doesn't define any ordering between the memory space and the I/O space.
+ */
+#define __io_br() do {} while (0)
+#define __io_ar(v) __asm__ __volatile__ ("fence i,r" : : : "memory")
+#define __io_bw() __asm__ __volatile__ ("fence w,o" : : : "memory")
+#define __io_aw() mmiowb_set_pending()
+
+#define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; })
+#define readw(c) ({ u16 __v; __io_br(); __v = readw_cpu(c); __io_ar(__v); __v; })
+#define readl(c) ({ u32 __v; __io_br(); __v = readl_cpu(c); __io_ar(__v); __v; })
+
+#define writeb(v, c) ({ __io_bw(); writeb_cpu((v), (c)); __io_aw(); })
+#define writew(v, c) ({ __io_bw(); writew_cpu((v), (c)); __io_aw(); })
+#define writel(v, c) ({ __io_bw(); writel_cpu((v), (c)); __io_aw(); })
+
+#ifdef CONFIG_64BIT
+#define readq(c) ({ u64 __v; __io_br(); __v = readq_cpu(c); __io_ar(__v); __v; })
+#define writeq(v, c) ({ __io_bw(); writeq_cpu((v), (c)); __io_aw(); })
+#endif
+
+#endif /* _ASM_RISCV_MMIO_H */
diff --git a/arch/riscv/include/asm/mmiowb.h b/arch/riscv/include/asm/mmiowb.h
index 5d7e3a2b4e3b..bb4091ff4a21 100644
--- a/arch/riscv/include/asm/mmiowb.h
+++ b/arch/riscv/include/asm/mmiowb.h
@@ -11,4 +11,4 @@
#include <asm-generic/mmiowb.h>
-#endif /* ASM_RISCV_MMIOWB_H */
+#endif /* _ASM_RISCV_MMIOWB_H */
diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 151476fb58cb..967eacb01ab5 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -10,6 +10,9 @@
#ifndef __ASSEMBLY__
typedef struct {
+#ifndef CONFIG_MMU
+ unsigned long end_brk;
+#endif
void *vdso;
#ifdef CONFIG_SMP
/* A local icache flush is needed before user execution can resume. */
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 3db261c4810f..ac699246ae7e 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -88,8 +88,14 @@ typedef struct page *pgtable_t;
#define PTE_FMT "%08lx"
#endif
+#ifdef CONFIG_MMU
extern unsigned long va_pa_offset;
extern unsigned long pfn_base;
+#define ARCH_PFN_OFFSET (pfn_base)
+#else
+#define va_pa_offset 0
+#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
+#endif /* CONFIG_MMU */
extern unsigned long max_low_pfn;
extern unsigned long min_low_pfn;
@@ -112,11 +118,9 @@ extern unsigned long min_low_pfn;
#ifdef CONFIG_FLATMEM
#define pfn_valid(pfn) \
- (((pfn) >= pfn_base) && (((pfn)-pfn_base) < max_mapnr))
+ (((pfn) >= ARCH_PFN_OFFSET) && (((pfn) - ARCH_PFN_OFFSET) < max_mapnr))
#endif
-#define ARCH_PFN_OFFSET (pfn_base)
-
#endif /* __ASSEMBLY__ */
#define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr)))
diff --git a/arch/riscv/include/asm/pci.h b/arch/riscv/include/asm/pci.h
index 5ac8daa1cc36..1c473a1bd986 100644
--- a/arch/riscv/include/asm/pci.h
+++ b/arch/riscv/include/asm/pci.h
@@ -3,8 +3,8 @@
* Copyright (C) 2016 SiFive
*/
-#ifndef __ASM_RISCV_PCI_H
-#define __ASM_RISCV_PCI_H
+#ifndef _ASM_RISCV_PCI_H
+#define _ASM_RISCV_PCI_H
#include <linux/types.h>
#include <linux/slab.h>
@@ -34,4 +34,4 @@ static inline int pci_proc_domain(struct pci_bus *bus)
}
#endif /* CONFIG_PCI */
-#endif /* __ASM_PCI_H */
+#endif /* _ASM_RISCV_PCI_H */
diff --git a/arch/riscv/include/asm/pgalloc.h b/arch/riscv/include/asm/pgalloc.h
index d59ea92285ec..3f601ee8233f 100644
--- a/arch/riscv/include/asm/pgalloc.h
+++ b/arch/riscv/include/asm/pgalloc.h
@@ -10,6 +10,7 @@
#include <linux/mm.h>
#include <asm/tlb.h>
+#ifdef CONFIG_MMU
#include <asm-generic/pgalloc.h> /* for pte_{alloc,free}_one */
static inline void pmd_populate_kernel(struct mm_struct *mm,
@@ -81,5 +82,6 @@ do { \
pgtable_pte_page_dtor(pte); \
tlb_remove_page((tlb), pte); \
} while (0)
+#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_PGALLOC_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index d3221017194d..beb5f0865e39 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -25,6 +25,7 @@
#include <asm/pgtable-32.h>
#endif /* CONFIG_64BIT */
+#ifdef CONFIG_MMU
/* Number of entries in the page global directory */
#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
/* Number of entries in the page table */
@@ -32,7 +33,6 @@
/* Number of PGD entries that a user-mode program can use */
#define USER_PTRS_PER_PGD (TASK_SIZE / PGDIR_SIZE)
-#define FIRST_USER_ADDRESS 0
/* Page protection bits */
#define _PAGE_BASE (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER)
@@ -84,42 +84,6 @@ extern pgd_t swapper_pg_dir[];
#define __S110 PAGE_SHARED_EXEC
#define __S111 PAGE_SHARED_EXEC
-#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
-#define VMALLOC_END (PAGE_OFFSET - 1)
-#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
-#define PCI_IO_SIZE SZ_16M
-
-/*
- * Roughly size the vmemmap space to be large enough to fit enough
- * struct pages to map half the virtual address space. Then
- * position vmemmap directly below the VMALLOC region.
- */
-#define VMEMMAP_SHIFT \
- (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
-#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
-#define VMEMMAP_END (VMALLOC_START - 1)
-#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
-
-#define vmemmap ((struct page *)VMEMMAP_START)
-
-#define PCI_IO_END VMEMMAP_START
-#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
-#define FIXADDR_TOP PCI_IO_START
-
-#ifdef CONFIG_64BIT
-#define FIXADDR_SIZE PMD_SIZE
-#else
-#define FIXADDR_SIZE PGDIR_SIZE
-#endif
-#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
-
-/*
- * ZERO_PAGE is a global shared page that is always zero,
- * used for zero-mapped memory areas, etc.
- */
-extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
-
static inline int pmd_present(pmd_t pmd)
{
return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE));
@@ -430,11 +394,34 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma,
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val })
-#define kern_addr_valid(addr) (1) /* FIXME */
+#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
+#define VMALLOC_END (PAGE_OFFSET - 1)
+#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
-extern void *dtb_early_va;
-extern void setup_bootmem(void);
-extern void paging_init(void);
+/*
+ * Roughly size the vmemmap space to be large enough to fit enough
+ * struct pages to map half the virtual address space. Then
+ * position vmemmap directly below the VMALLOC region.
+ */
+#define VMEMMAP_SHIFT \
+ (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT)
+#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT)
+#define VMEMMAP_END (VMALLOC_START - 1)
+#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE)
+
+#define vmemmap ((struct page *)VMEMMAP_START)
+
+#define PCI_IO_SIZE SZ_16M
+#define PCI_IO_END VMEMMAP_START
+#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE)
+
+#define FIXADDR_TOP PCI_IO_START
+#ifdef CONFIG_64BIT
+#define FIXADDR_SIZE PMD_SIZE
+#else
+#define FIXADDR_SIZE PGDIR_SIZE
+#endif
+#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE)
/*
* Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32.
@@ -446,6 +433,31 @@ extern void paging_init(void);
#define TASK_SIZE FIXADDR_START
#endif
+#else /* CONFIG_MMU */
+
+#define PAGE_KERNEL __pgprot(0)
+#define swapper_pg_dir NULL
+#define VMALLOC_START 0
+
+#define TASK_SIZE 0xffffffffUL
+
+#endif /* !CONFIG_MMU */
+
+#define kern_addr_valid(addr) (1) /* FIXME */
+
+extern void *dtb_early_va;
+void setup_bootmem(void);
+void paging_init(void);
+
+#define FIRST_USER_ADDRESS 0
+
+/*
+ * ZERO_PAGE is a global shared page that is always zero,
+ * used for zero-mapped memory areas, etc.
+ */
+extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
+#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
+
#include <asm-generic/pgtable.h>
#endif /* !__ASSEMBLY__ */
diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h
index f539149d04c2..3ddb798264f1 100644
--- a/arch/riscv/include/asm/processor.h
+++ b/arch/riscv/include/asm/processor.h
@@ -42,7 +42,7 @@ struct thread_struct {
((struct pt_regs *)(task_stack_page(tsk) + THREAD_SIZE \
- ALIGN(sizeof(struct pt_regs), STACK_ALIGN)))
-#define KSTK_EIP(tsk) (task_pt_regs(tsk)->sepc)
+#define KSTK_EIP(tsk) (task_pt_regs(tsk)->epc)
#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index d48d1e13973c..ee49f80c9533 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -12,7 +12,7 @@
#ifndef __ASSEMBLY__
struct pt_regs {
- unsigned long sepc;
+ unsigned long epc;
unsigned long ra;
unsigned long sp;
unsigned long gp;
@@ -44,10 +44,10 @@ struct pt_regs {
unsigned long t4;
unsigned long t5;
unsigned long t6;
- /* Supervisor CSRs */
- unsigned long sstatus;
- unsigned long sbadaddr;
- unsigned long scause;
+ /* Supervisor/Machine CSRs */
+ unsigned long status;
+ unsigned long badaddr;
+ unsigned long cause;
/* a0 value before the syscall */
unsigned long orig_a0;
};
@@ -58,18 +58,18 @@ struct pt_regs {
#define REG_FMT "%08lx"
#endif
-#define user_mode(regs) (((regs)->sstatus & SR_SPP) == 0)
+#define user_mode(regs) (((regs)->status & SR_PP) == 0)
/* Helpers for working with the instruction pointer */
static inline unsigned long instruction_pointer(struct pt_regs *regs)
{
- return regs->sepc;
+ return regs->epc;
}
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{
- regs->sepc = val;
+ regs->epc = val;
}
#define profile_pc(regs) instruction_pointer(regs)
diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 21134b3ef404..2570c1e683d3 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -8,6 +8,7 @@
#include <linux/types.h>
+#ifdef CONFIG_RISCV_SBI
#define SBI_SET_TIMER 0
#define SBI_CONSOLE_PUTCHAR 1
#define SBI_CONSOLE_GETCHAR 2
@@ -93,5 +94,11 @@ static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
{
SBI_CALL_4(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask, start, size, asid);
}
-
-#endif
+#else /* CONFIG_RISCV_SBI */
+/* stubs for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */
+void sbi_set_timer(uint64_t stime_value);
+void sbi_clear_ipi(void);
+void sbi_send_ipi(const unsigned long *hart_mask);
+void sbi_remote_fence_i(const unsigned long *hart_mask);
+#endif /* CONFIG_RISCV_SBI */
+#endif /* _ASM_RISCV_SBI_H */
diff --git a/arch/riscv/include/asm/seccomp.h b/arch/riscv/include/asm/seccomp.h
new file mode 100644
index 000000000000..bf7744ee3b3d
--- /dev/null
+++ b/arch/riscv/include/asm/seccomp.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_SECCOMP_H
+#define _ASM_SECCOMP_H
+
+#include <asm/unistd.h>
+
+#include <asm-generic/seccomp.h>
+
+#endif /* _ASM_SECCOMP_H */
diff --git a/arch/riscv/include/asm/sparsemem.h b/arch/riscv/include/asm/sparsemem.h
index b58ba2d9ed6e..45a7018a8118 100644
--- a/arch/riscv/include/asm/sparsemem.h
+++ b/arch/riscv/include/asm/sparsemem.h
@@ -1,11 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_SPARSEMEM_H
-#define __ASM_SPARSEMEM_H
+#ifndef _ASM_RISCV_SPARSEMEM_H
+#define _ASM_RISCV_SPARSEMEM_H
#ifdef CONFIG_SPARSEMEM
#define MAX_PHYSMEM_BITS CONFIG_PA_BITS
#define SECTION_SIZE_BITS 27
#endif /* CONFIG_SPARSEMEM */
-#endif /* __ASM_SPARSEMEM_H */
+#endif /* _ASM_RISCV_SPARSEMEM_H */
diff --git a/arch/riscv/include/asm/spinlock_types.h b/arch/riscv/include/asm/spinlock_types.h
index 888cbf8e7111..f398e7638dd6 100644
--- a/arch/riscv/include/asm/spinlock_types.h
+++ b/arch/riscv/include/asm/spinlock_types.h
@@ -22,4 +22,4 @@ typedef struct {
#define __ARCH_RW_LOCK_UNLOCKED { 0 }
-#endif
+#endif /* _ASM_RISCV_SPINLOCK_TYPES_H */
diff --git a/arch/riscv/include/asm/switch_to.h b/arch/riscv/include/asm/switch_to.h
index ee4f0ac62c9d..407bcc96a710 100644
--- a/arch/riscv/include/asm/switch_to.h
+++ b/arch/riscv/include/asm/switch_to.h
@@ -17,19 +17,19 @@ extern void __fstate_restore(struct task_struct *restore_from);
static inline void __fstate_clean(struct pt_regs *regs)
{
- regs->sstatus = (regs->sstatus & ~SR_FS) | SR_FS_CLEAN;
+ regs->status = (regs->status & ~SR_FS) | SR_FS_CLEAN;
}
static inline void fstate_off(struct task_struct *task,
struct pt_regs *regs)
{
- regs->sstatus = (regs->sstatus & ~SR_FS) | SR_FS_OFF;
+ regs->status = (regs->status & ~SR_FS) | SR_FS_OFF;
}
static inline void fstate_save(struct task_struct *task,
struct pt_regs *regs)
{
- if ((regs->sstatus & SR_FS) == SR_FS_DIRTY) {
+ if ((regs->status & SR_FS) == SR_FS_DIRTY) {
__fstate_save(task);
__fstate_clean(regs);
}
@@ -38,7 +38,7 @@ static inline void fstate_save(struct task_struct *task,
static inline void fstate_restore(struct task_struct *task,
struct pt_regs *regs)
{
- if ((regs->sstatus & SR_FS) != SR_FS_OFF) {
+ if ((regs->status & SR_FS) != SR_FS_OFF) {
__fstate_restore(task);
__fstate_clean(regs);
}
@@ -50,7 +50,7 @@ static inline void __switch_to_aux(struct task_struct *prev,
struct pt_regs *regs;
regs = task_pt_regs(prev);
- if (unlikely(regs->sstatus & SR_SD))
+ if (unlikely(regs->status & SR_SD))
fstate_save(prev, regs);
fstate_restore(next, task_pt_regs(next));
}
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index 905372d7eeb8..1dd12a0cbb2b 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -75,6 +75,7 @@ struct thread_info {
#define TIF_MEMDIE 5 /* is terminating due to OOM killer */
#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing */
+#define TIF_SECCOMP 8 /* syscall secure computing */
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
@@ -82,11 +83,13 @@ struct thread_info {
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
+#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_WORK_MASK \
(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED)
#define _TIF_SYSCALL_WORK \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT)
+ (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \
+ _TIF_SECCOMP)
#endif /* _ASM_RISCV_THREAD_INFO_H */
diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h
index c7ef131b9e4c..bad2a7c2cda5 100644
--- a/arch/riscv/include/asm/timex.h
+++ b/arch/riscv/include/asm/timex.h
@@ -7,12 +7,25 @@
#define _ASM_RISCV_TIMEX_H
#include <asm/csr.h>
+#include <asm/mmio.h>
typedef unsigned long cycles_t;
+extern u64 __iomem *riscv_time_val;
+extern u64 __iomem *riscv_time_cmp;
+
+#ifdef CONFIG_64BIT
+#define mmio_get_cycles() readq_relaxed(riscv_time_val)
+#else
+#define mmio_get_cycles() readl_relaxed(riscv_time_val)
+#define mmio_get_cycles_hi() readl_relaxed(((u32 *)riscv_time_val) + 1)
+#endif
+
static inline cycles_t get_cycles(void)
{
- return csr_read(CSR_TIME);
+ if (IS_ENABLED(CONFIG_RISCV_SBI))
+ return csr_read(CSR_TIME);
+ return mmio_get_cycles();
}
#define get_cycles get_cycles
@@ -24,7 +37,9 @@ static inline u64 get_cycles64(void)
#else /* CONFIG_64BIT */
static inline u32 get_cycles_hi(void)
{
- return csr_read(CSR_TIMEH);
+ if (IS_ENABLED(CONFIG_RISCV_SBI))
+ return csr_read(CSR_TIMEH);
+ return mmio_get_cycles_hi();
}
static inline u64 get_cycles64(void)
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index f02188a5b0f4..394cfbccdcd9 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -10,6 +10,7 @@
#include <linux/mm_types.h>
#include <asm/smp.h>
+#ifdef CONFIG_MMU
static inline void local_flush_tlb_all(void)
{
__asm__ __volatile__ ("sfence.vma" : : : "memory");
@@ -20,14 +21,19 @@ static inline void local_flush_tlb_page(unsigned long addr)
{
__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
}
+#else /* CONFIG_MMU */
+#define local_flush_tlb_all() do { } while (0)
+#define local_flush_tlb_page(addr) do { } while (0)
+#endif /* CONFIG_MMU */
-#ifdef CONFIG_SMP
+#if defined(CONFIG_SMP) && defined(CONFIG_MMU)
void flush_tlb_all(void);
void flush_tlb_mm(struct mm_struct *mm);
void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
-#else /* CONFIG_SMP */
+#else /* CONFIG_SMP && CONFIG_MMU */
+
#define flush_tlb_all() local_flush_tlb_all()
#define flush_tlb_page(vma, addr) local_flush_tlb_page(addr)
@@ -38,7 +44,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
}
#define flush_tlb_mm(mm) flush_tlb_all()
-#endif /* CONFIG_SMP */
+#endif /* !CONFIG_SMP || !CONFIG_MMU */
/* Flush a range of kernel pages */
static inline void flush_tlb_kernel_range(unsigned long start,
diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h
index e076437cfafe..f462a183a9c2 100644
--- a/arch/riscv/include/asm/uaccess.h
+++ b/arch/riscv/include/asm/uaccess.h
@@ -11,6 +11,7 @@
/*
* User space memory access functions
*/
+#ifdef CONFIG_MMU
#include <linux/errno.h>
#include <linux/compiler.h>
#include <linux/thread_info.h>
@@ -475,4 +476,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n)
__ret; \
})
+#else /* CONFIG_MMU */
+#include <asm-generic/uaccess.h>
+#endif /* CONFIG_MMU */
#endif /* _ASM_RISCV_UACCESS_H */
diff --git a/arch/riscv/include/uapi/asm/elf.h b/arch/riscv/include/uapi/asm/elf.h
index 644a00ce6e2e..d696d6610231 100644
--- a/arch/riscv/include/uapi/asm/elf.h
+++ b/arch/riscv/include/uapi/asm/elf.h
@@ -9,8 +9,8 @@
* (at your option) any later version.
*/
-#ifndef _UAPI_ASM_ELF_H
-#define _UAPI_ASM_ELF_H
+#ifndef _UAPI_ASM_RISCV_ELF_H
+#define _UAPI_ASM_RISCV_ELF_H
#include <asm/ptrace.h>
@@ -95,4 +95,4 @@ typedef union __riscv_fp_state elf_fpregset_t;
#define R_RISCV_32_PCREL 57
-#endif /* _UAPI_ASM_ELF_H */
+#endif /* _UAPI_ASM_RISCV_ELF_H */
diff --git a/arch/riscv/include/uapi/asm/hwcap.h b/arch/riscv/include/uapi/asm/hwcap.h
index 4e7646077056..dee98ee28318 100644
--- a/arch/riscv/include/uapi/asm/hwcap.h
+++ b/arch/riscv/include/uapi/asm/hwcap.h
@@ -5,8 +5,8 @@
* Copyright (C) 2012 ARM Ltd.
* Copyright (C) 2017 SiFive
*/
-#ifndef __UAPI_ASM_HWCAP_H
-#define __UAPI_ASM_HWCAP_H
+#ifndef _UAPI_ASM_RISCV_HWCAP_H
+#define _UAPI_ASM_RISCV_HWCAP_H
/*
* Linux saves the floating-point registers according to the ISA Linux is
@@ -22,4 +22,4 @@
#define COMPAT_HWCAP_ISA_D (1 << ('D' - 'A'))
#define COMPAT_HWCAP_ISA_C (1 << ('C' - 'A'))
-#endif
+#endif /* _UAPI_ASM_RISCV_HWCAP_H */
diff --git a/arch/riscv/include/uapi/asm/ucontext.h b/arch/riscv/include/uapi/asm/ucontext.h
index 411dd7b52ed6..44eb993950e5 100644
--- a/arch/riscv/include/uapi/asm/ucontext.h
+++ b/arch/riscv/include/uapi/asm/ucontext.h
@@ -5,8 +5,8 @@
*
* This file was copied from arch/arm64/include/uapi/asm/ucontext.h
*/
-#ifndef _UAPI__ASM_UCONTEXT_H
-#define _UAPI__ASM_UCONTEXT_H
+#ifndef _UAPI_ASM_RISCV_UCONTEXT_H
+#define _UAPI_ASM_RISCV_UCONTEXT_H
#include <linux/types.h>
@@ -31,4 +31,4 @@ struct ucontext {
struct sigcontext uc_mcontext;
};
-#endif /* _UAPI__ASM_UCONTEXT_H */
+#endif /* _UAPI_ASM_RISCV_UCONTEXT_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 696020ff72db..f40205cb9a22 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -25,10 +25,10 @@ obj-y += time.o
obj-y += traps.o
obj-y += riscv_ksyms.o
obj-y += stacktrace.o
-obj-y += vdso.o
obj-y += cacheinfo.o
-obj-y += vdso/
+obj-$(CONFIG_MMU) += vdso.o vdso/
+obj-$(CONFIG_RISCV_M_MODE) += clint.o
obj-$(CONFIG_FPU) += fpu.o
obj-$(CONFIG_SMP) += smpboot.o
obj-$(CONFIG_SMP) += smp.o
@@ -41,5 +41,6 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
obj-$(CONFIG_PERF_EVENTS) += perf_event.o
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
+obj-$(CONFIG_RISCV_SBI) += sbi.o
clean:
diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
index 9f5628c38ac9..07cb9c10de4e 100644
--- a/arch/riscv/kernel/asm-offsets.c
+++ b/arch/riscv/kernel/asm-offsets.c
@@ -71,7 +71,7 @@ void asm_offsets(void)
OFFSET(TASK_THREAD_FCSR, task_struct, thread.fstate.fcsr);
DEFINE(PT_SIZE, sizeof(struct pt_regs));
- OFFSET(PT_SEPC, pt_regs, sepc);
+ OFFSET(PT_EPC, pt_regs, epc);
OFFSET(PT_RA, pt_regs, ra);
OFFSET(PT_FP, pt_regs, s0);
OFFSET(PT_S0, pt_regs, s0);
@@ -105,9 +105,9 @@ void asm_offsets(void)
OFFSET(PT_T6, pt_regs, t6);
OFFSET(PT_GP, pt_regs, gp);
OFFSET(PT_ORIG_A0, pt_regs, orig_a0);
- OFFSET(PT_SSTATUS, pt_regs, sstatus);
- OFFSET(PT_SBADADDR, pt_regs, sbadaddr);
- OFFSET(PT_SCAUSE, pt_regs, scause);
+ OFFSET(PT_STATUS, pt_regs, status);
+ OFFSET(PT_BADADDR, pt_regs, badaddr);
+ OFFSET(PT_CAUSE, pt_regs, cause);
/*
* THREAD_{F,X}* might be larger than a S-type offset can handle, but
diff --git a/arch/riscv/kernel/clint.c b/arch/riscv/kernel/clint.c
new file mode 100644
index 000000000000..3647980d14c3
--- /dev/null
+++ b/arch/riscv/kernel/clint.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2019 Christoph Hellwig.
+ */
+
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/types.h>
+#include <asm/clint.h>
+#include <asm/csr.h>
+#include <asm/timex.h>
+#include <asm/smp.h>
+
+/*
+ * This is the layout used by the SiFive clint, which is also shared by the qemu
+ * virt platform, and the Kendryte KD210 at least.
+ */
+#define CLINT_IPI_OFF 0
+#define CLINT_TIME_CMP_OFF 0x4000
+#define CLINT_TIME_VAL_OFF 0xbff8
+
+u32 __iomem *clint_ipi_base;
+
+void clint_init_boot_cpu(void)
+{
+ struct device_node *np;
+ void __iomem *base;
+
+ np = of_find_compatible_node(NULL, NULL, "riscv,clint0");
+ if (!np) {
+ panic("clint not found");
+ return;
+ }
+
+ base = of_iomap(np, 0);
+ if (!base)
+ panic("could not map CLINT");
+
+ clint_ipi_base = base + CLINT_IPI_OFF;
+ riscv_time_cmp = base + CLINT_TIME_CMP_OFF;
+ riscv_time_val = base + CLINT_TIME_VAL_OFF;
+
+ clint_clear_ipi(boot_cpu_hartid);
+}
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 7da3c6a93abd..40a3c442ac5f 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -46,51 +46,12 @@ int riscv_of_processor_hartid(struct device_node *node)
#ifdef CONFIG_PROC_FS
-static void print_isa(struct seq_file *f, const char *orig_isa)
+static void print_isa(struct seq_file *f, const char *isa)
{
- static const char *ext = "mafdcsu";
- const char *isa = orig_isa;
- const char *e;
-
- /*
- * Linux doesn't support rv32e or rv128i, and we only support booting
- * kernels on harts with the same ISA that the kernel is compiled for.
- */
-#if defined(CONFIG_32BIT)
- if (strncmp(isa, "rv32i", 5) != 0)
- return;
-#elif defined(CONFIG_64BIT)
- if (strncmp(isa, "rv64i", 5) != 0)
- return;
-#endif
-
- /* Print the base ISA, as we already know it's legal. */
+ /* Print the entire ISA as it is */
seq_puts(f, "isa\t\t: ");
- seq_write(f, isa, 5);
- isa += 5;
-
- /*
- * Check the rest of the ISA string for valid extensions, printing those
- * we find. RISC-V ISA strings define an order, so we only print the
- * extension bits when they're in order. Hide the supervisor (S)
- * extension from userspace as it's not accessible from there.
- */
- for (e = ext; *e != '\0'; ++e) {
- if (isa[0] == e[0]) {
- if (isa[0] != 's')
- seq_write(f, isa, 1);
-
- isa++;
- }
- }
+ seq_write(f, isa, strlen(isa));
seq_puts(f, "\n");
-
- /*
- * If we were given an unsupported ISA in the device tree then print
- * a bit of info describing what went wrong.
- */
- if (isa[0] != '\0')
- pr_info("unsupported ISA \"%s\" in device tree\n", orig_isa);
}
static void print_mmu(struct seq_file *f, const char *mmu_type)
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 8ca479831142..a1349ca64669 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -26,14 +26,14 @@
/*
* If coming from userspace, preserve the user thread pointer and load
- * the kernel thread pointer. If we came from the kernel, sscratch
- * will contain 0, and we should continue on the current TP.
+ * the kernel thread pointer. If we came from the kernel, the scratch
+ * register will contain 0, and we should continue on the current TP.
*/
- csrrw tp, CSR_SSCRATCH, tp
+ csrrw tp, CSR_SCRATCH, tp
bnez tp, _save_context
_restore_kernel_tpsp:
- csrr tp, CSR_SSCRATCH
+ csrr tp, CSR_SCRATCH
REG_S sp, TASK_TI_KERNEL_SP(tp)
_save_context:
REG_S sp, TASK_TI_USER_SP(tp)
@@ -79,16 +79,16 @@ _save_context:
li t0, SR_SUM | SR_FS
REG_L s0, TASK_TI_USER_SP(tp)
- csrrc s1, CSR_SSTATUS, t0
- csrr s2, CSR_SEPC
- csrr s3, CSR_STVAL
- csrr s4, CSR_SCAUSE
- csrr s5, CSR_SSCRATCH
+ csrrc s1, CSR_STATUS, t0
+ csrr s2, CSR_EPC
+ csrr s3, CSR_TVAL
+ csrr s4, CSR_CAUSE
+ csrr s5, CSR_SCRATCH
REG_S s0, PT_SP(sp)
- REG_S s1, PT_SSTATUS(sp)
- REG_S s2, PT_SEPC(sp)
- REG_S s3, PT_SBADADDR(sp)
- REG_S s4, PT_SCAUSE(sp)
+ REG_S s1, PT_STATUS(sp)
+ REG_S s2, PT_EPC(sp)
+ REG_S s3, PT_BADADDR(sp)
+ REG_S s4, PT_CAUSE(sp)
REG_S s5, PT_TP(sp)
.endm
@@ -97,7 +97,7 @@ _save_context:
* registers from the stack.
*/
.macro RESTORE_ALL
- REG_L a0, PT_SSTATUS(sp)
+ REG_L a0, PT_STATUS(sp)
/*
* The current load reservation is effectively part of the processor's
* state, in the sense that load reservations cannot be shared between
@@ -115,11 +115,11 @@ _save_context:
* completes, implementations are allowed to expand reservations to be
* arbitrarily large.
*/
- REG_L a2, PT_SEPC(sp)
- REG_SC x0, a2, PT_SEPC(sp)
+ REG_L a2, PT_EPC(sp)
+ REG_SC x0, a2, PT_EPC(sp)
- csrw CSR_SSTATUS, a0
- csrw CSR_SEPC, a2
+ csrw CSR_STATUS, a0
+ csrw CSR_EPC, a2
REG_L x1, PT_RA(sp)
REG_L x3, PT_GP(sp)
@@ -163,10 +163,10 @@ ENTRY(handle_exception)
SAVE_ALL
/*
- * Set sscratch register to 0, so that if a recursive exception
+ * Set the scratch register to 0, so that if a recursive exception
* occurs, the exception vector knows it came from the kernel
*/
- csrw CSR_SSCRATCH, x0
+ csrw CSR_SCRATCH, x0
/* Load the global pointer */
.option push
@@ -185,11 +185,13 @@ ENTRY(handle_exception)
move a0, sp /* pt_regs */
tail do_IRQ
1:
- /* Exceptions run with interrupts enabled or disabled
- depending on the state of sstatus.SR_SPIE */
- andi t0, s1, SR_SPIE
+ /*
+ * Exceptions run with interrupts enabled or disabled depending on the
+ * state of SR_PIE in m/sstatus.
+ */
+ andi t0, s1, SR_PIE
beqz t0, 1f
- csrs CSR_SSTATUS, SR_SIE
+ csrs CSR_STATUS, SR_IE
1:
/* Handle syscalls */
@@ -217,7 +219,7 @@ handle_syscall:
* scall instruction on sret
*/
addi s2, s2, 0x4
- REG_S s2, PT_SEPC(sp)
+ REG_S s2, PT_EPC(sp)
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_WORK
@@ -226,8 +228,25 @@ check_syscall_nr:
/* Check to make sure we don't jump to a bogus syscall number. */
li t0, __NR_syscalls
la s0, sys_ni_syscall
- /* Syscall number held in a7 */
- bgeu a7, t0, 1f
+ /*
+ * The tracer can change syscall number to valid/invalid value.
+ * We use syscall_set_nr helper in syscall_trace_enter thus we
+ * cannot trust the current value in a7 and have to reload from
+ * the current task pt_regs.
+ */
+ REG_L a7, PT_A7(sp)
+ /*
+ * Syscall number held in a7.
+ * If syscall number is above allowed value, redirect to ni_syscall.
+ */
+ bge a7, t0, 1f
+ /*
+ * Check if syscall is rejected by tracer or seccomp, i.e., a7 == -1.
+ * If yes, we pretend it was executed.
+ */
+ li t1, -1
+ beq a7, t1, ret_from_syscall_rejected
+ /* Call syscall */
la s0, sys_call_table
slli t0, a7, RISCV_LGPTR
add s0, s0, t0
@@ -238,15 +257,27 @@ check_syscall_nr:
ret_from_syscall:
/* Set user a0 to kernel a0 */
REG_S a0, PT_A0(sp)
+ /*
+ * We didn't execute the actual syscall.
+ * Seccomp already set return value for the current task pt_regs.
+ * (If it was configured with SECCOMP_RET_ERRNO/TRACE)
+ */
+ret_from_syscall_rejected:
/* Trace syscalls, but only if requested by the user. */
REG_L t0, TASK_TI_FLAGS(tp)
andi t0, t0, _TIF_SYSCALL_WORK
bnez t0, handle_syscall_trace_exit
ret_from_exception:
- REG_L s0, PT_SSTATUS(sp)
- csrc CSR_SSTATUS, SR_SIE
+ REG_L s0, PT_STATUS(sp)
+ csrc CSR_STATUS, SR_IE
+#ifdef CONFIG_RISCV_M_MODE
+ /* the MPP value is too large to be used as an immediate arg for addi */
+ li t0, SR_MPP
+ and s0, s0, t0
+#else
andi s0, s0, SR_SPP
+#endif
bnez s0, resume_kernel
resume_userspace:
@@ -260,14 +291,18 @@ resume_userspace:
REG_S s0, TASK_TI_KERNEL_SP(tp)
/*
- * Save TP into sscratch, so we can find the kernel data structures
- * again.
+ * Save TP into the scratch register , so we can find the kernel data
+ * structures again.
*/
- csrw CSR_SSCRATCH, tp
+ csrw CSR_SCRATCH, tp
restore_all:
RESTORE_ALL
+#ifdef CONFIG_RISCV_M_MODE
+ mret
+#else
sret
+#endif
#if IS_ENABLED(CONFIG_PREEMPT)
resume_kernel:
@@ -287,7 +322,7 @@ work_pending:
bnez s1, work_resched
work_notifysig:
/* Handle pending signals and notify-resume requests */
- csrs CSR_SSTATUS, SR_SIE /* Enable interrupts for do_notify_resume() */
+ csrs CSR_STATUS, SR_IE /* Enable interrupts for do_notify_resume() */
move a0, sp /* pt_regs */
move a1, s0 /* current_thread_info->flags */
tail do_notify_resume
@@ -386,6 +421,10 @@ ENTRY(__switch_to)
ret
ENDPROC(__switch_to)
+#ifndef CONFIG_MMU
+#define do_page_fault do_trap_unknown
+#endif
+
.section ".rodata"
/* Exception vector table */
ENTRY(excp_vect_table)
@@ -407,3 +446,10 @@ ENTRY(excp_vect_table)
RISCV_PTR do_page_fault /* store page fault */
excp_vect_table_end:
END(excp_vect_table)
+
+#ifndef CONFIG_MMU
+ENTRY(__user_rt_sigreturn)
+ li a7, __NR_rt_sigreturn
+ scall
+END(__user_rt_sigreturn)
+#endif
diff --git a/arch/riscv/kernel/fpu.S b/arch/riscv/kernel/fpu.S
index 631d31540660..dd2205473de7 100644
--- a/arch/riscv/kernel/fpu.S
+++ b/arch/riscv/kernel/fpu.S
@@ -23,7 +23,7 @@ ENTRY(__fstate_save)
li a2, TASK_THREAD_F0
add a0, a0, a2
li t1, SR_FS
- csrs CSR_SSTATUS, t1
+ csrs CSR_STATUS, t1
frcsr t0
fsd f0, TASK_THREAD_F0_F0(a0)
fsd f1, TASK_THREAD_F1_F0(a0)
@@ -58,7 +58,7 @@ ENTRY(__fstate_save)
fsd f30, TASK_THREAD_F30_F0(a0)
fsd f31, TASK_THREAD_F31_F0(a0)
sw t0, TASK_THREAD_FCSR_F0(a0)
- csrc CSR_SSTATUS, t1
+ csrc CSR_STATUS, t1
ret
ENDPROC(__fstate_save)
@@ -67,7 +67,7 @@ ENTRY(__fstate_restore)
add a0, a0, a2
li t1, SR_FS
lw t0, TASK_THREAD_FCSR_F0(a0)
- csrs CSR_SSTATUS, t1
+ csrs CSR_STATUS, t1
fld f0, TASK_THREAD_F0_F0(a0)
fld f1, TASK_THREAD_F1_F0(a0)
fld f2, TASK_THREAD_F2_F0(a0)
@@ -101,6 +101,6 @@ ENTRY(__fstate_restore)
fld f30, TASK_THREAD_F30_F0(a0)
fld f31, TASK_THREAD_F31_F0(a0)
fscsr t0
- csrc CSR_SSTATUS, t1
+ csrc CSR_STATUS, t1
ret
ENDPROC(__fstate_restore)
diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S
index 72f89b7590dd..84a6f0a4b120 100644
--- a/arch/riscv/kernel/head.S
+++ b/arch/riscv/kernel/head.S
@@ -11,6 +11,7 @@
#include <asm/thread_info.h>
#include <asm/page.h>
#include <asm/csr.h>
+#include <asm/hwcap.h>
#include <asm/image.h>
__INIT
@@ -47,8 +48,22 @@ ENTRY(_start)
.global _start_kernel
_start_kernel:
/* Mask all interrupts */
- csrw CSR_SIE, zero
- csrw CSR_SIP, zero
+ csrw CSR_IE, zero
+ csrw CSR_IP, zero
+
+#ifdef CONFIG_RISCV_M_MODE
+ /* flush the instruction cache */
+ fence.i
+
+ /* Reset all registers except ra, a0, a1 */
+ call reset_regs
+
+ /*
+ * The hartid in a0 is expected later on, and we have no firmware
+ * to hand it to us.
+ */
+ csrr a0, CSR_MHARTID
+#endif /* CONFIG_RISCV_M_MODE */
/* Load the global pointer */
.option push
@@ -61,7 +76,7 @@ _start_kernel:
* floating point in kernel space
*/
li t0, SR_FS
- csrc CSR_SSTATUS, t0
+ csrc CSR_STATUS, t0
#ifdef CONFIG_SMP
li t0, CONFIG_NR_CPUS
@@ -94,8 +109,10 @@ clear_bss_done:
la sp, init_thread_union + THREAD_SIZE
mv a0, s1
call setup_vm
+#ifdef CONFIG_MMU
la a0, early_pg_dir
call relocate
+#endif /* CONFIG_MMU */
/* Restore C environment */
la tp, init_task
@@ -106,6 +123,7 @@ clear_bss_done:
call parse_dtb
tail start_kernel
+#ifdef CONFIG_MMU
relocate:
/* Relocate return address */
li a1, PAGE_OFFSET
@@ -116,7 +134,7 @@ relocate:
/* Point stvec to virtual address of intruction after satp write */
la a2, 1f
add a2, a2, a1
- csrw CSR_STVEC, a2
+ csrw CSR_TVEC, a2
/* Compute satp for kernel page tables, but don't load it yet */
srl a2, a0, PAGE_SHIFT
@@ -138,7 +156,7 @@ relocate:
1:
/* Set trap vector to spin forever to help debug */
la a0, .Lsecondary_park
- csrw CSR_STVEC, a0
+ csrw CSR_TVEC, a0
/* Reload the global pointer */
.option push
@@ -156,12 +174,13 @@ relocate:
sfence.vma
ret
+#endif /* CONFIG_MMU */
.Lsecondary_start:
#ifdef CONFIG_SMP
/* Set trap vector to spin forever to help debug */
la a3, .Lsecondary_park
- csrw CSR_STVEC, a3
+ csrw CSR_TVEC, a3
slli a3, a0, LGREG
la a1, __cpu_up_stack_pointer
@@ -181,9 +200,11 @@ relocate:
beqz tp, .Lwait_for_cpu_up
fence
+#ifdef CONFIG_MMU
/* Enable virtual memory and relocate to virtual address */
la a0, swapper_pg_dir
call relocate
+#endif
tail smp_callin
#endif
@@ -195,6 +216,85 @@ relocate:
j .Lsecondary_park
END(_start)
+#ifdef CONFIG_RISCV_M_MODE
+ENTRY(reset_regs)
+ li sp, 0
+ li gp, 0
+ li tp, 0
+ li t0, 0
+ li t1, 0
+ li t2, 0
+ li s0, 0
+ li s1, 0
+ li a2, 0
+ li a3, 0
+ li a4, 0
+ li a5, 0
+ li a6, 0
+ li a7, 0
+ li s2, 0
+ li s3, 0
+ li s4, 0
+ li s5, 0
+ li s6, 0
+ li s7, 0
+ li s8, 0
+ li s9, 0
+ li s10, 0
+ li s11, 0
+ li t3, 0
+ li t4, 0
+ li t5, 0
+ li t6, 0
+ csrw sscratch, 0
+
+#ifdef CONFIG_FPU
+ csrr t0, CSR_MISA
+ andi t0, t0, (COMPAT_HWCAP_ISA_F | COMPAT_HWCAP_ISA_D)
+ bnez t0, .Lreset_regs_done
+
+ li t1, SR_FS
+ csrs CSR_STATUS, t1
+ fmv.s.x f0, zero
+ fmv.s.x f1, zero
+ fmv.s.x f2, zero
+ fmv.s.x f3, zero
+ fmv.s.x f4, zero
+ fmv.s.x f5, zero
+ fmv.s.x f6, zero
+ fmv.s.x f7, zero
+ fmv.s.x f8, zero
+ fmv.s.x f9, zero
+ fmv.s.x f10, zero
+ fmv.s.x f11, zero
+ fmv.s.x f12, zero
+ fmv.s.x f13, zero
+ fmv.s.x f14, zero
+ fmv.s.x f15, zero
+ fmv.s.x f16, zero
+ fmv.s.x f17, zero
+ fmv.s.x f18, zero
+ fmv.s.x f19, zero
+ fmv.s.x f20, zero
+ fmv.s.x f21, zero
+ fmv.s.x f22, zero
+ fmv.s.x f23, zero
+ fmv.s.x f24, zero
+ fmv.s.x f25, zero
+ fmv.s.x f26, zero
+ fmv.s.x f27, zero
+ fmv.s.x f28, zero
+ fmv.s.x f29, zero
+ fmv.s.x f30, zero
+ fmv.s.x f31, zero
+ csrw fcsr, 0
+ /* note that the caller must clear SR_FS */
+#endif /* CONFIG_FPU */
+.Lreset_regs_done:
+ ret
+END(reset_regs)
+#endif /* CONFIG_RISCV_M_MODE */
+
__PAGE_ALIGNED_BSS
/* Empty zero page */
.balign PAGE_SIZE
diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c
index fffac6ddb0e0..3f07a91d5afb 100644
--- a/arch/riscv/kernel/irq.c
+++ b/arch/riscv/kernel/irq.c
@@ -11,13 +11,6 @@
#include <linux/seq_file.h>
#include <asm/smp.h>
-/*
- * Possible interrupt causes:
- */
-#define INTERRUPT_CAUSE_SOFTWARE IRQ_S_SOFT
-#define INTERRUPT_CAUSE_TIMER IRQ_S_TIMER
-#define INTERRUPT_CAUSE_EXTERNAL IRQ_S_EXT
-
int arch_show_interrupts(struct seq_file *p, int prec)
{
show_ipi_stats(p, prec);
@@ -29,12 +22,12 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs)
struct pt_regs *old_regs = set_irq_regs(regs);
irq_enter();
- switch (regs->scause & ~SCAUSE_IRQ_FLAG) {
- case INTERRUPT_CAUSE_TIMER:
+ switch (regs->cause & ~CAUSE_IRQ_FLAG) {
+ case IRQ_TIMER:
riscv_timer_interrupt();
break;
#ifdef CONFIG_SMP
- case INTERRUPT_CAUSE_SOFTWARE:
+ case IRQ_SOFT:
/*
* We only use software interrupts to pass IPIs, so if a non-SMP
* system gets one, then we don't know what to do.
@@ -42,11 +35,11 @@ asmlinkage __visible void __irq_entry do_IRQ(struct pt_regs *regs)
riscv_software_interrupt();
break;
#endif
- case INTERRUPT_CAUSE_EXTERNAL:
+ case IRQ_EXT:
handle_arch_irq(regs);
break;
default:
- pr_alert("unexpected interrupt cause 0x%lx", regs->scause);
+ pr_alert("unexpected interrupt cause 0x%lx", regs->cause);
BUG();
}
irq_exit();
diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c
index 70bb94ae61c5..b7401858d872 100644
--- a/arch/riscv/kernel/module.c
+++ b/arch/riscv/kernel/module.c
@@ -315,8 +315,8 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
/* Ignore unresolved weak symbol */
if (ELF_ST_BIND(sym->st_info) == STB_WEAK)
continue;
- pr_warning("%s: Unknown symbol %s\n",
- me->name, strtab + sym->st_name);
+ pr_warn("%s: Unknown symbol %s\n",
+ me->name, strtab + sym->st_name);
return -ENOENT;
}
diff --git a/arch/riscv/kernel/perf_callchain.c b/arch/riscv/kernel/perf_callchain.c
index 8d2804f05cf9..cf190197a22f 100644
--- a/arch/riscv/kernel/perf_callchain.c
+++ b/arch/riscv/kernel/perf_callchain.c
@@ -67,7 +67,7 @@ void perf_callchain_user(struct perf_callchain_entry_ctx *entry,
return;
fp = regs->s0;
- perf_callchain_store(entry, regs->sepc);
+ perf_callchain_store(entry, regs->epc);
fp = user_backtrace(entry, fp, regs->ra);
while (fp && !(fp & 0x3) && entry->nr < entry->max_stack)
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 85e3c39bb60b..95a3031e5c7c 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -35,8 +35,8 @@ void show_regs(struct pt_regs *regs)
{
show_regs_print_info(KERN_DEFAULT);
- pr_cont("sepc: " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
- regs->sepc, regs->ra, regs->sp);
+ pr_cont("epc: " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
+ regs->epc, regs->ra, regs->sp);
pr_cont(" gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n",
regs->gp, regs->tp, regs->t0);
pr_cont(" t1 : " REG_FMT " t2 : " REG_FMT " s0 : " REG_FMT "\n",
@@ -58,23 +58,23 @@ void show_regs(struct pt_regs *regs)
pr_cont(" t5 : " REG_FMT " t6 : " REG_FMT "\n",
regs->t5, regs->t6);
- pr_cont("sstatus: " REG_FMT " sbadaddr: " REG_FMT " scause: " REG_FMT "\n",
- regs->sstatus, regs->sbadaddr, regs->scause);
+ pr_cont("status: " REG_FMT " badaddr: " REG_FMT " cause: " REG_FMT "\n",
+ regs->status, regs->badaddr, regs->cause);
}
void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
- regs->sstatus = SR_SPIE;
+ regs->status = SR_PIE;
if (has_fpu) {
- regs->sstatus |= SR_FS_INITIAL;
+ regs->status |= SR_FS_INITIAL;
/*
* Restore the initial value to the FP register
* before starting the user program.
*/
fstate_restore(current, regs);
}
- regs->sepc = pc;
+ regs->epc = pc;
regs->sp = sp;
set_fs(USER_DS);
}
@@ -110,7 +110,8 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
const register unsigned long gp __asm__ ("gp");
memset(childregs, 0, sizeof(struct pt_regs));
childregs->gp = gp;
- childregs->sstatus = SR_SPP | SR_SPIE; /* Supervisor, irqs on */
+ /* Supervisor/Machine, irqs on: */
+ childregs->status = SR_PP | SR_PIE;
p->thread.ra = (unsigned long)ret_from_kernel_thread;
p->thread.s[0] = usp; /* fn */
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 1252113ef8b2..0f84628b9385 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -154,6 +154,16 @@ __visible void do_syscall_trace_enter(struct pt_regs *regs)
if (tracehook_report_syscall_entry(regs))
syscall_set_nr(current, regs, -1);
+ /*
+ * Do the secure computing after ptrace; failures should be fast.
+ * If this fails we might have return value in a0 from seccomp
+ * (via SECCOMP_RET_ERRNO/TRACE).
+ */
+ if (secure_computing(NULL) == -1) {
+ syscall_set_nr(current, regs, -1);
+ return;
+ }
+
#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
trace_sys_enter(regs, syscall_get_nr(current, regs));
diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c
index aa56bb135ec4..ee5878d968cc 100644
--- a/arch/riscv/kernel/reset.c
+++ b/arch/riscv/kernel/reset.c
@@ -5,12 +5,11 @@
#include <linux/reboot.h>
#include <linux/pm.h>
-#include <asm/sbi.h>
static void default_power_off(void)
{
- sbi_shutdown();
- while (1);
+ while (1)
+ wait_for_interrupt();
}
void (*pm_power_off)(void) = default_power_off;
diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c
new file mode 100644
index 000000000000..f6c7c3e82d28
--- /dev/null
+++ b/arch/riscv/kernel/sbi.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/init.h>
+#include <linux/pm.h>
+#include <asm/sbi.h>
+
+static void sbi_power_off(void)
+{
+ sbi_shutdown();
+}
+
+static int __init sbi_init(void)
+{
+ pm_power_off = sbi_power_off;
+ return 0;
+}
+early_initcall(sbi_init);
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 845ae0e12115..365ff8420bfe 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -17,6 +17,7 @@
#include <linux/sched/task.h>
#include <linux/swiotlb.h>
+#include <asm/clint.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/pgtable.h>
@@ -67,6 +68,7 @@ void __init setup_arch(char **cmdline_p)
setup_bootmem();
paging_init();
unflatten_device_tree();
+ clint_init_boot_cpu();
#ifdef CONFIG_SWIOTLB
swiotlb_init(1);
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index d0f6f212f5df..17ba190e84a5 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -17,11 +17,16 @@
#include <asm/switch_to.h>
#include <asm/csr.h>
+extern u32 __user_rt_sigreturn[2];
+
#define DEBUG_SIG 0
struct rt_sigframe {
struct siginfo info;
struct ucontext uc;
+#ifndef CONFIG_MMU
+ u32 sigreturn_code[2];
+#endif
};
#ifdef CONFIG_FPU
@@ -124,7 +129,7 @@ badframe:
pr_info_ratelimited(
"%s[%d]: bad frame in %s: frame=%p pc=%p sp=%p\n",
task->comm, task_pid_nr(task), __func__,
- frame, (void *)regs->sepc, (void *)regs->sp);
+ frame, (void *)regs->epc, (void *)regs->sp);
}
force_sig(SIGSEGV);
return 0;
@@ -166,7 +171,6 @@ static inline void __user *get_sigframe(struct ksignal *ksig,
return (void __user *)sp;
}
-
static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
struct pt_regs *regs)
{
@@ -189,8 +193,19 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
return -EFAULT;
/* Set up to return from userspace. */
+#ifdef CONFIG_MMU
regs->ra = (unsigned long)VDSO_SYMBOL(
current->mm->context.vdso, rt_sigreturn);
+#else
+ /*
+ * For the nommu case we don't have a VDSO. Instead we push two
+ * instructions to call the rt_sigreturn syscall onto the user stack.
+ */
+ if (copy_to_user(&frame->sigreturn_code, __user_rt_sigreturn,
+ sizeof(frame->sigreturn_code)))
+ return -EFAULT;
+ regs->ra = (unsigned long)&frame->sigreturn_code;
+#endif /* CONFIG_MMU */
/*
* Set up registers for signal handler.
@@ -199,7 +214,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
* We always pass siginfo and mcontext, regardless of SA_SIGINFO,
* since some things rely on this (e.g. glibc's debug/segfault.c).
*/
- regs->sepc = (unsigned long)ksig->ka.sa.sa_handler;
+ regs->epc = (unsigned long)ksig->ka.sa.sa_handler;
regs->sp = (unsigned long)frame;
regs->a0 = ksig->sig; /* a0: signal number */
regs->a1 = (unsigned long)(&frame->info); /* a1: siginfo pointer */
@@ -208,7 +223,7 @@ static int setup_rt_frame(struct ksignal *ksig, sigset_t *set,
#if DEBUG_SIG
pr_info("SIG deliver (%s:%d): sig=%d pc=%p ra=%p sp=%p\n",
current->comm, task_pid_nr(current), ksig->sig,
- (void *)regs->sepc, (void *)regs->ra, frame);
+ (void *)regs->epc, (void *)regs->ra, frame);
#endif
return 0;
@@ -220,10 +235,9 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
int ret;
/* Are we from a system call? */
- if (regs->scause == EXC_SYSCALL) {
+ if (regs->cause == EXC_SYSCALL) {
/* Avoid additional syscall restarting via ret_from_exception */
- regs->scause = -1UL;
-
+ regs->cause = -1UL;
/* If so, check system call restarting.. */
switch (regs->a0) {
case -ERESTART_RESTARTBLOCK:
@@ -239,7 +253,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
/* fallthrough */
case -ERESTARTNOINTR:
regs->a0 = regs->orig_a0;
- regs->sepc -= 0x4;
+ regs->epc -= 0x4;
break;
}
}
@@ -261,9 +275,9 @@ static void do_signal(struct pt_regs *regs)
}
/* Did we come from a system call? */
- if (regs->scause == EXC_SYSCALL) {
+ if (regs->cause == EXC_SYSCALL) {
/* Avoid additional syscall restarting via ret_from_exception */
- regs->scause = -1UL;
+ regs->cause = -1UL;
/* Restart the system call - no handlers present */
switch (regs->a0) {
@@ -271,12 +285,12 @@ static void do_signal(struct pt_regs *regs)
case -ERESTARTSYS:
case -ERESTARTNOINTR:
regs->a0 = regs->orig_a0;
- regs->sepc -= 0x4;
+ regs->epc -= 0x4;
break;
case -ERESTART_RESTARTBLOCK:
regs->a0 = regs->orig_a0;
regs->a7 = __NR_restart_syscall;
- regs->sepc -= 0x4;
+ regs->epc -= 0x4;
break;
}
}
diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c
index 5c9ec78422c2..eb878abcaaf8 100644
--- a/arch/riscv/kernel/smp.c
+++ b/arch/riscv/kernel/smp.c
@@ -16,6 +16,7 @@
#include <linux/seq_file.h>
#include <linux/delay.h>
+#include <asm/clint.h>
#include <asm/sbi.h>
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
@@ -92,7 +93,10 @@ static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op)
smp_mb__after_atomic();
riscv_cpuid_to_hartid_mask(mask, &hartid_mask);
- sbi_send_ipi(cpumask_bits(&hartid_mask));
+ if (IS_ENABLED(CONFIG_RISCV_SBI))
+ sbi_send_ipi(cpumask_bits(&hartid_mask));
+ else
+ clint_send_ipi_mask(&hartid_mask);
}
static void send_ipi_single(int cpu, enum ipi_message_type op)
@@ -103,12 +107,18 @@ static void send_ipi_single(int cpu, enum ipi_message_type op)
set_bit(op, &ipi_data[cpu].bits);
smp_mb__after_atomic();
- sbi_send_ipi(cpumask_bits(cpumask_of(hartid)));
+ if (IS_ENABLED(CONFIG_RISCV_SBI))
+ sbi_send_ipi(cpumask_bits(cpumask_of(hartid)));
+ else
+ clint_send_ipi_single(hartid);
}
static inline void clear_ipi(void)
{
- csr_clear(CSR_SIP, SIE_SSIE);
+ if (IS_ENABLED(CONFIG_RISCV_SBI))
+ csr_clear(CSR_IP, IE_SIE);
+ else
+ clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
}
void riscv_software_interrupt(void)
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 261f4087cc39..8bc01f0ca73b 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -24,6 +24,7 @@
#include <linux/of.h>
#include <linux/sched/task_stack.h>
#include <linux/sched/mm.h>
+#include <asm/clint.h>
#include <asm/irq.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>
@@ -137,6 +138,9 @@ asmlinkage __visible void __init smp_callin(void)
{
struct mm_struct *mm = &init_mm;
+ if (!IS_ENABLED(CONFIG_RISCV_SBI))
+ clint_clear_ipi(cpuid_to_hartid_map(smp_processor_id()));
+
/* All kernel threads share the same mm context. */
mmgrab(mm);
current->active_mm = mm;
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index 473de3ae8bb7..f4cad5163bf2 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -41,7 +41,7 @@ void die(struct pt_regs *regs, const char *str)
print_modules();
show_regs(regs);
- ret = notify_die(DIE_OOPS, str, regs, 0, regs->scause, SIGSEGV);
+ ret = notify_die(DIE_OOPS, str, regs, 0, regs->cause, SIGSEGV);
bust_spinlocks(0);
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
@@ -86,7 +86,7 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code,
#define DO_ERROR_INFO(name, signo, code, str) \
asmlinkage __visible void name(struct pt_regs *regs) \
{ \
- do_trap_error(regs, signo, code, regs->sepc, "Oops - " str); \
+ do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \
}
DO_ERROR_INFO(do_trap_unknown,
@@ -124,9 +124,9 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
asmlinkage __visible void do_trap_break(struct pt_regs *regs)
{
if (user_mode(regs))
- force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->sepc);
- else if (report_bug(regs->sepc, regs) == BUG_TRAP_TYPE_WARN)
- regs->sepc += get_break_insn_length(regs->sepc);
+ force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->epc);
+ else if (report_bug(regs->epc, regs) == BUG_TRAP_TYPE_WARN)
+ regs->epc += get_break_insn_length(regs->epc);
else
die(regs, "Kernel BUG");
}
@@ -153,9 +153,9 @@ void __init trap_init(void)
* Set sup0 scratch register to 0, indicating to exception vector
* that we are presently executing in the kernel
*/
- csr_write(CSR_SSCRATCH, 0);
+ csr_write(CSR_SCRATCH, 0);
/* Set the exception vector address */
- csr_write(CSR_STVEC, &handle_exception);
+ csr_write(CSR_TVEC, &handle_exception);
/* Enable all interrupts */
- csr_write(CSR_SIE, -1);
+ csr_write(CSR_IE, -1);
}
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 23cd1a9e52a1..12f42f96d46e 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -52,12 +52,12 @@ SECTIONS
/* Start of data section */
_sdata = .;
- RO_DATA_SECTION(L1_CACHE_BYTES)
+ RO_DATA(L1_CACHE_BYTES)
.srodata : {
*(.srodata*)
}
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
.sdata : {
__global_pointer$ = . + 0x800;
*(.sdata*)
@@ -69,7 +69,6 @@ SECTIONS
BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
EXCEPTION_TABLE(0x10)
- NOTES
.rel.dyn : {
*(.rel.dyn*)
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 267feaa10f6a..47e7a8204460 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-lib-y += delay.o
-lib-y += memcpy.o
-lib-y += memset.o
-lib-y += uaccess.o
-
-lib-$(CONFIG_64BIT) += tishift.o
+lib-y += delay.o
+lib-y += memcpy.o
+lib-y += memset.o
+lib-$(CONFIG_MMU) += uaccess.o
+lib-$(CONFIG_64BIT) += tishift.o
diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S
index ed2696c0143d..fecd65657a6f 100644
--- a/arch/riscv/lib/uaccess.S
+++ b/arch/riscv/lib/uaccess.S
@@ -18,7 +18,7 @@ ENTRY(__asm_copy_from_user)
/* Enable access to user memory */
li t6, SR_SUM
- csrs CSR_SSTATUS, t6
+ csrs CSR_STATUS, t6
add a3, a1, a2
/* Use word-oriented copy only if low-order bits match */
@@ -47,7 +47,7 @@ ENTRY(__asm_copy_from_user)
3:
/* Disable access to user memory */
- csrc CSR_SSTATUS, t6
+ csrc CSR_STATUS, t6
li a0, 0
ret
4: /* Edge case: unalignment */
@@ -72,7 +72,7 @@ ENTRY(__clear_user)
/* Enable access to user memory */
li t6, SR_SUM
- csrs CSR_SSTATUS, t6
+ csrs CSR_STATUS, t6
add a3, a0, a1
addi t0, a0, SZREG-1
@@ -94,7 +94,7 @@ ENTRY(__clear_user)
3:
/* Disable access to user memory */
- csrc CSR_SSTATUS, t6
+ csrc CSR_STATUS, t6
li a0, 0
ret
4: /* Edge case: unalignment */
@@ -114,11 +114,11 @@ ENDPROC(__clear_user)
/* Fixup code for __copy_user(10) and __clear_user(11) */
10:
/* Disable access to user memory */
- csrs CSR_SSTATUS, t6
+ csrs CSR_STATUS, t6
mv a0, a2
ret
11:
- csrs CSR_SSTATUS, t6
+ csrs CSR_STATUS, t6
mv a0, a1
ret
.previous
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 9d9a17335686..44ab8f28c3fa 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -6,9 +6,8 @@ CFLAGS_REMOVE_init.o = -pg
endif
obj-y += init.o
-obj-y += fault.o
obj-y += extable.o
-obj-y += ioremap.o
+obj-$(CONFIG_MMU) += fault.o ioremap.o
obj-y += cacheflush.o
obj-y += context.o
obj-y += sifive_l2_cache.o
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 3f15938dec89..8f1900686640 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -10,9 +10,17 @@
#include <asm/sbi.h>
+static void ipi_remote_fence_i(void *info)
+{
+ return local_flush_icache_all();
+}
+
void flush_icache_all(void)
{
- sbi_remote_fence_i(NULL);
+ if (IS_ENABLED(CONFIG_RISCV_SBI))
+ sbi_remote_fence_i(NULL);
+ else
+ on_each_cpu(ipi_remote_fence_i, NULL, 1);
}
/*
@@ -28,7 +36,7 @@ void flush_icache_all(void)
void flush_icache_mm(struct mm_struct *mm, bool local)
{
unsigned int cpu;
- cpumask_t others, hmask, *mask;
+ cpumask_t others, *mask;
preempt_disable();
@@ -46,10 +54,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
*/
cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
local |= cpumask_empty(&others);
- if (mm != current->active_mm || !local) {
- riscv_cpuid_to_hartid_mask(&others, &hmask);
- sbi_remote_fence_i(hmask.bits);
- } else {
+ if (mm == current->active_mm && local) {
/*
* It's assumed that at least one strongly ordered operation is
* performed on this hart between setting a hart's cpumask bit
@@ -59,6 +64,13 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
* with flush_icache_deferred().
*/
smp_mb();
+ } else if (IS_ENABLED(CONFIG_RISCV_SBI)) {
+ cpumask_t hartid_mask;
+
+ riscv_cpuid_to_hartid_mask(&others, &hartid_mask);
+ sbi_remote_fence_i(cpumask_bits(&hartid_mask));
+ } else {
+ on_each_cpu_mask(&others, ipi_remote_fence_i, NULL, 1);
}
preempt_enable();
@@ -66,6 +78,7 @@ void flush_icache_mm(struct mm_struct *mm, bool local)
#endif /* CONFIG_SMP */
+#ifdef CONFIG_MMU
void flush_icache_pte(pte_t pte)
{
struct page *page = pte_page(pte);
@@ -73,3 +86,4 @@ void flush_icache_pte(pte_t pte)
if (!test_and_set_bit(PG_dcache_clean, &page->flags))
flush_icache_all();
}
+#endif /* CONFIG_MMU */
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index ca66d44156b6..613ec81a8979 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -58,8 +58,10 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
cpumask_clear_cpu(cpu, mm_cpumask(prev));
cpumask_set_cpu(cpu, mm_cpumask(next));
+#ifdef CONFIG_MMU
csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
local_flush_tlb_all();
+#endif
flush_icache_deferred(next);
}
diff --git a/arch/riscv/mm/extable.c b/arch/riscv/mm/extable.c
index 7aed9178d365..2fc729422151 100644
--- a/arch/riscv/mm/extable.c
+++ b/arch/riscv/mm/extable.c
@@ -15,9 +15,9 @@ int fixup_exception(struct pt_regs *regs)
{
const struct exception_table_entry *fixup;
- fixup = search_exception_tables(regs->sepc);
+ fixup = search_exception_tables(regs->epc);
if (fixup) {
- regs->sepc = fixup->fixup;
+ regs->epc = fixup->fixup;
return 1;
}
return 0;
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index 247b8c859c44..cf7248e07f43 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -34,8 +34,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
int code = SEGV_MAPERR;
vm_fault_t fault;
- cause = regs->scause;
- addr = regs->sbadaddr;
+ cause = regs->cause;
+ addr = regs->badaddr;
tsk = current;
mm = tsk->mm;
@@ -53,7 +53,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
goto vmalloc_fault;
/* Enable interrupts if they were enabled in the parent context. */
- if (likely(regs->sstatus & SR_SPIE))
+ if (likely(regs->status & SR_PIE))
local_irq_enable();
/*
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 573463d1c799..b2fe9d1be833 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -26,6 +26,7 @@ unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
EXPORT_SYMBOL(empty_zero_page);
extern char _start[];
+void *dtb_early_va;
static void __init zone_sizes_init(void)
{
@@ -40,7 +41,7 @@ static void __init zone_sizes_init(void)
free_area_init_nodes(max_zone_pfns);
}
-void setup_zero_page(void)
+static void setup_zero_page(void)
{
memset((void *)empty_zero_page, 0, PAGE_SIZE);
}
@@ -142,12 +143,12 @@ void __init setup_bootmem(void)
}
}
+#ifdef CONFIG_MMU
unsigned long va_pa_offset;
EXPORT_SYMBOL(va_pa_offset);
unsigned long pfn_base;
EXPORT_SYMBOL(pfn_base);
-void *dtb_early_va;
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
@@ -273,7 +274,6 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
#define get_pgd_next_virt(__pa) get_pmd_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pmd_mapping(__nextp, __va, __pa, __sz, __prot)
-#define PTE_PARENT_SIZE PMD_SIZE
#define fixmap_pgd_next fixmap_pmd
#else
#define pgd_next_t pte_t
@@ -281,7 +281,6 @@ static void __init create_pmd_mapping(pmd_t *pmdp,
#define get_pgd_next_virt(__pa) get_pte_virt(__pa)
#define create_pgd_next_mapping(__nextp, __va, __pa, __sz, __prot) \
create_pte_mapping(__nextp, __va, __pa, __sz, __prot)
-#define PTE_PARENT_SIZE PGDIR_SIZE
#define fixmap_pgd_next fixmap_pte
#endif
@@ -314,14 +313,11 @@ static void __init create_pgd_mapping(pgd_t *pgdp,
static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
{
- uintptr_t map_size = PAGE_SIZE;
-
- /* Upgrade to PMD/PGDIR mappings whenever possible */
- if (!(base & (PTE_PARENT_SIZE - 1)) &&
- !(size & (PTE_PARENT_SIZE - 1)))
- map_size = PTE_PARENT_SIZE;
+ /* Upgrade to PMD_SIZE mappings whenever possible */
+ if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1)))
+ return PAGE_SIZE;
- return map_size;
+ return PMD_SIZE;
}
/*
@@ -449,6 +445,16 @@ static void __init setup_vm_final(void)
csr_write(CSR_SATP, PFN_DOWN(__pa(swapper_pg_dir)) | SATP_MODE);
local_flush_tlb_all();
}
+#else
+asmlinkage void __init setup_vm(uintptr_t dtb_pa)
+{
+ dtb_early_va = (void *)dtb_pa;
+}
+
+static inline void setup_vm_final(void)
+{
+}
+#endif /* CONFIG_MMU */
void __init paging_init(void)
{
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 24cd33d2c48f..720b443c4528 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -2,6 +2,7 @@
#include <linux/mm.h>
#include <linux/smp.h>
+#include <linux/sched.h>
#include <asm/sbi.h>
void flush_tlb_all(void)
@@ -9,13 +10,33 @@ void flush_tlb_all(void)
sbi_remote_sfence_vma(NULL, 0, -1);
}
+/*
+ * This function must not be called with cmask being null.
+ * Kernel may panic if cmask is NULL.
+ */
static void __sbi_tlb_flush_range(struct cpumask *cmask, unsigned long start,
unsigned long size)
{
struct cpumask hmask;
+ unsigned int cpuid;
- riscv_cpuid_to_hartid_mask(cmask, &hmask);
- sbi_remote_sfence_vma(hmask.bits, start, size);
+ if (cpumask_empty(cmask))
+ return;
+
+ cpuid = get_cpu();
+
+ if (cpumask_any_but(cmask, cpuid) >= nr_cpu_ids) {
+ /* local cpu is the only cpu present in cpumask */
+ if (size <= PAGE_SIZE)
+ local_flush_tlb_page(start);
+ else
+ local_flush_tlb_all();
+ } else {
+ riscv_cpuid_to_hartid_mask(cmask, &hmask);
+ sbi_remote_sfence_vma(cpumask_bits(&hmask), start, size);
+ }
+
+ put_cpu();
}
void flush_tlb_mm(struct mm_struct *mm)
diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig
index 38d64030aacf..2e60c80395ab 100644
--- a/arch/s390/configs/debug_defconfig
+++ b/arch/s390/configs/debug_defconfig
@@ -62,7 +62,6 @@ CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_STATIC_KEYS_SELFTEST=y
-CONFIG_REFCOUNT_FULL=y
CONFIG_LOCK_EVENT_COUNTS=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 9803e96d2924..ead0b2c9881d 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -44,7 +44,7 @@ struct s390_aes_ctx {
int key_len;
unsigned long fc;
union {
- struct crypto_sync_skcipher *blk;
+ struct crypto_skcipher *skcipher;
struct crypto_cipher *cip;
} fallback;
};
@@ -54,7 +54,7 @@ struct s390_xts_ctx {
u8 pcc_key[32];
int key_len;
unsigned long fc;
- struct crypto_sync_skcipher *fallback;
+ struct crypto_skcipher *fallback;
};
struct gcm_sg_walk {
@@ -178,66 +178,41 @@ static struct crypto_alg aes_alg = {
}
};
-static int setkey_fallback_blk(struct crypto_tfm *tfm, const u8 *key,
- unsigned int len)
+static int setkey_fallback_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
- unsigned int ret;
-
- crypto_sync_skcipher_clear_flags(sctx->fallback.blk,
- CRYPTO_TFM_REQ_MASK);
- crypto_sync_skcipher_set_flags(sctx->fallback.blk, tfm->crt_flags &
- CRYPTO_TFM_REQ_MASK);
-
- ret = crypto_sync_skcipher_setkey(sctx->fallback.blk, key, len);
-
- tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= crypto_sync_skcipher_get_flags(sctx->fallback.blk) &
- CRYPTO_TFM_RES_MASK;
-
- return ret;
-}
-
-static int fallback_blk_dec(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- unsigned int ret;
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
-
- skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
- ret = crypto_skcipher_decrypt(req);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+ int ret;
- skcipher_request_zero(req);
+ crypto_skcipher_clear_flags(sctx->fallback.skcipher,
+ CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(sctx->fallback.skcipher,
+ crypto_skcipher_get_flags(tfm) &
+ CRYPTO_TFM_REQ_MASK);
+ ret = crypto_skcipher_setkey(sctx->fallback.skcipher, key, len);
+ crypto_skcipher_set_flags(tfm,
+ crypto_skcipher_get_flags(sctx->fallback.skcipher) &
+ CRYPTO_TFM_RES_MASK);
return ret;
}
-static int fallback_blk_enc(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int fallback_skcipher_crypt(struct s390_aes_ctx *sctx,
+ struct skcipher_request *req,
+ unsigned long modifier)
{
- unsigned int ret;
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, sctx->fallback.blk);
-
- skcipher_request_set_sync_tfm(req, sctx->fallback.blk);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
- ret = crypto_skcipher_encrypt(req);
- return ret;
+ *subreq = *req;
+ skcipher_request_set_tfm(subreq, sctx->fallback.skcipher);
+ return (modifier & CPACF_DECRYPT) ?
+ crypto_skcipher_decrypt(subreq) :
+ crypto_skcipher_encrypt(subreq);
}
-static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ecb_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
/* Pick the correct function code based on the key length */
@@ -248,111 +223,92 @@ static int ecb_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
/* Check if the function code is available */
sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
if (!sctx->fc)
- return setkey_fallback_blk(tfm, in_key, key_len);
+ return setkey_fallback_skcipher(tfm, in_key, key_len);
sctx->key_len = key_len;
memcpy(sctx->key, in_key, key_len);
return 0;
}
-static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ecb_aes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
- ret = blkcipher_walk_virt(desc, walk);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ if (unlikely(!sctx->fc))
+ return fallback_skcipher_crypt(sctx, req, modifier);
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
cpacf_km(sctx->fc | modifier, sctx->key,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
-
return ret;
}
-static int ecb_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_aes_encrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_enc(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_aes_crypt(desc, 0, &walk);
+ return ecb_aes_crypt(req, 0);
}
-static int ecb_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_aes_decrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_dec(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk);
+ return ecb_aes_crypt(req, CPACF_DECRYPT);
}
-static int fallback_init_blk(struct crypto_tfm *tfm)
+static int fallback_init_skcipher(struct crypto_skcipher *tfm)
{
- const char *name = tfm->__crt_alg->cra_name;
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ const char *name = crypto_tfm_alg_name(&tfm->base);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
- sctx->fallback.blk = crypto_alloc_sync_skcipher(name, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ sctx->fallback.skcipher = crypto_alloc_skcipher(name, 0,
+ CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
- if (IS_ERR(sctx->fallback.blk)) {
+ if (IS_ERR(sctx->fallback.skcipher)) {
pr_err("Allocating AES fallback algorithm %s failed\n",
name);
- return PTR_ERR(sctx->fallback.blk);
+ return PTR_ERR(sctx->fallback.skcipher);
}
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(sctx->fallback.skcipher));
return 0;
}
-static void fallback_exit_blk(struct crypto_tfm *tfm)
+static void fallback_exit_skcipher(struct crypto_skcipher *tfm)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
- crypto_free_sync_skcipher(sctx->fallback.blk);
+ crypto_free_skcipher(sctx->fallback.skcipher);
}
-static struct crypto_alg ecb_aes_alg = {
- .cra_name = "ecb(aes)",
- .cra_driver_name = "ecb-aes-s390",
- .cra_priority = 401, /* combo: aes + ecb + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_aes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = fallback_init_blk,
- .cra_exit = fallback_exit_blk,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = ecb_aes_set_key,
- .encrypt = ecb_aes_encrypt,
- .decrypt = ecb_aes_decrypt,
- }
- }
+static struct skcipher_alg ecb_aes_alg = {
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-aes-s390",
+ .base.cra_priority = 401, /* combo: aes + ecb + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = fallback_init_skcipher,
+ .exit = fallback_exit_skcipher,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = ecb_aes_set_key,
+ .encrypt = ecb_aes_encrypt,
+ .decrypt = ecb_aes_decrypt,
};
-static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int cbc_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
/* Pick the correct function code based on the key length */
@@ -363,17 +319,18 @@ static int cbc_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
/* Check if the function code is available */
sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
if (!sctx->fc)
- return setkey_fallback_blk(tfm, in_key, key_len);
+ return setkey_fallback_skcipher(tfm, in_key, key_len);
sctx->key_len = key_len;
memcpy(sctx->key, in_key, key_len);
return 0;
}
-static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int cbc_aes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
struct {
@@ -381,134 +338,74 @@ static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
u8 key[AES_MAX_KEY_SIZE];
} param;
- ret = blkcipher_walk_virt(desc, walk);
- memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+ if (unlikely(!sctx->fc))
+ return fallback_skcipher_crypt(sctx, req, modifier);
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+ memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
memcpy(param.key, sctx->key, sctx->key_len);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
cpacf_kmc(sctx->fc | modifier, &param,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
- memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
return ret;
}
-static int cbc_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_aes_encrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_enc(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_aes_crypt(desc, 0, &walk);
+ return cbc_aes_crypt(req, 0);
}
-static int cbc_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_aes_decrypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_dec(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk);
+ return cbc_aes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg cbc_aes_alg = {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "cbc-aes-s390",
- .cra_priority = 402, /* ecb-aes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_aes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = fallback_init_blk,
- .cra_exit = fallback_exit_blk,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = cbc_aes_set_key,
- .encrypt = cbc_aes_encrypt,
- .decrypt = cbc_aes_decrypt,
- }
- }
+static struct skcipher_alg cbc_aes_alg = {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-aes-s390",
+ .base.cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = fallback_init_skcipher,
+ .exit = fallback_exit_skcipher,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = cbc_aes_set_key,
+ .encrypt = cbc_aes_encrypt,
+ .decrypt = cbc_aes_decrypt,
};
-static int xts_fallback_setkey(struct crypto_tfm *tfm, const u8 *key,
- unsigned int len)
-{
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
- unsigned int ret;
-
- crypto_sync_skcipher_clear_flags(xts_ctx->fallback,
- CRYPTO_TFM_REQ_MASK);
- crypto_sync_skcipher_set_flags(xts_ctx->fallback, tfm->crt_flags &
- CRYPTO_TFM_REQ_MASK);
-
- ret = crypto_sync_skcipher_setkey(xts_ctx->fallback, key, len);
-
- tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK;
- tfm->crt_flags |= crypto_sync_skcipher_get_flags(xts_ctx->fallback) &
- CRYPTO_TFM_RES_MASK;
-
- return ret;
-}
-
-static int xts_fallback_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
- unsigned int ret;
-
- skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
- ret = crypto_skcipher_decrypt(req);
-
- skcipher_request_zero(req);
- return ret;
-}
-
-static int xts_fallback_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_fallback_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int len)
{
- struct crypto_blkcipher *tfm = desc->tfm;
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(tfm);
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, xts_ctx->fallback);
- unsigned int ret;
-
- skcipher_request_set_sync_tfm(req, xts_ctx->fallback);
- skcipher_request_set_callback(req, desc->flags, NULL, NULL);
- skcipher_request_set_crypt(req, src, dst, nbytes, desc->info);
-
- ret = crypto_skcipher_encrypt(req);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+ int ret;
- skcipher_request_zero(req);
+ crypto_skcipher_clear_flags(xts_ctx->fallback, CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(xts_ctx->fallback,
+ crypto_skcipher_get_flags(tfm) &
+ CRYPTO_TFM_REQ_MASK);
+ ret = crypto_skcipher_setkey(xts_ctx->fallback, key, len);
+ crypto_skcipher_set_flags(tfm,
+ crypto_skcipher_get_flags(xts_ctx->fallback) &
+ CRYPTO_TFM_RES_MASK);
return ret;
}
-static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
int err;
@@ -518,7 +415,7 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
/* In fips mode only 128 bit or 256 bit keys are valid */
if (fips_enabled && key_len != 32 && key_len != 64) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
@@ -539,10 +436,11 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return 0;
}
-static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int xts_aes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int offset, nbytes, n;
int ret;
struct {
@@ -557,113 +455,100 @@ static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
u8 init[16];
} xts_param;
- ret = blkcipher_walk_virt(desc, walk);
+ if (req->cryptlen < AES_BLOCK_SIZE)
+ return -EINVAL;
+
+ if (unlikely(!xts_ctx->fc || (req->cryptlen % AES_BLOCK_SIZE) != 0)) {
+ struct skcipher_request *subreq = skcipher_request_ctx(req);
+
+ *subreq = *req;
+ skcipher_request_set_tfm(subreq, xts_ctx->fallback);
+ return (modifier & CPACF_DECRYPT) ?
+ crypto_skcipher_decrypt(subreq) :
+ crypto_skcipher_encrypt(subreq);
+ }
+
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
offset = xts_ctx->key_len & 0x10;
memset(pcc_param.block, 0, sizeof(pcc_param.block));
memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
- memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len);
cpacf_pcc(xts_ctx->fc, pcc_param.key + offset);
memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len);
memcpy(xts_param.init, pcc_param.xts, 16);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
return ret;
}
-static int xts_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_aes_encrypt(struct skcipher_request *req)
{
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (!nbytes)
- return -EINVAL;
-
- if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
- return xts_fallback_encrypt(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_aes_crypt(desc, 0, &walk);
+ return xts_aes_crypt(req, 0);
}
-static int xts_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_aes_decrypt(struct skcipher_request *req)
{
- struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (!nbytes)
- return -EINVAL;
-
- if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
- return xts_fallback_decrypt(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_aes_crypt(desc, CPACF_DECRYPT, &walk);
+ return xts_aes_crypt(req, CPACF_DECRYPT);
}
-static int xts_fallback_init(struct crypto_tfm *tfm)
+static int xts_fallback_init(struct crypto_skcipher *tfm)
{
- const char *name = tfm->__crt_alg->cra_name;
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ const char *name = crypto_tfm_alg_name(&tfm->base);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
- xts_ctx->fallback = crypto_alloc_sync_skcipher(name, 0,
- CRYPTO_ALG_NEED_FALLBACK);
+ xts_ctx->fallback = crypto_alloc_skcipher(name, 0,
+ CRYPTO_ALG_NEED_FALLBACK | CRYPTO_ALG_ASYNC);
if (IS_ERR(xts_ctx->fallback)) {
pr_err("Allocating XTS fallback algorithm %s failed\n",
name);
return PTR_ERR(xts_ctx->fallback);
}
+ crypto_skcipher_set_reqsize(tfm, sizeof(struct skcipher_request) +
+ crypto_skcipher_reqsize(xts_ctx->fallback));
return 0;
}
-static void xts_fallback_exit(struct crypto_tfm *tfm)
+static void xts_fallback_exit(struct crypto_skcipher *tfm)
{
- struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
+ struct s390_xts_ctx *xts_ctx = crypto_skcipher_ctx(tfm);
- crypto_free_sync_skcipher(xts_ctx->fallback);
+ crypto_free_skcipher(xts_ctx->fallback);
}
-static struct crypto_alg xts_aes_alg = {
- .cra_name = "xts(aes)",
- .cra_driver_name = "xts-aes-s390",
- .cra_priority = 402, /* ecb-aes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_xts_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = xts_fallback_init,
- .cra_exit = xts_fallback_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = 2 * AES_MIN_KEY_SIZE,
- .max_keysize = 2 * AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = xts_aes_set_key,
- .encrypt = xts_aes_encrypt,
- .decrypt = xts_aes_decrypt,
- }
- }
+static struct skcipher_alg xts_aes_alg = {
+ .base.cra_name = "xts(aes)",
+ .base.cra_driver_name = "xts-aes-s390",
+ .base.cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_xts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = xts_fallback_init,
+ .exit = xts_fallback_exit,
+ .min_keysize = 2 * AES_MIN_KEY_SIZE,
+ .max_keysize = 2 * AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_aes_set_key,
+ .encrypt = xts_aes_encrypt,
+ .decrypt = xts_aes_decrypt,
};
-static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ctr_aes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
- struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
unsigned long fc;
/* Pick the correct function code based on the key length */
@@ -674,7 +559,7 @@ static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
/* Check if the function code is available */
sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
if (!sctx->fc)
- return setkey_fallback_blk(tfm, in_key, key_len);
+ return setkey_fallback_skcipher(tfm, in_key, key_len);
sctx->key_len = key_len;
memcpy(sctx->key, in_key, key_len);
@@ -696,30 +581,34 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
return n;
}
-static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ctr_aes_crypt(struct skcipher_request *req)
{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_aes_ctx *sctx = crypto_skcipher_ctx(tfm);
u8 buf[AES_BLOCK_SIZE], *ctrptr;
+ struct skcipher_walk walk;
unsigned int n, nbytes;
int ret, locked;
+ if (unlikely(!sctx->fc))
+ return fallback_skcipher_crypt(sctx, req, 0);
+
locked = mutex_trylock(&ctrblk_lock);
- ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
n = AES_BLOCK_SIZE;
+
if (nbytes >= 2*AES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk->iv, nbytes);
- ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
- cpacf_kmctr(sctx->fc | modifier, sctx->key,
- walk->dst.virt.addr, walk->src.virt.addr,
- n, ctrptr);
+ n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+ ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+ cpacf_kmctr(sctx->fc, sctx->key, walk.dst.virt.addr,
+ walk.src.virt.addr, n, ctrptr);
if (ctrptr == ctrblk)
- memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE,
+ memcpy(walk.iv, ctrptr + n - AES_BLOCK_SIZE,
AES_BLOCK_SIZE);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
if (locked)
mutex_unlock(&ctrblk_lock);
@@ -727,67 +616,33 @@ static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
* final block may be < AES_BLOCK_SIZE, copy only nbytes
*/
if (nbytes) {
- cpacf_kmctr(sctx->fc | modifier, sctx->key,
- buf, walk->src.virt.addr,
- AES_BLOCK_SIZE, walk->iv);
- memcpy(walk->dst.virt.addr, buf, nbytes);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, 0);
+ cpacf_kmctr(sctx->fc, sctx->key, buf, walk.src.virt.addr,
+ AES_BLOCK_SIZE, walk.iv);
+ memcpy(walk.dst.virt.addr, buf, nbytes);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, 0);
}
return ret;
}
-static int ctr_aes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_enc(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_aes_crypt(desc, 0, &walk);
-}
-
-static int ctr_aes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
-
- if (unlikely(!sctx->fc))
- return fallback_blk_dec(desc, dst, src, nbytes);
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_aes_alg = {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "ctr-aes-s390",
- .cra_priority = 402, /* ecb-aes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER |
- CRYPTO_ALG_NEED_FALLBACK,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_aes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_init = fallback_init_blk,
- .cra_exit = fallback_exit_blk,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ctr_aes_set_key,
- .encrypt = ctr_aes_encrypt,
- .decrypt = ctr_aes_decrypt,
- }
- }
+static struct skcipher_alg ctr_aes_alg = {
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-aes-s390",
+ .base.cra_priority = 402, /* ecb-aes-s390 + 1 */
+ .base.cra_flags = CRYPTO_ALG_NEED_FALLBACK,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_aes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .init = fallback_init_skcipher,
+ .exit = fallback_exit_skcipher,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_aes_set_key,
+ .encrypt = ctr_aes_crypt,
+ .decrypt = ctr_aes_crypt,
+ .chunksize = AES_BLOCK_SIZE,
};
static int gcm_aes_setkey(struct crypto_aead *tfm, const u8 *key,
@@ -1116,24 +971,27 @@ static struct aead_alg gcm_aes_aead = {
},
};
-static struct crypto_alg *aes_s390_algs_ptr[5];
-static int aes_s390_algs_num;
+static struct crypto_alg *aes_s390_alg;
+static struct skcipher_alg *aes_s390_skcipher_algs[4];
+static int aes_s390_skciphers_num;
static struct aead_alg *aes_s390_aead_alg;
-static int aes_s390_register_alg(struct crypto_alg *alg)
+static int aes_s390_register_skcipher(struct skcipher_alg *alg)
{
int ret;
- ret = crypto_register_alg(alg);
+ ret = crypto_register_skcipher(alg);
if (!ret)
- aes_s390_algs_ptr[aes_s390_algs_num++] = alg;
+ aes_s390_skcipher_algs[aes_s390_skciphers_num++] = alg;
return ret;
}
static void aes_s390_fini(void)
{
- while (aes_s390_algs_num--)
- crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]);
+ if (aes_s390_alg)
+ crypto_unregister_alg(aes_s390_alg);
+ while (aes_s390_skciphers_num--)
+ crypto_unregister_skcipher(aes_s390_skcipher_algs[aes_s390_skciphers_num]);
if (ctrblk)
free_page((unsigned long) ctrblk);
@@ -1154,10 +1012,11 @@ static int __init aes_s390_init(void)
if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) ||
cpacf_test_func(&km_functions, CPACF_KM_AES_192) ||
cpacf_test_func(&km_functions, CPACF_KM_AES_256)) {
- ret = aes_s390_register_alg(&aes_alg);
+ ret = crypto_register_alg(&aes_alg);
if (ret)
goto out_err;
- ret = aes_s390_register_alg(&ecb_aes_alg);
+ aes_s390_alg = &aes_alg;
+ ret = aes_s390_register_skcipher(&ecb_aes_alg);
if (ret)
goto out_err;
}
@@ -1165,14 +1024,14 @@ static int __init aes_s390_init(void)
if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) {
- ret = aes_s390_register_alg(&cbc_aes_alg);
+ ret = aes_s390_register_skcipher(&cbc_aes_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
- ret = aes_s390_register_alg(&xts_aes_alg);
+ ret = aes_s390_register_skcipher(&xts_aes_alg);
if (ret)
goto out_err;
}
@@ -1185,7 +1044,7 @@ static int __init aes_s390_init(void)
ret = -ENOMEM;
goto out_err;
}
- ret = aes_s390_register_alg(&ctr_aes_alg);
+ ret = aes_s390_register_skcipher(&ctr_aes_alg);
if (ret)
goto out_err;
}
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 439b100c6f2e..bfbafd35bcbd 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -17,6 +17,7 @@
#include <linux/mutex.h>
#include <crypto/algapi.h>
#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
#include <asm/cpacf.h>
#define DES3_KEY_SIZE (3 * DES_KEY_SIZE)
@@ -45,6 +46,12 @@ static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
return 0;
}
+static int des_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ return des_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
static void s390_des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
{
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -79,28 +86,30 @@ static struct crypto_alg des_alg = {
}
};
-static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
- struct blkcipher_walk *walk)
+static int ecb_desall_crypt(struct skcipher_request *req, unsigned long fc)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
- ret = blkcipher_walk_virt(desc, walk);
- while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(DES_BLOCK_SIZE - 1);
- cpacf_km(fc, ctx->key, walk->dst.virt.addr,
- walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ cpacf_km(fc, ctx->key, walk.dst.virt.addr,
+ walk.src.virt.addr, n);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
return ret;
}
-static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
- struct blkcipher_walk *walk)
+static int cbc_desall_crypt(struct skcipher_request *req, unsigned long fc)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n;
int ret;
struct {
@@ -108,99 +117,69 @@ static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
u8 key[DES3_KEY_SIZE];
} param;
- ret = blkcipher_walk_virt(desc, walk);
- memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+ memcpy(param.iv, walk.iv, DES_BLOCK_SIZE);
memcpy(param.key, ctx->key, DES3_KEY_SIZE);
- while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(DES_BLOCK_SIZE - 1);
- cpacf_kmc(fc, &param, walk->dst.virt.addr,
- walk->src.virt.addr, n);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ cpacf_kmc(fc, &param, walk.dst.virt.addr,
+ walk.src.virt.addr, n);
+ memcpy(walk.iv, param.iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
- memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
return ret;
}
-static int ecb_des_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk);
+ return ecb_desall_crypt(req, CPACF_KM_DEA);
}
-static int ecb_des_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk);
+ return ecb_desall_crypt(req, CPACF_KM_DEA | CPACF_DECRYPT);
}
-static struct crypto_alg ecb_des_alg = {
- .cra_name = "ecb(des)",
- .cra_driver_name = "ecb-des-s390",
- .cra_priority = 400, /* combo: des + ecb */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .setkey = des_setkey,
- .encrypt = ecb_des_encrypt,
- .decrypt = ecb_des_decrypt,
- }
- }
+static struct skcipher_alg ecb_des_alg = {
+ .base.cra_name = "ecb(des)",
+ .base.cra_driver_name = "ecb-des-s390",
+ .base.cra_priority = 400, /* combo: des + ecb */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .setkey = des_setkey_skcipher,
+ .encrypt = ecb_des_encrypt,
+ .decrypt = ecb_des_decrypt,
};
-static int cbc_des_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_DEA);
}
-static int cbc_des_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_DEA | CPACF_DECRYPT);
}
-static struct crypto_alg cbc_des_alg = {
- .cra_name = "cbc(des)",
- .cra_driver_name = "cbc-des-s390",
- .cra_priority = 400, /* combo: des + cbc */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des_setkey,
- .encrypt = cbc_des_encrypt,
- .decrypt = cbc_des_decrypt,
- }
- }
+static struct skcipher_alg cbc_des_alg = {
+ .base.cra_name = "cbc(des)",
+ .base.cra_driver_name = "cbc-des-s390",
+ .base.cra_priority = 400, /* combo: des + cbc */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_setkey_skcipher,
+ .encrypt = cbc_des_encrypt,
+ .decrypt = cbc_des_decrypt,
};
/*
@@ -232,6 +211,12 @@ static int des3_setkey(struct crypto_tfm *tfm, const u8 *key,
return 0;
}
+static int des3_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int key_len)
+{
+ return des3_setkey(crypto_skcipher_tfm(tfm), key, key_len);
+}
+
static void des3_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -266,87 +251,53 @@ static struct crypto_alg des3_alg = {
}
};
-static int ecb_des3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des3_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk);
+ return ecb_desall_crypt(req, CPACF_KM_TDEA_192);
}
-static int ecb_des3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_des3_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT,
- &walk);
+ return ecb_desall_crypt(req, CPACF_KM_TDEA_192 | CPACF_DECRYPT);
}
-static struct crypto_alg ecb_des3_alg = {
- .cra_name = "ecb(des3_ede)",
- .cra_driver_name = "ecb-des3_ede-s390",
- .cra_priority = 400, /* combo: des3 + ecb */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_KEY_SIZE,
- .max_keysize = DES3_KEY_SIZE,
- .setkey = des3_setkey,
- .encrypt = ecb_des3_encrypt,
- .decrypt = ecb_des3_decrypt,
- }
- }
+static struct skcipher_alg ecb_des3_alg = {
+ .base.cra_name = "ecb(des3_ede)",
+ .base.cra_driver_name = "ecb-des3_ede-s390",
+ .base.cra_priority = 400, /* combo: des3 + ecb */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .setkey = des3_setkey_skcipher,
+ .encrypt = ecb_des3_encrypt,
+ .decrypt = ecb_des3_decrypt,
};
-static int cbc_des3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des3_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_TDEA_192);
}
-static int cbc_des3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_des3_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT,
- &walk);
+ return cbc_desall_crypt(req, CPACF_KMC_TDEA_192 | CPACF_DECRYPT);
}
-static struct crypto_alg cbc_des3_alg = {
- .cra_name = "cbc(des3_ede)",
- .cra_driver_name = "cbc-des3_ede-s390",
- .cra_priority = 400, /* combo: des3 + cbc */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_KEY_SIZE,
- .max_keysize = DES3_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des3_setkey,
- .encrypt = cbc_des3_encrypt,
- .decrypt = cbc_des3_decrypt,
- }
- }
+static struct skcipher_alg cbc_des3_alg = {
+ .base.cra_name = "cbc(des3_ede)",
+ .base.cra_driver_name = "cbc-des3_ede-s390",
+ .base.cra_priority = 400, /* combo: des3 + cbc */
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des3_setkey_skcipher,
+ .encrypt = cbc_des3_encrypt,
+ .decrypt = cbc_des3_decrypt,
};
static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
@@ -364,128 +315,90 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
return n;
}
-static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
- struct blkcipher_walk *walk)
+static int ctr_desall_crypt(struct skcipher_request *req, unsigned long fc)
{
- struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_des_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 buf[DES_BLOCK_SIZE], *ctrptr;
+ struct skcipher_walk walk;
unsigned int n, nbytes;
int ret, locked;
locked = mutex_trylock(&ctrblk_lock);
- ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
- while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) >= DES_BLOCK_SIZE) {
n = DES_BLOCK_SIZE;
if (nbytes >= 2*DES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk->iv, nbytes);
- ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk->iv;
- cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr,
- walk->src.virt.addr, n, ctrptr);
+ n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+ ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk.iv;
+ cpacf_kmctr(fc, ctx->key, walk.dst.virt.addr,
+ walk.src.virt.addr, n, ctrptr);
if (ctrptr == ctrblk)
- memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE,
+ memcpy(walk.iv, ctrptr + n - DES_BLOCK_SIZE,
DES_BLOCK_SIZE);
- crypto_inc(walk->iv, DES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ crypto_inc(walk.iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
if (locked)
mutex_unlock(&ctrblk_lock);
/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
if (nbytes) {
- cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr,
- DES_BLOCK_SIZE, walk->iv);
- memcpy(walk->dst.virt.addr, buf, nbytes);
- crypto_inc(walk->iv, DES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, 0);
+ cpacf_kmctr(fc, ctx->key, buf, walk.src.virt.addr,
+ DES_BLOCK_SIZE, walk.iv);
+ memcpy(walk.dst.virt.addr, buf, nbytes);
+ crypto_inc(walk.iv, DES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, 0);
}
return ret;
}
-static int ctr_des_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk);
-}
-
-static int ctr_des_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ctr_des_crypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk);
+ return ctr_desall_crypt(req, CPACF_KMCTR_DEA);
}
-static struct crypto_alg ctr_des_alg = {
- .cra_name = "ctr(des)",
- .cra_driver_name = "ctr-des-s390",
- .cra_priority = 400, /* combo: des + ctr */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des_setkey,
- .encrypt = ctr_des_encrypt,
- .decrypt = ctr_des_decrypt,
- }
- }
+static struct skcipher_alg ctr_des_alg = {
+ .base.cra_name = "ctr(des)",
+ .base.cra_driver_name = "ctr-des-s390",
+ .base.cra_priority = 400, /* combo: des + ctr */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_setkey_skcipher,
+ .encrypt = ctr_des_crypt,
+ .decrypt = ctr_des_crypt,
+ .chunksize = DES_BLOCK_SIZE,
};
-static int ctr_des3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk);
-}
-
-static int ctr_des3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ctr_des3_crypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT,
- &walk);
+ return ctr_desall_crypt(req, CPACF_KMCTR_TDEA_192);
}
-static struct crypto_alg ctr_des3_alg = {
- .cra_name = "ctr(des3_ede)",
- .cra_driver_name = "ctr-des3_ede-s390",
- .cra_priority = 400, /* combo: des3 + ede */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_des_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_KEY_SIZE,
- .max_keysize = DES3_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des3_setkey,
- .encrypt = ctr_des3_encrypt,
- .decrypt = ctr_des3_decrypt,
- }
- }
+static struct skcipher_alg ctr_des3_alg = {
+ .base.cra_name = "ctr(des3_ede)",
+ .base.cra_driver_name = "ctr-des3_ede-s390",
+ .base.cra_priority = 400, /* combo: des3 + ede */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_des_ctx),
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_KEY_SIZE,
+ .max_keysize = DES3_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des3_setkey_skcipher,
+ .encrypt = ctr_des3_crypt,
+ .decrypt = ctr_des3_crypt,
+ .chunksize = DES_BLOCK_SIZE,
};
-static struct crypto_alg *des_s390_algs_ptr[8];
+static struct crypto_alg *des_s390_algs_ptr[2];
static int des_s390_algs_num;
+static struct skcipher_alg *des_s390_skciphers_ptr[6];
+static int des_s390_skciphers_num;
static int des_s390_register_alg(struct crypto_alg *alg)
{
@@ -497,10 +410,22 @@ static int des_s390_register_alg(struct crypto_alg *alg)
return ret;
}
+static int des_s390_register_skcipher(struct skcipher_alg *alg)
+{
+ int ret;
+
+ ret = crypto_register_skcipher(alg);
+ if (!ret)
+ des_s390_skciphers_ptr[des_s390_skciphers_num++] = alg;
+ return ret;
+}
+
static void des_s390_exit(void)
{
while (des_s390_algs_num--)
crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]);
+ while (des_s390_skciphers_num--)
+ crypto_unregister_skcipher(des_s390_skciphers_ptr[des_s390_skciphers_num]);
if (ctrblk)
free_page((unsigned long) ctrblk);
}
@@ -518,12 +443,12 @@ static int __init des_s390_init(void)
ret = des_s390_register_alg(&des_alg);
if (ret)
goto out_err;
- ret = des_s390_register_alg(&ecb_des_alg);
+ ret = des_s390_register_skcipher(&ecb_des_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) {
- ret = des_s390_register_alg(&cbc_des_alg);
+ ret = des_s390_register_skcipher(&cbc_des_alg);
if (ret)
goto out_err;
}
@@ -531,12 +456,12 @@ static int __init des_s390_init(void)
ret = des_s390_register_alg(&des3_alg);
if (ret)
goto out_err;
- ret = des_s390_register_alg(&ecb_des3_alg);
+ ret = des_s390_register_skcipher(&ecb_des3_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) {
- ret = des_s390_register_alg(&cbc_des3_alg);
+ ret = des_s390_register_skcipher(&cbc_des3_alg);
if (ret)
goto out_err;
}
@@ -551,12 +476,12 @@ static int __init des_s390_init(void)
}
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) {
- ret = des_s390_register_alg(&ctr_des_alg);
+ ret = des_s390_register_skcipher(&ctr_des_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
- ret = des_s390_register_alg(&ctr_des3_alg);
+ ret = des_s390_register_skcipher(&ctr_des3_alg);
if (ret)
goto out_err;
}
diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c
index 6184dceed340..c7119c617b6e 100644
--- a/arch/s390/crypto/paes_s390.c
+++ b/arch/s390/crypto/paes_s390.c
@@ -21,6 +21,7 @@
#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/spinlock.h>
+#include <crypto/internal/skcipher.h>
#include <crypto/xts.h>
#include <asm/cpacf.h>
#include <asm/pkey.h>
@@ -123,27 +124,27 @@ static int __paes_set_key(struct s390_paes_ctx *ctx)
return ctx->fc ? 0 : -EINVAL;
}
-static int ecb_paes_init(struct crypto_tfm *tfm)
+static int ecb_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->kb.key = NULL;
return 0;
}
-static void ecb_paes_exit(struct crypto_tfm *tfm)
+static void ecb_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
}
-static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ecb_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
int rc;
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
@@ -151,91 +152,75 @@ static int ecb_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return rc;
if (__paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
return 0;
}
-static int ecb_paes_crypt(struct blkcipher_desc *desc,
- unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ecb_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n, k;
int ret;
- ret = blkcipher_walk_virt(desc, walk);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
k = cpacf_km(ctx->fc | modifier, ctx->pk.protkey,
- walk->dst.virt.addr, walk->src.virt.addr, n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
if (k)
- ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ ret = skcipher_walk_done(&walk, nbytes - k);
if (k < n) {
if (__paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
+ return skcipher_walk_done(&walk, -EIO);
}
}
return ret;
}
-static int ecb_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_paes_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_paes_crypt(desc, CPACF_ENCRYPT, &walk);
+ return ecb_paes_crypt(req, 0);
}
-static int ecb_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_paes_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ecb_paes_crypt(desc, CPACF_DECRYPT, &walk);
+ return ecb_paes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg ecb_paes_alg = {
- .cra_name = "ecb(paes)",
- .cra_driver_name = "ecb-paes-s390",
- .cra_priority = 401, /* combo: aes + ecb + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_paes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ecb_paes_alg.cra_list),
- .cra_init = ecb_paes_init,
- .cra_exit = ecb_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .setkey = ecb_paes_set_key,
- .encrypt = ecb_paes_encrypt,
- .decrypt = ecb_paes_decrypt,
- }
- }
+static struct skcipher_alg ecb_paes_alg = {
+ .base.cra_name = "ecb(paes)",
+ .base.cra_driver_name = "ecb-paes-s390",
+ .base.cra_priority = 401, /* combo: aes + ecb + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ecb_paes_alg.base.cra_list),
+ .init = ecb_paes_init,
+ .exit = ecb_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .setkey = ecb_paes_set_key,
+ .encrypt = ecb_paes_encrypt,
+ .decrypt = ecb_paes_decrypt,
};
-static int cbc_paes_init(struct crypto_tfm *tfm)
+static int cbc_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->kb.key = NULL;
return 0;
}
-static void cbc_paes_exit(struct crypto_tfm *tfm)
+static void cbc_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
}
@@ -258,11 +243,11 @@ static int __cbc_paes_set_key(struct s390_paes_ctx *ctx)
return ctx->fc ? 0 : -EINVAL;
}
-static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int cbc_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
int rc;
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
@@ -270,16 +255,17 @@ static int cbc_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return rc;
if (__cbc_paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
return 0;
}
-static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int cbc_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int nbytes, n, k;
int ret;
struct {
@@ -287,73 +273,60 @@ static int cbc_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
u8 key[MAXPROTKEYSIZE];
} param;
- ret = blkcipher_walk_virt(desc, walk);
- memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
+ memcpy(param.iv, walk.iv, AES_BLOCK_SIZE);
memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
k = cpacf_kmc(ctx->fc | modifier, &param,
- walk->dst.virt.addr, walk->src.virt.addr, n);
- if (k)
- ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
+ if (k) {
+ memcpy(walk.iv, param.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - k);
+ }
if (k < n) {
if (__cbc_paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
+ return skcipher_walk_done(&walk, -EIO);
memcpy(param.key, ctx->pk.protkey, MAXPROTKEYSIZE);
}
}
- memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
return ret;
}
-static int cbc_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_paes_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_paes_crypt(desc, 0, &walk);
+ return cbc_paes_crypt(req, 0);
}
-static int cbc_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_paes_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return cbc_paes_crypt(desc, CPACF_DECRYPT, &walk);
+ return cbc_paes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg cbc_paes_alg = {
- .cra_name = "cbc(paes)",
- .cra_driver_name = "cbc-paes-s390",
- .cra_priority = 402, /* ecb-paes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_paes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(cbc_paes_alg.cra_list),
- .cra_init = cbc_paes_init,
- .cra_exit = cbc_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = cbc_paes_set_key,
- .encrypt = cbc_paes_encrypt,
- .decrypt = cbc_paes_decrypt,
- }
- }
+static struct skcipher_alg cbc_paes_alg = {
+ .base.cra_name = "cbc(paes)",
+ .base.cra_driver_name = "cbc-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(cbc_paes_alg.base.cra_list),
+ .init = cbc_paes_init,
+ .exit = cbc_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = cbc_paes_set_key,
+ .encrypt = cbc_paes_encrypt,
+ .decrypt = cbc_paes_decrypt,
};
-static int xts_paes_init(struct crypto_tfm *tfm)
+static int xts_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->kb[0].key = NULL;
ctx->kb[1].key = NULL;
@@ -361,9 +334,9 @@ static int xts_paes_init(struct crypto_tfm *tfm)
return 0;
}
-static void xts_paes_exit(struct crypto_tfm *tfm)
+static void xts_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb[0]);
_free_kb_keybuf(&ctx->kb[1]);
@@ -391,11 +364,11 @@ static int __xts_paes_set_key(struct s390_pxts_ctx *ctx)
return ctx->fc ? 0 : -EINVAL;
}
-static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int xts_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int xts_key_len)
{
int rc;
- struct s390_pxts_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 ckey[2 * AES_MAX_KEY_SIZE];
unsigned int ckey_len, key_len;
@@ -414,7 +387,7 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return rc;
if (__xts_paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
@@ -427,13 +400,14 @@ static int xts_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
AES_KEYSIZE_128 : AES_KEYSIZE_256;
memcpy(ckey, ctx->pk[0].protkey, ckey_len);
memcpy(ckey + ckey_len, ctx->pk[1].protkey, ckey_len);
- return xts_check_key(tfm, ckey, 2*ckey_len);
+ return xts_verify_key(tfm, ckey, 2*ckey_len);
}
-static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int xts_paes_crypt(struct skcipher_request *req, unsigned long modifier)
{
- struct s390_pxts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_pxts_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
unsigned int keylen, offset, nbytes, n, k;
int ret;
struct {
@@ -448,90 +422,76 @@ static int xts_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
u8 init[16];
} xts_param;
- ret = blkcipher_walk_virt(desc, walk);
+ ret = skcipher_walk_virt(&walk, req, false);
+ if (ret)
+ return ret;
keylen = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 48 : 64;
offset = (ctx->pk[0].type == PKEY_KEYTYPE_AES_128) ? 16 : 0;
retry:
memset(&pcc_param, 0, sizeof(pcc_param));
- memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
+ memcpy(pcc_param.tweak, walk.iv, sizeof(pcc_param.tweak));
memcpy(pcc_param.key + offset, ctx->pk[1].protkey, keylen);
cpacf_pcc(ctx->fc, pcc_param.key + offset);
memcpy(xts_param.key + offset, ctx->pk[0].protkey, keylen);
memcpy(xts_param.init, pcc_param.xts, 16);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ while ((nbytes = walk.nbytes) != 0) {
/* only use complete blocks */
n = nbytes & ~(AES_BLOCK_SIZE - 1);
k = cpacf_km(ctx->fc | modifier, xts_param.key + offset,
- walk->dst.virt.addr, walk->src.virt.addr, n);
+ walk.dst.virt.addr, walk.src.virt.addr, n);
if (k)
- ret = blkcipher_walk_done(desc, walk, nbytes - k);
+ ret = skcipher_walk_done(&walk, nbytes - k);
if (k < n) {
if (__xts_paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
+ return skcipher_walk_done(&walk, -EIO);
goto retry;
}
}
return ret;
}
-static int xts_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_paes_encrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_paes_crypt(desc, 0, &walk);
+ return xts_paes_crypt(req, 0);
}
-static int xts_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int xts_paes_decrypt(struct skcipher_request *req)
{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return xts_paes_crypt(desc, CPACF_DECRYPT, &walk);
+ return xts_paes_crypt(req, CPACF_DECRYPT);
}
-static struct crypto_alg xts_paes_alg = {
- .cra_name = "xts(paes)",
- .cra_driver_name = "xts-paes-s390",
- .cra_priority = 402, /* ecb-paes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct s390_pxts_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(xts_paes_alg.cra_list),
- .cra_init = xts_paes_init,
- .cra_exit = xts_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = 2 * PAES_MIN_KEYSIZE,
- .max_keysize = 2 * PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = xts_paes_set_key,
- .encrypt = xts_paes_encrypt,
- .decrypt = xts_paes_decrypt,
- }
- }
+static struct skcipher_alg xts_paes_alg = {
+ .base.cra_name = "xts(paes)",
+ .base.cra_driver_name = "xts-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct s390_pxts_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(xts_paes_alg.base.cra_list),
+ .init = xts_paes_init,
+ .exit = xts_paes_exit,
+ .min_keysize = 2 * PAES_MIN_KEYSIZE,
+ .max_keysize = 2 * PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = xts_paes_set_key,
+ .encrypt = xts_paes_encrypt,
+ .decrypt = xts_paes_decrypt,
};
-static int ctr_paes_init(struct crypto_tfm *tfm)
+static int ctr_paes_init(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
ctx->kb.key = NULL;
return 0;
}
-static void ctr_paes_exit(struct crypto_tfm *tfm)
+static void ctr_paes_exit(struct crypto_skcipher *tfm)
{
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
}
@@ -555,11 +515,11 @@ static int __ctr_paes_set_key(struct s390_paes_ctx *ctx)
return ctx->fc ? 0 : -EINVAL;
}
-static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
+static int ctr_paes_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
unsigned int key_len)
{
int rc;
- struct s390_paes_ctx *ctx = crypto_tfm_ctx(tfm);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
_free_kb_keybuf(&ctx->kb);
rc = _copy_key_to_kb(&ctx->kb, in_key, key_len);
@@ -567,7 +527,7 @@ static int ctr_paes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return rc;
if (__ctr_paes_set_key(ctx)) {
- tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
return -EINVAL;
}
return 0;
@@ -588,37 +548,37 @@ static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
return n;
}
-static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
- struct blkcipher_walk *walk)
+static int ctr_paes_crypt(struct skcipher_request *req)
{
- struct s390_paes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ struct s390_paes_ctx *ctx = crypto_skcipher_ctx(tfm);
u8 buf[AES_BLOCK_SIZE], *ctrptr;
+ struct skcipher_walk walk;
unsigned int nbytes, n, k;
int ret, locked;
locked = spin_trylock(&ctrblk_lock);
- ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
- while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
+ ret = skcipher_walk_virt(&walk, req, false);
+ while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
n = AES_BLOCK_SIZE;
if (nbytes >= 2*AES_BLOCK_SIZE && locked)
- n = __ctrblk_init(ctrblk, walk->iv, nbytes);
- ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
- k = cpacf_kmctr(ctx->fc | modifier, ctx->pk.protkey,
- walk->dst.virt.addr, walk->src.virt.addr,
- n, ctrptr);
+ n = __ctrblk_init(ctrblk, walk.iv, nbytes);
+ ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk.iv;
+ k = cpacf_kmctr(ctx->fc, ctx->pk.protkey, walk.dst.virt.addr,
+ walk.src.virt.addr, n, ctrptr);
if (k) {
if (ctrptr == ctrblk)
- memcpy(walk->iv, ctrptr + k - AES_BLOCK_SIZE,
+ memcpy(walk.iv, ctrptr + k - AES_BLOCK_SIZE,
AES_BLOCK_SIZE);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, nbytes - n);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, nbytes - n);
}
if (k < n) {
if (__ctr_paes_set_key(ctx) != 0) {
if (locked)
spin_unlock(&ctrblk_lock);
- return blkcipher_walk_done(desc, walk, -EIO);
+ return skcipher_walk_done(&walk, -EIO);
}
}
}
@@ -629,80 +589,54 @@ static int ctr_paes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
*/
if (nbytes) {
while (1) {
- if (cpacf_kmctr(ctx->fc | modifier,
- ctx->pk.protkey, buf,
- walk->src.virt.addr, AES_BLOCK_SIZE,
- walk->iv) == AES_BLOCK_SIZE)
+ if (cpacf_kmctr(ctx->fc, ctx->pk.protkey, buf,
+ walk.src.virt.addr, AES_BLOCK_SIZE,
+ walk.iv) == AES_BLOCK_SIZE)
break;
if (__ctr_paes_set_key(ctx) != 0)
- return blkcipher_walk_done(desc, walk, -EIO);
+ return skcipher_walk_done(&walk, -EIO);
}
- memcpy(walk->dst.virt.addr, buf, nbytes);
- crypto_inc(walk->iv, AES_BLOCK_SIZE);
- ret = blkcipher_walk_done(desc, walk, 0);
+ memcpy(walk.dst.virt.addr, buf, nbytes);
+ crypto_inc(walk.iv, AES_BLOCK_SIZE);
+ ret = skcipher_walk_done(&walk, 0);
}
return ret;
}
-static int ctr_paes_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_paes_crypt(desc, 0, &walk);
-}
-
-static int ctr_paes_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct blkcipher_walk walk;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- return ctr_paes_crypt(desc, CPACF_DECRYPT, &walk);
-}
-
-static struct crypto_alg ctr_paes_alg = {
- .cra_name = "ctr(paes)",
- .cra_driver_name = "ctr-paes-s390",
- .cra_priority = 402, /* ecb-paes-s390 + 1 */
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct s390_paes_ctx),
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_list = LIST_HEAD_INIT(ctr_paes_alg.cra_list),
- .cra_init = ctr_paes_init,
- .cra_exit = ctr_paes_exit,
- .cra_u = {
- .blkcipher = {
- .min_keysize = PAES_MIN_KEYSIZE,
- .max_keysize = PAES_MAX_KEYSIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = ctr_paes_set_key,
- .encrypt = ctr_paes_encrypt,
- .decrypt = ctr_paes_decrypt,
- }
- }
+static struct skcipher_alg ctr_paes_alg = {
+ .base.cra_name = "ctr(paes)",
+ .base.cra_driver_name = "ctr-paes-s390",
+ .base.cra_priority = 402, /* ecb-paes-s390 + 1 */
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct s390_paes_ctx),
+ .base.cra_module = THIS_MODULE,
+ .base.cra_list = LIST_HEAD_INIT(ctr_paes_alg.base.cra_list),
+ .init = ctr_paes_init,
+ .exit = ctr_paes_exit,
+ .min_keysize = PAES_MIN_KEYSIZE,
+ .max_keysize = PAES_MAX_KEYSIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = ctr_paes_set_key,
+ .encrypt = ctr_paes_crypt,
+ .decrypt = ctr_paes_crypt,
+ .chunksize = AES_BLOCK_SIZE,
};
-static inline void __crypto_unregister_alg(struct crypto_alg *alg)
+static inline void __crypto_unregister_skcipher(struct skcipher_alg *alg)
{
- if (!list_empty(&alg->cra_list))
- crypto_unregister_alg(alg);
+ if (!list_empty(&alg->base.cra_list))
+ crypto_unregister_skcipher(alg);
}
static void paes_s390_fini(void)
{
if (ctrblk)
free_page((unsigned long) ctrblk);
- __crypto_unregister_alg(&ctr_paes_alg);
- __crypto_unregister_alg(&xts_paes_alg);
- __crypto_unregister_alg(&cbc_paes_alg);
- __crypto_unregister_alg(&ecb_paes_alg);
+ __crypto_unregister_skcipher(&ctr_paes_alg);
+ __crypto_unregister_skcipher(&xts_paes_alg);
+ __crypto_unregister_skcipher(&cbc_paes_alg);
+ __crypto_unregister_skcipher(&ecb_paes_alg);
}
static int __init paes_s390_init(void)
@@ -717,7 +651,7 @@ static int __init paes_s390_init(void)
if (cpacf_test_func(&km_functions, CPACF_KM_PAES_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_192) ||
cpacf_test_func(&km_functions, CPACF_KM_PAES_256)) {
- ret = crypto_register_alg(&ecb_paes_alg);
+ ret = crypto_register_skcipher(&ecb_paes_alg);
if (ret)
goto out_err;
}
@@ -725,14 +659,14 @@ static int __init paes_s390_init(void)
if (cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256)) {
- ret = crypto_register_alg(&cbc_paes_alg);
+ ret = crypto_register_skcipher(&cbc_paes_alg);
if (ret)
goto out_err;
}
if (cpacf_test_func(&km_functions, CPACF_KM_PXTS_128) ||
cpacf_test_func(&km_functions, CPACF_KM_PXTS_256)) {
- ret = crypto_register_alg(&xts_paes_alg);
+ ret = crypto_register_skcipher(&xts_paes_alg);
if (ret)
goto out_err;
}
@@ -740,7 +674,7 @@ static int __init paes_s390_init(void)
if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_128) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_192) ||
cpacf_test_func(&kmctr_functions, CPACF_KMCTR_PAES_256)) {
- ret = crypto_register_alg(&ctr_paes_alg);
+ ret = crypto_register_skcipher(&ctr_paes_alg);
if (ret)
goto out_err;
ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 7e0eb4020917..37695499717d 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -15,6 +15,8 @@
/* Handle ro_after_init data on our own. */
#define RO_AFTER_INIT_DATA
+#define EMITS_PT_NOTE
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/vmlinux.lds.h>
@@ -50,11 +52,7 @@ SECTIONS
_etext = .; /* End of text section */
} :text = 0x0700
- NOTES :text :note
-
- .dummy : { *(.dummy) } :data
-
- RO_DATA_SECTION(PAGE_SIZE)
+ RO_DATA(PAGE_SIZE)
. = ALIGN(PAGE_SIZE);
_sdata = .; /* Start of data section */
@@ -64,12 +62,12 @@ SECTIONS
.data..ro_after_init : {
*(.data..ro_after_init)
JUMP_TABLE_DATA
- }
+ } :data
EXCEPTION_TABLE(16)
. = ALIGN(PAGE_SIZE);
__end_ro_after_init = .;
- RW_DATA_SECTION(0x100, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE)
BOOT_DATA_PRESERVED
_edata = .; /* End of data section */
diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
index c475ca49cfc6..8df10d3c8f6c 100644
--- a/arch/s390/kernel/vtime.c
+++ b/arch/s390/kernel/vtime.c
@@ -247,9 +247,9 @@ void vtime_account_irq_enter(struct task_struct *tsk)
}
EXPORT_SYMBOL_GPL(vtime_account_irq_enter);
-void vtime_account_system(struct task_struct *tsk)
+void vtime_account_kernel(struct task_struct *tsk)
__attribute__((alias("vtime_account_irq_enter")));
-EXPORT_SYMBOL_GPL(vtime_account_system);
+EXPORT_SYMBOL_GPL(vtime_account_kernel);
/*
* Sorted add to a list. List is linear searched until first bigger
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index ce88211b9c6c..8d2134136290 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -23,6 +23,8 @@
#include <linux/filter.h>
#include <linux/init.h>
#include <linux/bpf.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
#include <asm/cacheflush.h>
#include <asm/dis.h>
#include <asm/facility.h>
@@ -38,10 +40,11 @@ struct bpf_jit {
int size; /* Size of program and literal pool */
int size_prg; /* Size of program */
int prg; /* Current position in program */
- int lit_start; /* Start of literal pool */
- int lit; /* Current position in literal pool */
+ int lit32_start; /* Start of 32-bit literal pool */
+ int lit32; /* Current position in 32-bit literal pool */
+ int lit64_start; /* Start of 64-bit literal pool */
+ int lit64; /* Current position in 64-bit literal pool */
int base_ip; /* Base address for literal pool */
- int ret0_ip; /* Address of return 0 */
int exit_ip; /* Address of exit */
int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
@@ -49,14 +52,10 @@ struct bpf_jit {
int labels[1]; /* Labels for local jumps */
};
-#define BPF_SIZE_MAX 0xffff /* Max size for program (16 bit branches) */
-
-#define SEEN_MEM (1 << 0) /* use mem[] for temporary storage */
-#define SEEN_RET0 (1 << 1) /* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL (1 << 2) /* code uses literals */
-#define SEEN_FUNC (1 << 3) /* calls C functions */
-#define SEEN_TAIL_CALL (1 << 4) /* code uses tail calls */
-#define SEEN_REG_AX (1 << 5) /* code uses constant blinding */
+#define SEEN_MEM BIT(0) /* use mem[] for temporary storage */
+#define SEEN_LITERAL BIT(1) /* code uses literals */
+#define SEEN_FUNC BIT(2) /* calls C functions */
+#define SEEN_TAIL_CALL BIT(3) /* code uses tail calls */
#define SEEN_STACK (SEEN_FUNC | SEEN_MEM)
/*
@@ -131,13 +130,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT2(op) \
({ \
if (jit->prg_buf) \
- *(u16 *) (jit->prg_buf + jit->prg) = op; \
+ *(u16 *) (jit->prg_buf + jit->prg) = (op); \
jit->prg += 2; \
})
#define EMIT2(op, b1, b2) \
({ \
- _EMIT2(op | reg(b1, b2)); \
+ _EMIT2((op) | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
@@ -145,20 +144,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT4(op) \
({ \
if (jit->prg_buf) \
- *(u32 *) (jit->prg_buf + jit->prg) = op; \
+ *(u32 *) (jit->prg_buf + jit->prg) = (op); \
jit->prg += 4; \
})
#define EMIT4(op, b1, b2) \
({ \
- _EMIT4(op | reg(b1, b2)); \
+ _EMIT4((op) | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define EMIT4_RRF(op, b1, b2, b3) \
({ \
- _EMIT4(op | reg_high(b3) << 8 | reg(b1, b2)); \
+ _EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
REG_SET_SEEN(b3); \
@@ -167,13 +166,13 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT4_DISP(op, disp) \
({ \
unsigned int __disp = (disp) & 0xfff; \
- _EMIT4(op | __disp); \
+ _EMIT4((op) | __disp); \
})
#define EMIT4_DISP(op, b1, b2, disp) \
({ \
- _EMIT4_DISP(op | reg_high(b1) << 16 | \
- reg_high(b2) << 8, disp); \
+ _EMIT4_DISP((op) | reg_high(b1) << 16 | \
+ reg_high(b2) << 8, (disp)); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
@@ -181,21 +180,27 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT4_IMM(op, b1, imm) \
({ \
unsigned int __imm = (imm) & 0xffff; \
- _EMIT4(op | reg_high(b1) << 16 | __imm); \
+ _EMIT4((op) | reg_high(b1) << 16 | __imm); \
REG_SET_SEEN(b1); \
})
#define EMIT4_PCREL(op, pcrel) \
({ \
long __pcrel = ((pcrel) >> 1) & 0xffff; \
- _EMIT4(op | __pcrel); \
+ _EMIT4((op) | __pcrel); \
+})
+
+#define EMIT4_PCREL_RIC(op, mask, target) \
+({ \
+ int __rel = ((target) - jit->prg) / 2; \
+ _EMIT4((op) | (mask) << 20 | (__rel & 0xffff)); \
})
#define _EMIT6(op1, op2) \
({ \
if (jit->prg_buf) { \
- *(u32 *) (jit->prg_buf + jit->prg) = op1; \
- *(u16 *) (jit->prg_buf + jit->prg + 4) = op2; \
+ *(u32 *) (jit->prg_buf + jit->prg) = (op1); \
+ *(u16 *) (jit->prg_buf + jit->prg + 4) = (op2); \
} \
jit->prg += 6; \
})
@@ -203,20 +208,20 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define _EMIT6_DISP(op1, op2, disp) \
({ \
unsigned int __disp = (disp) & 0xfff; \
- _EMIT6(op1 | __disp, op2); \
+ _EMIT6((op1) | __disp, op2); \
})
#define _EMIT6_DISP_LH(op1, op2, disp) \
({ \
- u32 _disp = (u32) disp; \
+ u32 _disp = (u32) (disp); \
unsigned int __disp_h = _disp & 0xff000; \
unsigned int __disp_l = _disp & 0x00fff; \
- _EMIT6(op1 | __disp_l, op2 | __disp_h >> 4); \
+ _EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4); \
})
#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp) \
({ \
- _EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 | \
+ _EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 | \
reg_high(b3) << 8, op2, disp); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
@@ -226,8 +231,8 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask) \
({ \
int rel = (jit->labels[label] - jit->prg) >> 1; \
- _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), \
- op2 | mask << 12); \
+ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), \
+ (op2) | (mask) << 12); \
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
@@ -235,68 +240,83 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
#define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask) \
({ \
int rel = (jit->labels[label] - jit->prg) >> 1; \
- _EMIT6(op1 | (reg_high(b1) | mask) << 16 | \
- (rel & 0xffff), op2 | (imm & 0xff) << 8); \
+ _EMIT6((op1) | (reg_high(b1) | (mask)) << 16 | \
+ (rel & 0xffff), (op2) | ((imm) & 0xff) << 8); \
REG_SET_SEEN(b1); \
- BUILD_BUG_ON(((unsigned long) imm) > 0xff); \
+ BUILD_BUG_ON(((unsigned long) (imm)) > 0xff); \
})
#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask) \
({ \
/* Branch instruction needs 6 bytes */ \
- int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
- _EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask); \
+ int rel = (addrs[(i) + (off) + 1] - (addrs[(i) + 1] - 6)) / 2;\
+ _EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
REG_SET_SEEN(b1); \
REG_SET_SEEN(b2); \
})
#define EMIT6_PCREL_RILB(op, b, target) \
({ \
- int rel = (target - jit->prg) / 2; \
- _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
+ _EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
REG_SET_SEEN(b); \
})
#define EMIT6_PCREL_RIL(op, target) \
({ \
- int rel = (target - jit->prg) / 2; \
- _EMIT6(op | rel >> 16, rel & 0xffff); \
+ unsigned int rel = (int)((target) - jit->prg) / 2; \
+ _EMIT6((op) | rel >> 16, rel & 0xffff); \
+})
+
+#define EMIT6_PCREL_RILC(op, mask, target) \
+({ \
+ EMIT6_PCREL_RIL((op) | (mask) << 20, (target)); \
})
#define _EMIT6_IMM(op, imm) \
({ \
unsigned int __imm = (imm); \
- _EMIT6(op | (__imm >> 16), __imm & 0xffff); \
+ _EMIT6((op) | (__imm >> 16), __imm & 0xffff); \
})
#define EMIT6_IMM(op, b1, imm) \
({ \
- _EMIT6_IMM(op | reg_high(b1) << 16, imm); \
+ _EMIT6_IMM((op) | reg_high(b1) << 16, imm); \
REG_SET_SEEN(b1); \
})
-#define EMIT_CONST_U32(val) \
+#define _EMIT_CONST_U32(val) \
({ \
unsigned int ret; \
- ret = jit->lit - jit->base_ip; \
- jit->seen |= SEEN_LITERAL; \
+ ret = jit->lit32; \
if (jit->prg_buf) \
- *(u32 *) (jit->prg_buf + jit->lit) = (u32) val; \
- jit->lit += 4; \
+ *(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
+ jit->lit32 += 4; \
ret; \
})
-#define EMIT_CONST_U64(val) \
+#define EMIT_CONST_U32(val) \
({ \
- unsigned int ret; \
- ret = jit->lit - jit->base_ip; \
jit->seen |= SEEN_LITERAL; \
+ _EMIT_CONST_U32(val) - jit->base_ip; \
+})
+
+#define _EMIT_CONST_U64(val) \
+({ \
+ unsigned int ret; \
+ ret = jit->lit64; \
if (jit->prg_buf) \
- *(u64 *) (jit->prg_buf + jit->lit) = (u64) val; \
- jit->lit += 8; \
+ *(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
+ jit->lit64 += 8; \
ret; \
})
+#define EMIT_CONST_U64(val) \
+({ \
+ jit->seen |= SEEN_LITERAL; \
+ _EMIT_CONST_U64(val) - jit->base_ip; \
+})
+
#define EMIT_ZERO(b1) \
({ \
if (!fp->aux->verifier_zext) { \
@@ -307,6 +327,67 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
})
/*
+ * Return whether this is the first pass. The first pass is special, since we
+ * don't know any sizes yet, and thus must be conservative.
+ */
+static bool is_first_pass(struct bpf_jit *jit)
+{
+ return jit->size == 0;
+}
+
+/*
+ * Return whether this is the code generation pass. The code generation pass is
+ * special, since we should change as little as possible.
+ */
+static bool is_codegen_pass(struct bpf_jit *jit)
+{
+ return jit->prg_buf;
+}
+
+/*
+ * Return whether "rel" can be encoded as a short PC-relative offset
+ */
+static bool is_valid_rel(int rel)
+{
+ return rel >= -65536 && rel <= 65534;
+}
+
+/*
+ * Return whether "off" can be reached using a short PC-relative offset
+ */
+static bool can_use_rel(struct bpf_jit *jit, int off)
+{
+ return is_valid_rel(off - jit->prg);
+}
+
+/*
+ * Return whether given displacement can be encoded using
+ * Long-Displacement Facility
+ */
+static bool is_valid_ldisp(int disp)
+{
+ return disp >= -524288 && disp <= 524287;
+}
+
+/*
+ * Return whether the next 32-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
+{
+ return is_valid_ldisp(jit->lit32 - jit->base_ip);
+}
+
+/*
+ * Return whether the next 64-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
+{
+ return is_valid_ldisp(jit->lit64 - jit->base_ip);
+}
+
+/*
* Fill whole space with illegal instructions
*/
static void jit_fill_hole(void *area, unsigned int size)
@@ -383,9 +464,18 @@ static int get_end(struct bpf_jit *jit, int start)
*/
static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
{
-
+ const int last = 15, save_restore_size = 6;
int re = 6, rs;
+ if (is_first_pass(jit)) {
+ /*
+ * We don't know yet which registers are used. Reserve space
+ * conservatively.
+ */
+ jit->prg += (last - re + 1) * save_restore_size;
+ return;
+ }
+
do {
rs = get_start(jit, re);
if (!rs)
@@ -396,7 +486,7 @@ static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
else
restore_regs(jit, rs, re, stack_depth);
re++;
- } while (re <= 15);
+ } while (re <= last);
}
/*
@@ -420,21 +510,28 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
/* Save registers */
save_restore_regs(jit, REGS_SAVE, stack_depth);
/* Setup literal pool */
- if (jit->seen & SEEN_LITERAL) {
- /* basr %r13,0 */
- EMIT2(0x0d00, REG_L, REG_0);
- jit->base_ip = jit->prg;
+ if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
+ if (!is_first_pass(jit) &&
+ is_valid_ldisp(jit->size - (jit->prg + 2))) {
+ /* basr %l,0 */
+ EMIT2(0x0d00, REG_L, REG_0);
+ jit->base_ip = jit->prg;
+ } else {
+ /* larl %l,lit32_start */
+ EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
+ jit->base_ip = jit->lit32_start;
+ }
}
/* Setup stack and backchain */
- if (jit->seen & SEEN_STACK) {
- if (jit->seen & SEEN_FUNC)
+ if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
+ if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
/* lgr %w1,%r15 (backchain) */
EMIT4(0xb9040000, REG_W1, REG_15);
/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
/* aghi %r15,-STK_OFF */
EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
- if (jit->seen & SEEN_FUNC)
+ if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
/* stg %w1,152(%r15) (backchain) */
EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
REG_15, 152);
@@ -446,12 +543,6 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
*/
static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
{
- /* Return 0 */
- if (jit->seen & SEEN_RET0) {
- jit->ret0_ip = jit->prg;
- /* lghi %b0,0 */
- EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
- }
jit->exit_ip = jit->prg;
/* Load exit code: lgr %r2,%b0 */
EMIT4(0xb9040000, REG_2, BPF_REG_0);
@@ -476,7 +567,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
_EMIT2(0x07fe);
if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable &&
- (jit->seen & SEEN_FUNC)) {
+ (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
jit->r1_thunk_ip = jit->prg;
/* Generate __s390_indirect_jump_r1 thunk */
if (test_facility(35)) {
@@ -506,16 +597,14 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
int i, bool extra_pass)
{
struct bpf_insn *insn = &fp->insnsi[i];
- int jmp_off, last, insn_count = 1;
u32 dst_reg = insn->dst_reg;
u32 src_reg = insn->src_reg;
+ int last, insn_count = 1;
u32 *addrs = jit->addrs;
s32 imm = insn->imm;
s16 off = insn->off;
unsigned int mask;
- if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
- jit->seen |= SEEN_REG_AX;
switch (insn->code) {
/*
* BPF_MOV
@@ -549,9 +638,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
u64 imm64;
imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
- /* lg %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm64));
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
insn_count = 2;
break;
}
@@ -680,9 +768,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7080000, REG_W0, 0);
/* lr %w1,%dst */
EMIT2(0x1800, REG_W1, dst_reg);
- /* dl %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
- EMIT_CONST_U32(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
+ /* dl %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U32(imm));
+ } else {
+ /* lgfrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+ _EMIT_CONST_U32(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlr %w0,%dst */
+ EMIT4(0xb9970000, REG_W0, dst_reg);
+ }
/* llgfr %dst,%rc */
EMIT4(0xb9160000, dst_reg, rc_reg);
if (insn_is_zext(&insn[1]))
@@ -704,9 +801,18 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7090000, REG_W0, 0);
/* lgr %w1,%dst */
EMIT4(0xb9040000, REG_W1, dst_reg);
- /* dlg %w0,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* dlg %w0,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %dst,imm */
+ EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* dlgr %w0,%dst */
+ EMIT4(0xb9870000, REG_W0, dst_reg);
+ }
/* lgr %dst,%rc */
EMIT4(0xb9040000, dst_reg, rc_reg);
break;
@@ -729,9 +835,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
- /* ng %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* ng %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0080,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* ngr %dst,%w0 */
+ EMIT4(0xb9800000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_OR
@@ -751,9 +867,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
- /* og %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* og %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0081,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* ogr %dst,%w0 */
+ EMIT4(0xb9810000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_XOR
@@ -775,9 +901,19 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT_ZERO(dst_reg);
break;
case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
- /* xg %dst,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
- EMIT_CONST_U64(imm));
+ if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+ /* xg %dst,<d(imm)>(%l) */
+ EMIT6_DISP_LH(0xe3000000, 0x0082,
+ dst_reg, REG_0, REG_L,
+ EMIT_CONST_U64(imm));
+ } else {
+ /* lgrl %w0,imm */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+ _EMIT_CONST_U64(imm));
+ jit->seen |= SEEN_LITERAL;
+ /* xgr %dst,%w0 */
+ EMIT4(0xb9820000, dst_reg, REG_W0);
+ }
break;
/*
* BPF_LSH
@@ -1023,9 +1159,8 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
REG_SET_SEEN(BPF_REG_5);
jit->seen |= SEEN_FUNC;
- /* lg %w1,<d(imm)>(%l) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
- EMIT_CONST_U64(func));
+ /* lgrl %w1,func */
+ EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) {
/* brasl %r14,__s390_indirect_jump_r1 */
EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
@@ -1054,9 +1189,17 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
/* llgf %w1,map.max_entries(%b2) */
EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
offsetof(struct bpf_array, map.max_entries));
- /* clrj %b3,%w1,0xa,label0: if (u32)%b3 >= (u32)%w1 goto out */
- EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
- REG_W1, 0, 0xa);
+ /* if ((u32)%b3 >= (u32)%w1) goto out; */
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+ /* clrj %b3,%w1,0xa,label0 */
+ EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
+ REG_W1, 0, 0xa);
+ } else {
+ /* clr %b3,%w1 */
+ EMIT2(0x1500, BPF_REG_3, REG_W1);
+ /* brcl 0xa,label0 */
+ EMIT6_PCREL_RILC(0xc0040000, 0xa, jit->labels[0]);
+ }
/*
* if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
@@ -1071,9 +1214,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4_IMM(0xa7080000, REG_W0, 1);
/* laal %w1,%w0,off(%r15) */
EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
- /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
- EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
- MAX_TAIL_CALL_CNT, 0, 0x2);
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+ /* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
+ EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
+ MAX_TAIL_CALL_CNT, 0, 0x2);
+ } else {
+ /* clfi %w1,MAX_TAIL_CALL_CNT */
+ EMIT6_IMM(0xc20f0000, REG_W1, MAX_TAIL_CALL_CNT);
+ /* brcl 0x2,label0 */
+ EMIT6_PCREL_RILC(0xc0040000, 0x2, jit->labels[0]);
+ }
/*
* prog = array->ptrs[index];
@@ -1085,11 +1235,16 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
EMIT4(0xb9160000, REG_1, BPF_REG_3);
/* sllg %r1,%r1,3: %r1 *= 8 */
EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
- /* lg %r1,prog(%b2,%r1) */
- EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
+ /* ltg %r1,prog(%b2,%r1) */
+ EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
REG_1, offsetof(struct bpf_array, ptrs));
- /* clgij %r1,0,0x8,label0 */
- EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
+ if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+ /* brc 0x8,label0 */
+ EMIT4_PCREL_RIC(0xa7040000, 0x8, jit->labels[0]);
+ } else {
+ /* brcl 0x8,label0 */
+ EMIT6_PCREL_RILC(0xc0040000, 0x8, jit->labels[0]);
+ }
/*
* Restore registers before calling function
@@ -1110,7 +1265,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
break;
case BPF_JMP | BPF_EXIT: /* return b0 */
last = (i == fp->len - 1) ? 1 : 0;
- if (last && !(jit->seen & SEEN_RET0))
+ if (last)
break;
/* j <exit> */
EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
@@ -1246,36 +1401,83 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
goto branch_oc;
branch_ks:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* lgfi %w1,imm (load sign extend imm) */
- EMIT6_IMM(0xc0010000, REG_W1, imm);
- /* crj or cgrj %dst,%w1,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
- dst_reg, REG_W1, i, off, mask);
+ /* cfi or cgfi %dst,imm */
+ EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
+ dst_reg, imm);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* brc mask,off */
+ EMIT4_PCREL_RIC(0xa7040000,
+ mask >> 12, addrs[i + off + 1]);
+ } else {
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_ku:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* lgfi %w1,imm (load sign extend imm) */
- EMIT6_IMM(0xc0010000, REG_W1, imm);
- /* clrj or clgrj %dst,%w1,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
- dst_reg, REG_W1, i, off, mask);
+ /* clfi or clgfi %dst,imm */
+ EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000,
+ dst_reg, imm);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* brc mask,off */
+ EMIT4_PCREL_RIC(0xa7040000,
+ mask >> 12, addrs[i + off + 1]);
+ } else {
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_xs:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* crj or cgrj %dst,%src,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
- dst_reg, src_reg, i, off, mask);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* crj or cgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
+ dst_reg, src_reg, i, off, mask);
+ } else {
+ /* cr or cgr %dst,%src */
+ if (is_jmp32)
+ EMIT2(0x1900, dst_reg, src_reg);
+ else
+ EMIT4(0xb9200000, dst_reg, src_reg);
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_xu:
is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
- /* clrj or clgrj %dst,%src,mask,off */
- EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
- dst_reg, src_reg, i, off, mask);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* clrj or clgrj %dst,%src,mask,off */
+ EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
+ dst_reg, src_reg, i, off, mask);
+ } else {
+ /* clr or clgr %dst,%src */
+ if (is_jmp32)
+ EMIT2(0x1500, dst_reg, src_reg);
+ else
+ EMIT4(0xb9210000, dst_reg, src_reg);
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
branch_oc:
- /* brc mask,jmp_off (branch instruction needs 4 bytes) */
- jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
- EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
+ if (!is_first_pass(jit) &&
+ can_use_rel(jit, addrs[i + off + 1])) {
+ /* brc mask,off */
+ EMIT4_PCREL_RIC(0xa7040000,
+ mask >> 12, addrs[i + off + 1]);
+ } else {
+ /* brcl mask,off */
+ EMIT6_PCREL_RILC(0xc0040000,
+ mask >> 12, addrs[i + off + 1]);
+ }
break;
}
default: /* too complex, give up */
@@ -1286,28 +1488,67 @@ branch_oc:
}
/*
+ * Return whether new i-th instruction address does not violate any invariant
+ */
+static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
+{
+ /* On the first pass anything goes */
+ if (is_first_pass(jit))
+ return true;
+
+ /* The codegen pass must not change anything */
+ if (is_codegen_pass(jit))
+ return jit->addrs[i] == jit->prg;
+
+ /* Passes in between must not increase code size */
+ return jit->addrs[i] >= jit->prg;
+}
+
+/*
+ * Update the address of i-th instruction
+ */
+static int bpf_set_addr(struct bpf_jit *jit, int i)
+{
+ if (!bpf_is_new_addr_sane(jit, i))
+ return -1;
+ jit->addrs[i] = jit->prg;
+ return 0;
+}
+
+/*
* Compile eBPF program into s390x code
*/
static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
bool extra_pass)
{
- int i, insn_count;
+ int i, insn_count, lit32_size, lit64_size;
- jit->lit = jit->lit_start;
+ jit->lit32 = jit->lit32_start;
+ jit->lit64 = jit->lit64_start;
jit->prg = 0;
bpf_jit_prologue(jit, fp->aux->stack_depth);
+ if (bpf_set_addr(jit, 0) < 0)
+ return -1;
for (i = 0; i < fp->len; i += insn_count) {
insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
if (insn_count < 0)
return -1;
/* Next instruction address */
- jit->addrs[i + insn_count] = jit->prg;
+ if (bpf_set_addr(jit, i + insn_count) < 0)
+ return -1;
}
bpf_jit_epilogue(jit, fp->aux->stack_depth);
- jit->lit_start = jit->prg;
- jit->size = jit->lit;
+ lit32_size = jit->lit32 - jit->lit32_start;
+ lit64_size = jit->lit64 - jit->lit64_start;
+ jit->lit32_start = jit->prg;
+ if (lit32_size)
+ jit->lit32_start = ALIGN(jit->lit32_start, 4);
+ jit->lit64_start = jit->lit32_start + lit32_size;
+ if (lit64_size)
+ jit->lit64_start = ALIGN(jit->lit64_start, 8);
+ jit->size = jit->lit64_start + lit64_size;
jit->size_prg = jit->prg;
return 0;
}
@@ -1369,7 +1610,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
}
memset(&jit, 0, sizeof(jit));
- jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
+ jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
if (jit.addrs == NULL) {
fp = orig_fp;
goto out;
@@ -1388,12 +1629,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
/*
* Final pass: Allocate and generate program
*/
- if (jit.size >= BPF_SIZE_MAX) {
- fp = orig_fp;
- goto free_addrs;
- }
-
- header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
+ header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole);
if (!header) {
fp = orig_fp;
goto free_addrs;
@@ -1422,7 +1658,7 @@ skip_init_ctx:
if (!fp->is_func || extra_pass) {
bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
free_addrs:
- kfree(jit.addrs);
+ kvfree(jit.addrs);
kfree(jit_data);
fp->aux->jit_data = NULL;
}
diff --git a/arch/sh/boards/mach-sdk7786/nmi.c b/arch/sh/boards/mach-sdk7786/nmi.c
index c2e09d798537..afba49679a12 100644
--- a/arch/sh/boards/mach-sdk7786/nmi.c
+++ b/arch/sh/boards/mach-sdk7786/nmi.c
@@ -37,7 +37,7 @@ static int __init nmi_mode_setup(char *str)
nmi_mode = NMI_MODE_ANY;
else {
nmi_mode = NMI_MODE_UNKNOWN;
- pr_warning("Unknown NMI mode %s\n", str);
+ pr_warn("Unknown NMI mode %s\n", str);
}
printk("Set NMI mode to %d\n", nmi_mode);
diff --git a/arch/sh/boot/compressed/misc.c b/arch/sh/boot/compressed/misc.c
index c15cac9251b9..e69ec12cbbe6 100644
--- a/arch/sh/boot/compressed/misc.c
+++ b/arch/sh/boot/compressed/misc.c
@@ -111,6 +111,11 @@ void __stack_chk_fail(void)
error("stack-protector: Kernel stack is corrupted\n");
}
+/* Needed because vmlinux.lds.h references this */
+void ftrace_stub(void)
+{
+}
+
#ifdef CONFIG_SUPERH64
#define stackalign 8
#else
diff --git a/arch/sh/drivers/Makefile b/arch/sh/drivers/Makefile
index 3e93b434e604..56b0acace6e7 100644
--- a/arch/sh/drivers/Makefile
+++ b/arch/sh/drivers/Makefile
@@ -3,7 +3,7 @@
# Makefile for the Linux SuperH-specific device drivers.
#
-obj-y += dma/
+obj-y += dma/ platform_early.o
obj-$(CONFIG_PCI) += pci/
obj-$(CONFIG_SUPERHYWAY) += superhyway/
diff --git a/arch/sh/drivers/pci/fixups-sdk7786.c b/arch/sh/drivers/pci/fixups-sdk7786.c
index 8cbfa5310a4b..6972af7b4e93 100644
--- a/arch/sh/drivers/pci/fixups-sdk7786.c
+++ b/arch/sh/drivers/pci/fixups-sdk7786.c
@@ -53,7 +53,7 @@ static int __init sdk7786_pci_init(void)
/* Warn about forced rerouting if slot#3 is occupied */
if ((data & PCIECR_PRST3) == 0) {
- pr_warning("Unreachable card detected in slot#3\n");
+ pr_warn("Unreachable card detected in slot#3\n");
return -EBUSY;
}
} else
diff --git a/arch/sh/drivers/platform_early.c b/arch/sh/drivers/platform_early.c
new file mode 100644
index 000000000000..f6d148451dfc
--- /dev/null
+++ b/arch/sh/drivers/platform_early.c
@@ -0,0 +1,347 @@
+// SPDX--License-Identifier: GPL-2.0
+
+#include <asm/platform_early.h>
+#include <linux/mod_devicetable.h>
+#include <linux/pm.h>
+
+static __initdata LIST_HEAD(sh_early_platform_driver_list);
+static __initdata LIST_HEAD(sh_early_platform_device_list);
+
+static const struct platform_device_id *
+platform_match_id(const struct platform_device_id *id,
+ struct platform_device *pdev)
+{
+ while (id->name[0]) {
+ if (strcmp(pdev->name, id->name) == 0) {
+ pdev->id_entry = id;
+ return id;
+ }
+ id++;
+ }
+ return NULL;
+}
+
+static int platform_match(struct device *dev, struct device_driver *drv)
+{
+ struct platform_device *pdev = to_platform_device(dev);
+ struct platform_driver *pdrv = to_platform_driver(drv);
+
+ /* When driver_override is set, only bind to the matching driver */
+ if (pdev->driver_override)
+ return !strcmp(pdev->driver_override, drv->name);
+
+ /* Then try to match against the id table */
+ if (pdrv->id_table)
+ return platform_match_id(pdrv->id_table, pdev) != NULL;
+
+ /* fall-back to driver name match */
+ return (strcmp(pdev->name, drv->name) == 0);
+}
+
+#ifdef CONFIG_PM
+static void device_pm_init_common(struct device *dev)
+{
+ if (!dev->power.early_init) {
+ spin_lock_init(&dev->power.lock);
+ dev->power.qos = NULL;
+ dev->power.early_init = true;
+ }
+}
+
+static void pm_runtime_early_init(struct device *dev)
+{
+ dev->power.disable_depth = 1;
+ device_pm_init_common(dev);
+}
+#else
+static void pm_runtime_early_init(struct device *dev) {}
+#endif
+
+/**
+ * sh_early_platform_driver_register - register early platform driver
+ * @epdrv: sh_early_platform driver structure
+ * @buf: string passed from early_param()
+ *
+ * Helper function for sh_early_platform_init() / sh_early_platform_init_buffer()
+ */
+int __init sh_early_platform_driver_register(struct sh_early_platform_driver *epdrv,
+ char *buf)
+{
+ char *tmp;
+ int n;
+
+ /* Simply add the driver to the end of the global list.
+ * Drivers will by default be put on the list in compiled-in order.
+ */
+ if (!epdrv->list.next) {
+ INIT_LIST_HEAD(&epdrv->list);
+ list_add_tail(&epdrv->list, &sh_early_platform_driver_list);
+ }
+
+ /* If the user has specified device then make sure the driver
+ * gets prioritized. The driver of the last device specified on
+ * command line will be put first on the list.
+ */
+ n = strlen(epdrv->pdrv->driver.name);
+ if (buf && !strncmp(buf, epdrv->pdrv->driver.name, n)) {
+ list_move(&epdrv->list, &sh_early_platform_driver_list);
+
+ /* Allow passing parameters after device name */
+ if (buf[n] == '\0' || buf[n] == ',')
+ epdrv->requested_id = -1;
+ else {
+ epdrv->requested_id = simple_strtoul(&buf[n + 1],
+ &tmp, 10);
+
+ if (buf[n] != '.' || (tmp == &buf[n + 1])) {
+ epdrv->requested_id = EARLY_PLATFORM_ID_ERROR;
+ n = 0;
+ } else
+ n += strcspn(&buf[n + 1], ",") + 1;
+ }
+
+ if (buf[n] == ',')
+ n++;
+
+ if (epdrv->bufsize) {
+ memcpy(epdrv->buffer, &buf[n],
+ min_t(int, epdrv->bufsize, strlen(&buf[n]) + 1));
+ epdrv->buffer[epdrv->bufsize - 1] = '\0';
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * sh_early_platform_add_devices - adds a number of early platform devices
+ * @devs: array of early platform devices to add
+ * @num: number of early platform devices in array
+ *
+ * Used by early architecture code to register early platform devices and
+ * their platform data.
+ */
+void __init sh_early_platform_add_devices(struct platform_device **devs, int num)
+{
+ struct device *dev;
+ int i;
+
+ /* simply add the devices to list */
+ for (i = 0; i < num; i++) {
+ dev = &devs[i]->dev;
+
+ if (!dev->devres_head.next) {
+ pm_runtime_early_init(dev);
+ INIT_LIST_HEAD(&dev->devres_head);
+ list_add_tail(&dev->devres_head,
+ &sh_early_platform_device_list);
+ }
+ }
+}
+
+/**
+ * sh_early_platform_driver_register_all - register early platform drivers
+ * @class_str: string to identify early platform driver class
+ *
+ * Used by architecture code to register all early platform drivers
+ * for a certain class. If omitted then only early platform drivers
+ * with matching kernel command line class parameters will be registered.
+ */
+void __init sh_early_platform_driver_register_all(char *class_str)
+{
+ /* The "class_str" parameter may or may not be present on the kernel
+ * command line. If it is present then there may be more than one
+ * matching parameter.
+ *
+ * Since we register our early platform drivers using early_param()
+ * we need to make sure that they also get registered in the case
+ * when the parameter is missing from the kernel command line.
+ *
+ * We use parse_early_options() to make sure the early_param() gets
+ * called at least once. The early_param() may be called more than
+ * once since the name of the preferred device may be specified on
+ * the kernel command line. sh_early_platform_driver_register() handles
+ * this case for us.
+ */
+ parse_early_options(class_str);
+}
+
+/**
+ * sh_early_platform_match - find early platform device matching driver
+ * @epdrv: early platform driver structure
+ * @id: id to match against
+ */
+static struct platform_device * __init
+sh_early_platform_match(struct sh_early_platform_driver *epdrv, int id)
+{
+ struct platform_device *pd;
+
+ list_for_each_entry(pd, &sh_early_platform_device_list, dev.devres_head)
+ if (platform_match(&pd->dev, &epdrv->pdrv->driver))
+ if (pd->id == id)
+ return pd;
+
+ return NULL;
+}
+
+/**
+ * sh_early_platform_left - check if early platform driver has matching devices
+ * @epdrv: early platform driver structure
+ * @id: return true if id or above exists
+ */
+static int __init sh_early_platform_left(struct sh_early_platform_driver *epdrv,
+ int id)
+{
+ struct platform_device *pd;
+
+ list_for_each_entry(pd, &sh_early_platform_device_list, dev.devres_head)
+ if (platform_match(&pd->dev, &epdrv->pdrv->driver))
+ if (pd->id >= id)
+ return 1;
+
+ return 0;
+}
+
+/**
+ * sh_early_platform_driver_probe_id - probe drivers matching class_str and id
+ * @class_str: string to identify early platform driver class
+ * @id: id to match against
+ * @nr_probe: number of platform devices to successfully probe before exiting
+ */
+static int __init sh_early_platform_driver_probe_id(char *class_str,
+ int id,
+ int nr_probe)
+{
+ struct sh_early_platform_driver *epdrv;
+ struct platform_device *match;
+ int match_id;
+ int n = 0;
+ int left = 0;
+
+ list_for_each_entry(epdrv, &sh_early_platform_driver_list, list) {
+ /* only use drivers matching our class_str */
+ if (strcmp(class_str, epdrv->class_str))
+ continue;
+
+ if (id == -2) {
+ match_id = epdrv->requested_id;
+ left = 1;
+
+ } else {
+ match_id = id;
+ left += sh_early_platform_left(epdrv, id);
+
+ /* skip requested id */
+ switch (epdrv->requested_id) {
+ case EARLY_PLATFORM_ID_ERROR:
+ case EARLY_PLATFORM_ID_UNSET:
+ break;
+ default:
+ if (epdrv->requested_id == id)
+ match_id = EARLY_PLATFORM_ID_UNSET;
+ }
+ }
+
+ switch (match_id) {
+ case EARLY_PLATFORM_ID_ERROR:
+ pr_warn("%s: unable to parse %s parameter\n",
+ class_str, epdrv->pdrv->driver.name);
+ /* fall-through */
+ case EARLY_PLATFORM_ID_UNSET:
+ match = NULL;
+ break;
+ default:
+ match = sh_early_platform_match(epdrv, match_id);
+ }
+
+ if (match) {
+ /*
+ * Set up a sensible init_name to enable
+ * dev_name() and others to be used before the
+ * rest of the driver core is initialized.
+ */
+ if (!match->dev.init_name && slab_is_available()) {
+ if (match->id != -1)
+ match->dev.init_name =
+ kasprintf(GFP_KERNEL, "%s.%d",
+ match->name,
+ match->id);
+ else
+ match->dev.init_name =
+ kasprintf(GFP_KERNEL, "%s",
+ match->name);
+
+ if (!match->dev.init_name)
+ return -ENOMEM;
+ }
+
+ if (epdrv->pdrv->probe(match))
+ pr_warn("%s: unable to probe %s early.\n",
+ class_str, match->name);
+ else
+ n++;
+ }
+
+ if (n >= nr_probe)
+ break;
+ }
+
+ if (left)
+ return n;
+ else
+ return -ENODEV;
+}
+
+/**
+ * sh_early_platform_driver_probe - probe a class of registered drivers
+ * @class_str: string to identify early platform driver class
+ * @nr_probe: number of platform devices to successfully probe before exiting
+ * @user_only: only probe user specified early platform devices
+ *
+ * Used by architecture code to probe registered early platform drivers
+ * within a certain class. For probe to happen a registered early platform
+ * device matching a registered early platform driver is needed.
+ */
+int __init sh_early_platform_driver_probe(char *class_str,
+ int nr_probe,
+ int user_only)
+{
+ int k, n, i;
+
+ n = 0;
+ for (i = -2; n < nr_probe; i++) {
+ k = sh_early_platform_driver_probe_id(class_str, i, nr_probe - n);
+
+ if (k < 0)
+ break;
+
+ n += k;
+
+ if (user_only)
+ break;
+ }
+
+ return n;
+}
+
+/**
+ * sh_early_platform_cleanup - clean up early platform code
+ */
+static int __init sh_early_platform_cleanup(void)
+{
+ struct platform_device *pd, *pd2;
+
+ /* clean up the devres list used to chain devices */
+ list_for_each_entry_safe(pd, pd2, &sh_early_platform_device_list,
+ dev.devres_head) {
+ list_del(&pd->dev.devres_head);
+ memset(&pd->dev.devres_head, 0, sizeof(pd->dev.devres_head));
+ }
+
+ return 0;
+}
+/*
+ * This must happen once after all early devices are probed but before probing
+ * real platform devices.
+ */
+subsys_initcall(sh_early_platform_cleanup);
diff --git a/arch/sh/include/asm/platform_early.h b/arch/sh/include/asm/platform_early.h
new file mode 100644
index 000000000000..fc802137c37d
--- /dev/null
+++ b/arch/sh/include/asm/platform_early.h
@@ -0,0 +1,61 @@
+/* SPDX--License-Identifier: GPL-2.0 */
+
+#ifndef __PLATFORM_EARLY__
+#define __PLATFORM_EARLY__
+
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+struct sh_early_platform_driver {
+ const char *class_str;
+ struct platform_driver *pdrv;
+ struct list_head list;
+ int requested_id;
+ char *buffer;
+ int bufsize;
+};
+
+#define EARLY_PLATFORM_ID_UNSET -2
+#define EARLY_PLATFORM_ID_ERROR -3
+
+extern int sh_early_platform_driver_register(struct sh_early_platform_driver *epdrv,
+ char *buf);
+extern void sh_early_platform_add_devices(struct platform_device **devs, int num);
+
+static inline int is_sh_early_platform_device(struct platform_device *pdev)
+{
+ return !pdev->dev.driver;
+}
+
+extern void sh_early_platform_driver_register_all(char *class_str);
+extern int sh_early_platform_driver_probe(char *class_str,
+ int nr_probe, int user_only);
+
+#define sh_early_platform_init(class_string, platdrv) \
+ sh_early_platform_init_buffer(class_string, platdrv, NULL, 0)
+
+#ifndef MODULE
+#define sh_early_platform_init_buffer(class_string, platdrv, buf, bufsiz) \
+static __initdata struct sh_early_platform_driver early_driver = { \
+ .class_str = class_string, \
+ .buffer = buf, \
+ .bufsize = bufsiz, \
+ .pdrv = platdrv, \
+ .requested_id = EARLY_PLATFORM_ID_UNSET, \
+}; \
+static int __init sh_early_platform_driver_setup_func(char *buffer) \
+{ \
+ return sh_early_platform_driver_register(&early_driver, buffer); \
+} \
+early_param(class_string, sh_early_platform_driver_setup_func)
+#else /* MODULE */
+#define sh_early_platform_init_buffer(class_string, platdrv, buf, bufsiz) \
+static inline char *sh_early_platform_driver_setup_func(void) \
+{ \
+ return bufsiz ? buf : NULL; \
+}
+#endif /* MODULE */
+
+#endif /* __PLATFORM_EARLY__ */
diff --git a/arch/sh/include/cpu-sh4/cpu/sh7734.h b/arch/sh/include/cpu-sh4/cpu/sh7734.h
index 96f0246ad2f2..82b63208135a 100644
--- a/arch/sh/include/cpu-sh4/cpu/sh7734.h
+++ b/arch/sh/include/cpu-sh4/cpu/sh7734.h
@@ -134,7 +134,7 @@ enum {
GPIO_FN_EX_WAIT1, GPIO_FN_SD1_DAT0_A, GPIO_FN_DREQ2, GPIO_FN_CAN1_TX_C,
GPIO_FN_ET0_LINK_C, GPIO_FN_ET0_ETXD5_A,
GPIO_FN_EX_WAIT0, GPIO_FN_TCLK1_B,
- GPIO_FN_RD_WR, GPIO_FN_TCLK0,
+ GPIO_FN_RD_WR, GPIO_FN_TCLK0, GPIO_FN_CAN_CLK_B, GPIO_FN_ET0_ETXD4,
GPIO_FN_EX_CS5, GPIO_FN_SD1_CMD_A, GPIO_FN_ATADIR, GPIO_FN_QSSL_B,
GPIO_FN_ET0_ETXD3_A,
GPIO_FN_EX_CS4, GPIO_FN_SD1_WP_A, GPIO_FN_ATAWR, GPIO_FN_QMI_QIO1_B,
diff --git a/arch/sh/kernel/cpu/sh2/setup-sh7619.c b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
index f5b6841ef7e1..b1c877b6a420 100644
--- a/arch/sh/kernel/cpu/sh2/setup-sh7619.c
+++ b/arch/sh/kernel/cpu/sh2/setup-sh7619.c
@@ -12,6 +12,7 @@
#include <linux/sh_eth.h>
#include <linux/sh_timer.h>
#include <linux/io.h>
+#include <asm/platform_early.h>
enum {
UNUSED = 0,
@@ -199,6 +200,6 @@ void __init plat_early_device_setup(void)
/* enable CMT clock */
__raw_writeb(__raw_readb(STBCR3) & ~0x10, STBCR3);
- early_platform_add_devices(sh7619_early_devices,
+ sh_early_platform_add_devices(sh7619_early_devices,
ARRAY_SIZE(sh7619_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-mxg.c b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
index 52350ad0b0a2..cefa07924c16 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-mxg.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-mxg.c
@@ -9,6 +9,7 @@
#include <linux/serial.h>
#include <linux/serial_sci.h>
#include <linux/sh_timer.h>
+#include <asm/platform_early.h>
enum {
UNUSED = 0,
@@ -169,6 +170,6 @@ static struct platform_device *mxg_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(mxg_early_devices,
+ sh_early_platform_add_devices(mxg_early_devices,
ARRAY_SIZE(mxg_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
index b51ed761ae08..28f1bebf3405 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7201.c
@@ -11,6 +11,7 @@
#include <linux/serial_sci.h>
#include <linux/sh_timer.h>
#include <linux/io.h>
+#include <asm/platform_early.h>
enum {
UNUSED = 0,
@@ -412,6 +413,6 @@ void __init plat_early_device_setup(void)
/* enable MTU2 clock */
__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
- early_platform_add_devices(sh7201_early_devices,
+ sh_early_platform_add_devices(sh7201_early_devices,
ARRAY_SIZE(sh7201_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
index 89b3e49fc250..4839f3aaeb4c 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7203.c
@@ -10,6 +10,7 @@
#include <linux/serial_sci.h>
#include <linux/sh_timer.h>
#include <linux/io.h>
+#include <asm/platform_early.h>
enum {
UNUSED = 0,
@@ -349,6 +350,6 @@ void __init plat_early_device_setup(void)
/* enable MTU2 clock */
__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
- early_platform_add_devices(sh7203_early_devices,
+ sh_early_platform_add_devices(sh7203_early_devices,
ARRAY_SIZE(sh7203_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
index 36ff3a3139da..68add5af4cc5 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7206.c
@@ -11,6 +11,7 @@
#include <linux/serial_sci.h>
#include <linux/sh_timer.h>
#include <linux/io.h>
+#include <asm/platform_early.h>
enum {
UNUSED = 0,
@@ -285,6 +286,6 @@ void __init plat_early_device_setup(void)
/* enable MTU2 clock */
__raw_writeb(__raw_readb(STBCR3) & ~0x20, STBCR3);
- early_platform_add_devices(sh7206_early_devices,
+ sh_early_platform_add_devices(sh7206_early_devices,
ARRAY_SIZE(sh7206_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7264.c b/arch/sh/kernel/cpu/sh2a/setup-sh7264.c
index d199618d877c..8a1cb613dd2e 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7264.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7264.c
@@ -11,6 +11,7 @@
#include <linux/usb/r8a66597.h>
#include <linux/sh_timer.h>
#include <linux/io.h>
+#include <asm/platform_early.h>
enum {
UNUSED = 0,
@@ -546,6 +547,6 @@ static struct platform_device *sh7264_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7264_early_devices,
+ sh_early_platform_add_devices(sh7264_early_devices,
ARRAY_SIZE(sh7264_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh2a/setup-sh7269.c b/arch/sh/kernel/cpu/sh2a/setup-sh7269.c
index 9095c960b455..8b1ef3028320 100644
--- a/arch/sh/kernel/cpu/sh2a/setup-sh7269.c
+++ b/arch/sh/kernel/cpu/sh2a/setup-sh7269.c
@@ -12,6 +12,7 @@
#include <linux/usb/r8a66597.h>
#include <linux/sh_timer.h>
#include <linux/io.h>
+#include <asm/platform_early.h>
enum {
UNUSED = 0,
@@ -562,6 +563,6 @@ static struct platform_device *sh7269_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7269_early_devices,
+ sh_early_platform_add_devices(sh7269_early_devices,
ARRAY_SIZE(sh7269_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh3.c b/arch/sh/kernel/cpu/sh3/setup-sh3.c
index 8058c01cf09d..cf2a3f09fee4 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh3.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh3.c
@@ -8,6 +8,7 @@
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/io.h>
+#include <asm/platform_early.h>
/* All SH3 devices are equipped with IRQ0->5 (except sh7708) */
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7705.c b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
index e19d1ce7b6ad..0544134b3f20 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7705.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7705.c
@@ -14,6 +14,7 @@
#include <linux/sh_intc.h>
#include <asm/rtc.h>
#include <cpu/serial.h>
+#include <asm/platform_early.h>
enum {
UNUSED = 0,
@@ -178,7 +179,7 @@ static struct platform_device *sh7705_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7705_early_devices,
+ sh_early_platform_add_devices(sh7705_early_devices,
ARRAY_SIZE(sh7705_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh770x.c b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
index 5c5144bee6bc..4947f57748bc 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh770x.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh770x.c
@@ -18,6 +18,7 @@
#include <linux/sh_timer.h>
#include <linux/sh_intc.h>
#include <cpu/serial.h>
+#include <asm/platform_early.h>
enum {
UNUSED = 0,
@@ -230,7 +231,7 @@ static struct platform_device *sh770x_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh770x_early_devices,
+ sh_early_platform_add_devices(sh770x_early_devices,
ARRAY_SIZE(sh770x_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7710.c b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
index 4776e2495738..381910761579 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7710.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7710.c
@@ -13,6 +13,7 @@
#include <linux/sh_timer.h>
#include <linux/sh_intc.h>
#include <asm/rtc.h>
+#include <asm/platform_early.h>
enum {
UNUSED = 0,
@@ -177,7 +178,7 @@ static struct platform_device *sh7710_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7710_early_devices,
+ sh_early_platform_add_devices(sh7710_early_devices,
ARRAY_SIZE(sh7710_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh3/setup-sh7720.c b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
index 1d4c34e7b7db..425d067dae9b 100644
--- a/arch/sh/kernel/cpu/sh3/setup-sh7720.c
+++ b/arch/sh/kernel/cpu/sh3/setup-sh7720.c
@@ -19,6 +19,7 @@
#include <linux/sh_intc.h>
#include <linux/usb/ohci_pdriver.h>
#include <asm/rtc.h>
+#include <asm/platform_early.h>
#include <cpu/serial.h>
static struct resource rtc_resources[] = {
@@ -211,7 +212,7 @@ static struct platform_device *sh7720_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7720_early_devices,
+ sh_early_platform_add_devices(sh7720_early_devices,
ARRAY_SIZE(sh7720_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
index a40ef35d101a..e6737f3d0df2 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh4-202.c
@@ -12,6 +12,7 @@
#include <linux/sh_timer.h>
#include <linux/sh_intc.h>
#include <linux/io.h>
+#include <asm/platform_early.h>
static struct plat_sci_port scif0_platform_data = {
.scscr = SCSCR_REIE,
@@ -76,7 +77,7 @@ static struct platform_device *sh4202_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh4202_early_devices,
+ sh_early_platform_add_devices(sh4202_early_devices,
ARRAY_SIZE(sh4202_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7750.c b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
index b37bda66a532..19c8f1d69071 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7750.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7750.c
@@ -13,6 +13,7 @@
#include <linux/sh_intc.h>
#include <linux/serial_sci.h>
#include <generated/machtypes.h>
+#include <asm/platform_early.h>
static struct resource rtc_resources[] = {
[0] = {
@@ -161,15 +162,15 @@ void __init plat_early_device_setup(void)
if (mach_is_rts7751r2d()) {
scif_platform_data.scscr |= SCSCR_CKE1;
dev[0] = &scif_device;
- early_platform_add_devices(dev, 1);
+ sh_early_platform_add_devices(dev, 1);
} else {
dev[0] = &sci_device;
- early_platform_add_devices(dev, 1);
+ sh_early_platform_add_devices(dev, 1);
dev[0] = &scif_device;
- early_platform_add_devices(dev, 1);
+ sh_early_platform_add_devices(dev, 1);
}
- early_platform_add_devices(sh7750_early_devices,
+ sh_early_platform_add_devices(sh7750_early_devices,
ARRAY_SIZE(sh7750_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
index 86845da85997..14212f5d803c 100644
--- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c
+++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c
@@ -11,6 +11,7 @@
#include <linux/sh_intc.h>
#include <linux/serial_sci.h>
#include <linux/io.h>
+#include <asm/platform_early.h>
enum {
UNUSED = 0,
@@ -271,7 +272,7 @@ static struct platform_device *sh7760_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7760_early_devices,
+ sh_early_platform_add_devices(sh7760_early_devices,
ARRAY_SIZE(sh7760_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
index a15e25690b5f..b6015188fab1 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7343.c
@@ -12,6 +12,7 @@
#include <linux/sh_timer.h>
#include <linux/sh_intc.h>
#include <asm/clock.h>
+#include <asm/platform_early.h>
/* Serial */
static struct plat_sci_port scif0_platform_data = {
@@ -296,7 +297,7 @@ static struct platform_device *sh7343_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7343_early_devices,
+ sh_early_platform_add_devices(sh7343_early_devices,
ARRAY_SIZE(sh7343_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
index 7bd2776441ba..6676beef053e 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7366.c
@@ -15,6 +15,7 @@
#include <linux/sh_intc.h>
#include <linux/usb/r8a66597.h>
#include <asm/clock.h>
+#include <asm/platform_early.h>
static struct plat_sci_port scif0_platform_data = {
.scscr = SCSCR_REIE,
@@ -240,7 +241,7 @@ static struct platform_device *sh7366_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7366_early_devices,
+ sh_early_platform_add_devices(sh7366_early_devices,
ARRAY_SIZE(sh7366_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index 1ce65f88f060..0c6757ef63f4 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -18,6 +18,7 @@
#include <asm/clock.h>
#include <asm/mmzone.h>
#include <asm/siu.h>
+#include <asm/platform_early.h>
#include <cpu/dma-register.h>
#include <cpu/sh7722.h>
@@ -512,7 +513,7 @@ static struct platform_device *sh7722_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7722_early_devices,
+ sh_early_platform_add_devices(sh7722_early_devices,
ARRAY_SIZE(sh7722_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
index edb649950662..83ae1ad4a86e 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7723.c
@@ -16,6 +16,7 @@
#include <linux/io.h>
#include <asm/clock.h>
#include <asm/mmzone.h>
+#include <asm/platform_early.h>
#include <cpu/sh7723.h>
/* Serial */
@@ -410,7 +411,7 @@ static struct platform_device *sh7723_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7723_early_devices,
+ sh_early_platform_add_devices(sh7723_early_devices,
ARRAY_SIZE(sh7723_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 3e9825031d3d..0d990ab1ba2a 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -24,6 +24,7 @@
#include <asm/suspend.h>
#include <asm/clock.h>
#include <asm/mmzone.h>
+#include <asm/platform_early.h>
#include <cpu/dma-register.h>
#include <cpu/sh7724.h>
@@ -830,7 +831,7 @@ static struct platform_device *sh7724_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7724_early_devices,
+ sh_early_platform_add_devices(sh7724_early_devices,
ARRAY_SIZE(sh7724_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7734.c b/arch/sh/kernel/cpu/sh4a/setup-sh7734.c
index 06a91569697a..9911da794358 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7734.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7734.c
@@ -18,6 +18,7 @@
#include <linux/io.h>
#include <asm/clock.h>
#include <asm/irq.h>
+#include <asm/platform_early.h>
#include <cpu/sh7734.h>
/* SCIF */
@@ -280,7 +281,7 @@ static struct platform_device *sh7734_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7734_early_devices,
+ sh_early_platform_add_devices(sh7734_early_devices,
ARRAY_SIZE(sh7734_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7757.c b/arch/sh/kernel/cpu/sh4a/setup-sh7757.c
index 2501ce656511..67e330b7ea46 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7757.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7757.c
@@ -19,6 +19,7 @@
#include <linux/usb/ohci_pdriver.h>
#include <cpu/dma-register.h>
#include <cpu/sh7757.h>
+#include <asm/platform_early.h>
static struct plat_sci_port scif2_platform_data = {
.scscr = SCSCR_REIE,
@@ -767,7 +768,7 @@ static struct platform_device *sh7757_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7757_early_devices,
+ sh_early_platform_add_devices(sh7757_early_devices,
ARRAY_SIZE(sh7757_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
index 419c5efe4a17..b0608664785f 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7763.c
@@ -14,6 +14,7 @@
#include <linux/io.h>
#include <linux/serial_sci.h>
#include <linux/usb/ohci_pdriver.h>
+#include <asm/platform_early.h>
static struct plat_sci_port scif0_platform_data = {
.scscr = SCSCR_REIE,
@@ -221,7 +222,7 @@ static struct platform_device *sh7763_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7763_early_devices,
+ sh_early_platform_add_devices(sh7763_early_devices,
ARRAY_SIZE(sh7763_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
index 5fb4cf9b58c6..5efec6ceb04d 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7770.c
@@ -11,6 +11,7 @@
#include <linux/sh_timer.h>
#include <linux/sh_intc.h>
#include <linux/io.h>
+#include <asm/platform_early.h>
static struct plat_sci_port scif0_platform_data = {
.scscr = SCSCR_REIE | SCSCR_TOIE,
@@ -316,7 +317,7 @@ static struct platform_device *sh7770_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7770_early_devices,
+ sh_early_platform_add_devices(sh7770_early_devices,
ARRAY_SIZE(sh7770_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
index ab7d6b715865..c818b788ecb0 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
@@ -13,6 +13,7 @@
#include <linux/sh_timer.h>
#include <linux/sh_intc.h>
#include <cpu/dma-register.h>
+#include <asm/platform_early.h>
static struct plat_sci_port scif0_platform_data = {
.scscr = SCSCR_REIE | SCSCR_CKE1,
@@ -285,7 +286,7 @@ void __init plat_early_device_setup(void)
scif1_platform_data.scscr &= ~SCSCR_CKE1;
}
- early_platform_add_devices(sh7780_early_devices,
+ sh_early_platform_add_devices(sh7780_early_devices,
ARRAY_SIZE(sh7780_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index a438da47285d..3b4a414d60a9 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -14,6 +14,7 @@
#include <linux/sh_timer.h>
#include <linux/sh_intc.h>
#include <asm/mmzone.h>
+#include <asm/platform_early.h>
#include <cpu/dma-register.h>
static struct plat_sci_port scif0_platform_data = {
@@ -353,7 +354,7 @@ static struct platform_device *sh7785_early_devices[] __initdata = {
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7785_early_devices,
+ sh_early_platform_add_devices(sh7785_early_devices,
ARRAY_SIZE(sh7785_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index d894165a0ef6..4b0db8259e3d 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -23,6 +23,7 @@
#include <linux/usb/ohci_pdriver.h>
#include <cpu/dma-register.h>
#include <asm/mmzone.h>
+#include <asm/platform_early.h>
static struct plat_sci_port scif0_platform_data = {
.scscr = SCSCR_REIE | SCSCR_CKE1,
@@ -834,6 +835,6 @@ arch_initcall(sh7786_devices_setup);
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh7786_early_devices,
+ sh_early_platform_add_devices(sh7786_early_devices,
ARRAY_SIZE(sh7786_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh4a/setup-shx3.c b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
index 14aa4552bc45..7014d6d199b3 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-shx3.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-shx3.c
@@ -14,6 +14,7 @@
#include <linux/sh_intc.h>
#include <cpu/shx3.h>
#include <asm/mmzone.h>
+#include <asm/platform_early.h>
/*
* This intentionally only registers SCIF ports 0, 1, and 3. SCIF 2
@@ -152,7 +153,7 @@ arch_initcall(shx3_devices_setup);
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(shx3_early_devices,
+ sh_early_platform_add_devices(shx3_early_devices,
ARRAY_SIZE(shx3_early_devices));
}
diff --git a/arch/sh/kernel/cpu/sh5/setup-sh5.c b/arch/sh/kernel/cpu/sh5/setup-sh5.c
index 41c1673afc0b..dc8476d67244 100644
--- a/arch/sh/kernel/cpu/sh5/setup-sh5.c
+++ b/arch/sh/kernel/cpu/sh5/setup-sh5.c
@@ -12,6 +12,7 @@
#include <linux/mm.h>
#include <linux/sh_timer.h>
#include <asm/addrspace.h>
+#include <asm/platform_early.h>
static struct plat_sci_port scif0_platform_data = {
.flags = UPF_IOREMAP,
@@ -115,6 +116,6 @@ arch_initcall(sh5_devices_setup);
void __init plat_early_device_setup(void)
{
- early_platform_add_devices(sh5_early_devices,
+ sh_early_platform_add_devices(sh5_early_devices,
ARRAY_SIZE(sh5_early_devices));
}
diff --git a/arch/sh/kernel/io_trapped.c b/arch/sh/kernel/io_trapped.c
index bacad6da4fe4..60c828a2b8a2 100644
--- a/arch/sh/kernel/io_trapped.c
+++ b/arch/sh/kernel/io_trapped.c
@@ -99,7 +99,7 @@ int register_trapped_io(struct trapped_io *tiop)
return 0;
bad:
- pr_warning("unable to install trapped io filter\n");
+ pr_warn("unable to install trapped io filter\n");
return -1;
}
EXPORT_SYMBOL_GPL(register_trapped_io);
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 2c0e0f37a318..d232cfa01877 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -44,6 +44,7 @@
#include <asm/mmu_context.h>
#include <asm/mmzone.h>
#include <asm/sparsemem.h>
+#include <asm/platform_early.h>
/*
* Initialize loops_per_jiffy as 10000000 (1000MIPS).
@@ -328,7 +329,7 @@ void __init setup_arch(char **cmdline_p)
sh_mv_setup();
/* Let earlyprintk output early console messages */
- early_platform_driver_probe("earlyprintk", 1, 1);
+ sh_early_platform_driver_probe("earlyprintk", 1, 1);
#ifdef CONFIG_OF_FLATTREE
#ifdef CONFIG_USE_BUILTIN_DTB
@@ -354,7 +355,7 @@ void __init setup_arch(char **cmdline_p)
/* processor boot mode configuration */
int generic_mode_pins(void)
{
- pr_warning("generic_mode_pins(): missing mode pin configuration\n");
+ pr_warn("generic_mode_pins(): missing mode pin configuration\n");
return 0;
}
diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c
index e16b2cd269a3..821a09cbd605 100644
--- a/arch/sh/kernel/time.c
+++ b/arch/sh/kernel/time.c
@@ -18,6 +18,7 @@
#include <linux/rtc.h>
#include <asm/clock.h>
#include <asm/rtc.h>
+#include <asm/platform_early.h>
static void __init sh_late_time_init(void)
{
@@ -30,8 +31,8 @@ static void __init sh_late_time_init(void)
* clocksource and the jiffies clocksource is used transparently
* instead. No error handling is necessary here.
*/
- early_platform_driver_register_all("earlytimer");
- early_platform_driver_probe("earlytimer", 2, 0);
+ sh_early_platform_driver_register_all("earlytimer");
+ sh_early_platform_driver_probe("earlytimer", 2, 0);
}
void __init time_init(void)
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S
index 77a59d8c6b4d..c60b19958c35 100644
--- a/arch/sh/kernel/vmlinux.lds.S
+++ b/arch/sh/kernel/vmlinux.lds.S
@@ -48,11 +48,10 @@ SECTIONS
} = 0x0009
EXCEPTION_TABLE(16)
- NOTES
_sdata = .;
RO_DATA(PAGE_SIZE)
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
_edata = .;
DWARF_EH_FRAME
diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c
index 792f36129062..3169a343a5ab 100644
--- a/arch/sh/mm/consistent.c
+++ b/arch/sh/mm/consistent.c
@@ -43,8 +43,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev,
r = pdev->resource + pdev->num_resources - 1;
if (r->flags) {
- pr_warning("%s: unable to find empty space for resource\n",
- name);
+ pr_warn("%s: unable to find empty space for resource\n", name);
return -EINVAL;
}
@@ -54,7 +53,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev,
buf = dma_alloc_coherent(&pdev->dev, memsize, &dma_handle, GFP_KERNEL);
if (!buf) {
- pr_warning("%s: unable to allocate memory\n", name);
+ pr_warn("%s: unable to allocate memory\n", name);
return -ENOMEM;
}
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index 7b946b3dee9d..0f5a501c95a9 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -24,6 +24,7 @@
#include <linux/types.h>
#include <crypto/algapi.h>
#include <crypto/aes.h>
+#include <crypto/internal/skcipher.h>
#include <asm/fpumacro.h>
#include <asm/pstate.h>
@@ -197,6 +198,12 @@ static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
return 0;
}
+static int aes_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *in_key,
+ unsigned int key_len)
+{
+ return aes_set_key(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{
struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -211,131 +218,108 @@ static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
ctx->ops->decrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
}
-#define AES_BLOCK_MASK (~(AES_BLOCK_SIZE-1))
-
-static int ecb_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
ctx->ops->load_encrypt_keys(&ctx->key[0]);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
- if (likely(block_len)) {
- ctx->ops->ecb_encrypt(&ctx->key[0],
- (const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len);
- }
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ ctx->ops->ecb_encrypt(&ctx->key[0], walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE));
+ err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int ecb_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- u64 *key_end;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ const u64 *key_end;
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
ctx->ops->load_decrypt_keys(&ctx->key[0]);
key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
- if (likely(block_len)) {
- ctx->ops->ecb_decrypt(key_end,
- (const u64 *) walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr, block_len);
- }
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ ctx->ops->ecb_decrypt(key_end, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE));
+ err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int cbc_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
ctx->ops->load_encrypt_keys(&ctx->key[0]);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
- if (likely(block_len)) {
- ctx->ops->cbc_encrypt(&ctx->key[0],
- (const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len, (u64 *) walk.iv);
- }
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ ctx->ops->cbc_encrypt(&ctx->key[0], walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE),
+ walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int cbc_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- u64 *key_end;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ const u64 *key_end;
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
ctx->ops->load_decrypt_keys(&ctx->key[0]);
key_end = &ctx->key[ctx->expanded_key_length / sizeof(u64)];
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
- if (likely(block_len)) {
- ctx->ops->cbc_decrypt(key_end,
- (const u64 *) walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len, (u64 *) walk.iv);
- }
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ ctx->ops->cbc_decrypt(key_end, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE),
+ walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
- struct blkcipher_walk *walk)
+static void ctr_crypt_final(const struct crypto_sparc64_aes_ctx *ctx,
+ struct skcipher_walk *walk)
{
u8 *ctrblk = walk->iv;
u64 keystream[AES_BLOCK_SIZE / sizeof(u64)];
@@ -349,40 +333,35 @@ static void ctr_crypt_final(struct crypto_sparc64_aes_ctx *ctx,
crypto_inc(ctrblk, AES_BLOCK_SIZE);
}
-static int ctr_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ctr_crypt(struct skcipher_request *req)
{
- struct crypto_sparc64_aes_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct crypto_sparc64_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt_block(desc, &walk, AES_BLOCK_SIZE);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
ctx->ops->load_encrypt_keys(&ctx->key[0]);
while ((nbytes = walk.nbytes) >= AES_BLOCK_SIZE) {
- unsigned int block_len = nbytes & AES_BLOCK_MASK;
-
- if (likely(block_len)) {
- ctx->ops->ctr_crypt(&ctx->key[0],
- (const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len, (u64 *) walk.iv);
- }
- nbytes &= AES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ ctx->ops->ctr_crypt(&ctx->key[0], walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, AES_BLOCK_SIZE),
+ walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % AES_BLOCK_SIZE);
}
if (walk.nbytes) {
ctr_crypt_final(ctx, &walk);
- err = blkcipher_walk_done(desc, &walk, 0);
+ err = skcipher_walk_done(&walk, 0);
}
fprs_write(0);
return err;
}
-static struct crypto_alg algs[] = { {
+static struct crypto_alg cipher_alg = {
.cra_name = "aes",
.cra_driver_name = "aes-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
@@ -400,66 +379,53 @@ static struct crypto_alg algs[] = { {
.cia_decrypt = crypto_aes_decrypt
}
}
-}, {
- .cra_name = "ecb(aes)",
- .cra_driver_name = "ecb-aes-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .setkey = aes_set_key,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(aes)",
- .cra_driver_name = "cbc-aes-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = AES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = aes_set_key,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "ctr(aes)",
- .cra_driver_name = "ctr-aes-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = 1,
- .cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = AES_MIN_KEY_SIZE,
- .max_keysize = AES_MAX_KEY_SIZE,
- .ivsize = AES_BLOCK_SIZE,
- .setkey = aes_set_key,
- .encrypt = ctr_crypt,
- .decrypt = ctr_crypt,
- },
- },
-} };
+};
+
+static struct skcipher_alg skcipher_algs[] = {
+ {
+ .base.cra_name = "ecb(aes)",
+ .base.cra_driver_name = "ecb-aes-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = aes_set_key_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(aes)",
+ .base.cra_driver_name = "cbc-aes-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = AES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aes_set_key_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "ctr(aes)",
+ .base.cra_driver_name = "ctr-aes-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = 1,
+ .base.cra_ctxsize = sizeof(struct crypto_sparc64_aes_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .setkey = aes_set_key_skcipher,
+ .encrypt = ctr_crypt,
+ .decrypt = ctr_crypt,
+ .chunksize = AES_BLOCK_SIZE,
+ }
+};
static bool __init sparc64_has_aes_opcode(void)
{
@@ -477,17 +443,27 @@ static bool __init sparc64_has_aes_opcode(void)
static int __init aes_sparc64_mod_init(void)
{
- if (sparc64_has_aes_opcode()) {
- pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
- return crypto_register_algs(algs, ARRAY_SIZE(algs));
+ int err;
+
+ if (!sparc64_has_aes_opcode()) {
+ pr_info("sparc64 aes opcodes not available.\n");
+ return -ENODEV;
}
- pr_info("sparc64 aes opcodes not available.\n");
- return -ENODEV;
+ pr_info("Using sparc64 aes opcodes optimized AES implementation\n");
+ err = crypto_register_alg(&cipher_alg);
+ if (err)
+ return err;
+ err = crypto_register_skciphers(skcipher_algs,
+ ARRAY_SIZE(skcipher_algs));
+ if (err)
+ crypto_unregister_alg(&cipher_alg);
+ return err;
}
static void __exit aes_sparc64_mod_fini(void)
{
- crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+ crypto_unregister_alg(&cipher_alg);
+ crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
}
module_init(aes_sparc64_mod_init);
diff --git a/arch/sparc/crypto/camellia_glue.c b/arch/sparc/crypto/camellia_glue.c
index 3823f9491a72..1700f863748c 100644
--- a/arch/sparc/crypto/camellia_glue.c
+++ b/arch/sparc/crypto/camellia_glue.c
@@ -12,6 +12,7 @@
#include <linux/mm.h>
#include <linux/types.h>
#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
#include <asm/fpumacro.h>
#include <asm/pstate.h>
@@ -52,6 +53,12 @@ static int camellia_set_key(struct crypto_tfm *tfm, const u8 *_in_key,
return 0;
}
+static int camellia_set_key_skcipher(struct crypto_skcipher *tfm,
+ const u8 *in_key, unsigned int key_len)
+{
+ return camellia_set_key(crypto_skcipher_tfm(tfm), in_key, key_len);
+}
+
extern void camellia_sparc64_crypt(const u64 *key, const u32 *input,
u32 *output, unsigned int key_len);
@@ -81,61 +88,46 @@ typedef void ecb_crypt_op(const u64 *input, u64 *output, unsigned int len,
extern ecb_crypt_op camellia_sparc64_ecb_crypt_3_grand_rounds;
extern ecb_crypt_op camellia_sparc64_ecb_crypt_4_grand_rounds;
-#define CAMELLIA_BLOCK_MASK (~(CAMELLIA_BLOCK_SIZE - 1))
-
-static int __ecb_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes, bool encrypt)
+static int __ecb_crypt(struct skcipher_request *req, bool encrypt)
{
- struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
ecb_crypt_op *op;
const u64 *key;
+ unsigned int nbytes;
int err;
op = camellia_sparc64_ecb_crypt_3_grand_rounds;
if (ctx->key_len != 16)
op = camellia_sparc64_ecb_crypt_4_grand_rounds;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
if (encrypt)
key = &ctx->encrypt_key[0];
else
key = &ctx->decrypt_key[0];
camellia_sparc64_load_keys(key, ctx->key_len);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64;
- u64 *dst64;
-
- src64 = (const u64 *)walk.src.virt.addr;
- dst64 = (u64 *) walk.dst.virt.addr;
- op(src64, dst64, block_len, key);
- }
- nbytes &= CAMELLIA_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ op(walk.src.virt.addr, walk.dst.virt.addr,
+ round_down(nbytes, CAMELLIA_BLOCK_SIZE), key);
+ err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int ecb_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- return __ecb_crypt(desc, dst, src, nbytes, true);
+ return __ecb_crypt(req, true);
}
-static int ecb_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- return __ecb_crypt(desc, dst, src, nbytes, false);
+ return __ecb_crypt(req, false);
}
typedef void cbc_crypt_op(const u64 *input, u64 *output, unsigned int len,
@@ -146,85 +138,65 @@ extern cbc_crypt_op camellia_sparc64_cbc_encrypt_4_grand_rounds;
extern cbc_crypt_op camellia_sparc64_cbc_decrypt_3_grand_rounds;
extern cbc_crypt_op camellia_sparc64_cbc_decrypt_4_grand_rounds;
-static int cbc_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
cbc_crypt_op *op;
const u64 *key;
+ unsigned int nbytes;
int err;
op = camellia_sparc64_cbc_encrypt_3_grand_rounds;
if (ctx->key_len != 16)
op = camellia_sparc64_cbc_encrypt_4_grand_rounds;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
key = &ctx->encrypt_key[0];
camellia_sparc64_load_keys(key, ctx->key_len);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64;
- u64 *dst64;
-
- src64 = (const u64 *)walk.src.virt.addr;
- dst64 = (u64 *) walk.dst.virt.addr;
- op(src64, dst64, block_len, key,
- (u64 *) walk.iv);
- }
- nbytes &= CAMELLIA_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ op(walk.src.virt.addr, walk.dst.virt.addr,
+ round_down(nbytes, CAMELLIA_BLOCK_SIZE), key, walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int cbc_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_decrypt(struct skcipher_request *req)
{
- struct camellia_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct camellia_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
cbc_crypt_op *op;
const u64 *key;
+ unsigned int nbytes;
int err;
op = camellia_sparc64_cbc_decrypt_3_grand_rounds;
if (ctx->key_len != 16)
op = camellia_sparc64_cbc_decrypt_4_grand_rounds;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
key = &ctx->decrypt_key[0];
camellia_sparc64_load_keys(key, ctx->key_len);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & CAMELLIA_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64;
- u64 *dst64;
-
- src64 = (const u64 *)walk.src.virt.addr;
- dst64 = (u64 *) walk.dst.virt.addr;
- op(src64, dst64, block_len, key,
- (u64 *) walk.iv);
- }
- nbytes &= CAMELLIA_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ op(walk.src.virt.addr, walk.dst.virt.addr,
+ round_down(nbytes, CAMELLIA_BLOCK_SIZE), key, walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % CAMELLIA_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static struct crypto_alg algs[] = { {
+static struct crypto_alg cipher_alg = {
.cra_name = "camellia",
.cra_driver_name = "camellia-sparc64",
.cra_priority = SPARC_CR_OPCODE_PRIORITY,
@@ -242,46 +214,37 @@ static struct crypto_alg algs[] = { {
.cia_decrypt = camellia_decrypt
}
}
-}, {
- .cra_name = "ecb(camellia)",
- .cra_driver_name = "ecb-camellia-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .setkey = camellia_set_key,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(camellia)",
- .cra_driver_name = "cbc-camellia-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = CAMELLIA_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = CAMELLIA_MIN_KEY_SIZE,
- .max_keysize = CAMELLIA_MAX_KEY_SIZE,
- .ivsize = CAMELLIA_BLOCK_SIZE,
- .setkey = camellia_set_key,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}
+};
+
+static struct skcipher_alg skcipher_algs[] = {
+ {
+ .base.cra_name = "ecb(camellia)",
+ .base.cra_driver_name = "ecb-camellia-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .setkey = camellia_set_key_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(camellia)",
+ .base.cra_driver_name = "cbc-camellia-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = CAMELLIA_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct camellia_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = CAMELLIA_MIN_KEY_SIZE,
+ .max_keysize = CAMELLIA_MAX_KEY_SIZE,
+ .ivsize = CAMELLIA_BLOCK_SIZE,
+ .setkey = camellia_set_key_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }
};
static bool __init sparc64_has_camellia_opcode(void)
@@ -300,17 +263,27 @@ static bool __init sparc64_has_camellia_opcode(void)
static int __init camellia_sparc64_mod_init(void)
{
- if (sparc64_has_camellia_opcode()) {
- pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
- return crypto_register_algs(algs, ARRAY_SIZE(algs));
+ int err;
+
+ if (!sparc64_has_camellia_opcode()) {
+ pr_info("sparc64 camellia opcodes not available.\n");
+ return -ENODEV;
}
- pr_info("sparc64 camellia opcodes not available.\n");
- return -ENODEV;
+ pr_info("Using sparc64 camellia opcodes optimized CAMELLIA implementation\n");
+ err = crypto_register_alg(&cipher_alg);
+ if (err)
+ return err;
+ err = crypto_register_skciphers(skcipher_algs,
+ ARRAY_SIZE(skcipher_algs));
+ if (err)
+ crypto_unregister_alg(&cipher_alg);
+ return err;
}
static void __exit camellia_sparc64_mod_fini(void)
{
- crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+ crypto_unregister_alg(&cipher_alg);
+ crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
}
module_init(camellia_sparc64_mod_init);
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index db6010b4e52e..a499102bf706 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -13,6 +13,7 @@
#include <linux/types.h>
#include <crypto/algapi.h>
#include <crypto/internal/des.h>
+#include <crypto/internal/skcipher.h>
#include <asm/fpumacro.h>
#include <asm/pstate.h>
@@ -61,6 +62,12 @@ static int des_set_key(struct crypto_tfm *tfm, const u8 *key,
return 0;
}
+static int des_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ return des_set_key(crypto_skcipher_tfm(tfm), key, keylen);
+}
+
extern void des_sparc64_crypt(const u64 *key, const u64 *input,
u64 *output);
@@ -85,113 +92,90 @@ extern void des_sparc64_load_keys(const u64 *key);
extern void des_sparc64_ecb_crypt(const u64 *input, u64 *output,
unsigned int len);
-#define DES_BLOCK_MASK (~(DES_BLOCK_SIZE - 1))
-
-static int __ecb_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes, bool encrypt)
+static int __ecb_crypt(struct skcipher_request *req, bool encrypt)
{
- struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct des_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
if (encrypt)
des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
else
des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
- if (likely(block_len)) {
- des_sparc64_ecb_crypt((const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ des_sparc64_ecb_crypt(walk.src.virt.addr, walk.dst.virt.addr,
+ round_down(nbytes, DES_BLOCK_SIZE));
+ err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int ecb_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_encrypt(struct skcipher_request *req)
{
- return __ecb_crypt(desc, dst, src, nbytes, true);
+ return __ecb_crypt(req, true);
}
-static int ecb_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb_decrypt(struct skcipher_request *req)
{
- return __ecb_crypt(desc, dst, src, nbytes, false);
+ return __ecb_crypt(req, false);
}
extern void des_sparc64_cbc_encrypt(const u64 *input, u64 *output,
unsigned int len, u64 *iv);
-static int cbc_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output,
+ unsigned int len, u64 *iv);
+
+static int __cbc_crypt(struct skcipher_request *req, bool encrypt)
{
- struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct des_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
- if (likely(block_len)) {
- des_sparc64_cbc_encrypt((const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len, (u64 *) walk.iv);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ if (encrypt)
+ des_sparc64_load_keys(&ctx->encrypt_expkey[0]);
+ else
+ des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
+ while ((nbytes = walk.nbytes) != 0) {
+ if (encrypt)
+ des_sparc64_cbc_encrypt(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes,
+ DES_BLOCK_SIZE),
+ walk.iv);
+ else
+ des_sparc64_cbc_decrypt(walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes,
+ DES_BLOCK_SIZE),
+ walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-extern void des_sparc64_cbc_decrypt(const u64 *input, u64 *output,
- unsigned int len, u64 *iv);
-
-static int cbc_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int cbc_encrypt(struct skcipher_request *req)
{
- struct des_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- des_sparc64_load_keys(&ctx->decrypt_expkey[0]);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
+ return __cbc_crypt(req, true);
+}
- if (likely(block_len)) {
- des_sparc64_cbc_decrypt((const u64 *)walk.src.virt.addr,
- (u64 *) walk.dst.virt.addr,
- block_len, (u64 *) walk.iv);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
- fprs_write(0);
- return err;
+static int cbc_decrypt(struct skcipher_request *req)
+{
+ return __cbc_crypt(req, false);
}
static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
@@ -227,6 +211,12 @@ static int des3_ede_set_key(struct crypto_tfm *tfm, const u8 *key,
return 0;
}
+static int des3_ede_set_key_skcipher(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ return des3_ede_set_key(crypto_skcipher_tfm(tfm), key, keylen);
+}
+
extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input,
u64 *output);
@@ -251,241 +241,196 @@ extern void des3_ede_sparc64_load_keys(const u64 *key);
extern void des3_ede_sparc64_ecb_crypt(const u64 *expkey, const u64 *input,
u64 *output, unsigned int len);
-static int __ecb3_crypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes, bool encrypt)
+static int __ecb3_crypt(struct skcipher_request *req, bool encrypt)
{
- struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct des3_ede_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
const u64 *K;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
if (encrypt)
K = &ctx->encrypt_expkey[0];
else
K = &ctx->decrypt_expkey[0];
des3_ede_sparc64_load_keys(K);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64 = (const u64 *)walk.src.virt.addr;
- des3_ede_sparc64_ecb_crypt(K, src64,
- (u64 *) walk.dst.virt.addr,
- block_len);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ des3_ede_sparc64_ecb_crypt(K, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes, DES_BLOCK_SIZE));
+ err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static int ecb3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb3_encrypt(struct skcipher_request *req)
{
- return __ecb3_crypt(desc, dst, src, nbytes, true);
+ return __ecb3_crypt(req, true);
}
-static int ecb3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int ecb3_decrypt(struct skcipher_request *req)
{
- return __ecb3_crypt(desc, dst, src, nbytes, false);
+ return __ecb3_crypt(req, false);
}
extern void des3_ede_sparc64_cbc_encrypt(const u64 *expkey, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
-static int cbc3_encrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
-{
- struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
- const u64 *K;
- int err;
-
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-
- K = &ctx->encrypt_expkey[0];
- des3_ede_sparc64_load_keys(K);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64 = (const u64 *)walk.src.virt.addr;
- des3_ede_sparc64_cbc_encrypt(K, src64,
- (u64 *) walk.dst.virt.addr,
- block_len,
- (u64 *) walk.iv);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
- }
- fprs_write(0);
- return err;
-}
-
extern void des3_ede_sparc64_cbc_decrypt(const u64 *expkey, const u64 *input,
u64 *output, unsigned int len,
u64 *iv);
-static int cbc3_decrypt(struct blkcipher_desc *desc,
- struct scatterlist *dst, struct scatterlist *src,
- unsigned int nbytes)
+static int __cbc3_crypt(struct skcipher_request *req, bool encrypt)
{
- struct des3_ede_sparc64_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
- struct blkcipher_walk walk;
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ const struct des3_ede_sparc64_ctx *ctx = crypto_skcipher_ctx(tfm);
+ struct skcipher_walk walk;
const u64 *K;
+ unsigned int nbytes;
int err;
- blkcipher_walk_init(&walk, dst, src, nbytes);
- err = blkcipher_walk_virt(desc, &walk);
- desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = skcipher_walk_virt(&walk, req, true);
+ if (err)
+ return err;
- K = &ctx->decrypt_expkey[0];
+ if (encrypt)
+ K = &ctx->encrypt_expkey[0];
+ else
+ K = &ctx->decrypt_expkey[0];
des3_ede_sparc64_load_keys(K);
- while ((nbytes = walk.nbytes)) {
- unsigned int block_len = nbytes & DES_BLOCK_MASK;
-
- if (likely(block_len)) {
- const u64 *src64 = (const u64 *)walk.src.virt.addr;
- des3_ede_sparc64_cbc_decrypt(K, src64,
- (u64 *) walk.dst.virt.addr,
- block_len,
- (u64 *) walk.iv);
- }
- nbytes &= DES_BLOCK_SIZE - 1;
- err = blkcipher_walk_done(desc, &walk, nbytes);
+ while ((nbytes = walk.nbytes) != 0) {
+ if (encrypt)
+ des3_ede_sparc64_cbc_encrypt(K, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes,
+ DES_BLOCK_SIZE),
+ walk.iv);
+ else
+ des3_ede_sparc64_cbc_decrypt(K, walk.src.virt.addr,
+ walk.dst.virt.addr,
+ round_down(nbytes,
+ DES_BLOCK_SIZE),
+ walk.iv);
+ err = skcipher_walk_done(&walk, nbytes % DES_BLOCK_SIZE);
}
fprs_write(0);
return err;
}
-static struct crypto_alg algs[] = { {
- .cra_name = "des",
- .cra_driver_name = "des-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = DES_KEY_SIZE,
- .cia_max_keysize = DES_KEY_SIZE,
- .cia_setkey = des_set_key,
- .cia_encrypt = sparc_des_encrypt,
- .cia_decrypt = sparc_des_decrypt
+static int cbc3_encrypt(struct skcipher_request *req)
+{
+ return __cbc3_crypt(req, true);
+}
+
+static int cbc3_decrypt(struct skcipher_request *req)
+{
+ return __cbc3_crypt(req, false);
+}
+
+static struct crypto_alg cipher_algs[] = {
+ {
+ .cra_name = "des",
+ .cra_driver_name = "des-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = DES_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct des_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = DES_KEY_SIZE,
+ .cia_max_keysize = DES_KEY_SIZE,
+ .cia_setkey = des_set_key,
+ .cia_encrypt = sparc_des_encrypt,
+ .cia_decrypt = sparc_des_decrypt
+ }
}
- }
-}, {
- .cra_name = "ecb(des)",
- .cra_driver_name = "ecb-des-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .setkey = des_set_key,
- .encrypt = ecb_encrypt,
- .decrypt = ecb_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(des)",
- .cra_driver_name = "cbc-des-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES_KEY_SIZE,
- .max_keysize = DES_KEY_SIZE,
- .ivsize = DES_BLOCK_SIZE,
- .setkey = des_set_key,
- .encrypt = cbc_encrypt,
- .decrypt = cbc_decrypt,
- },
- },
-}, {
- .cra_name = "des3_ede",
- .cra_driver_name = "des3_ede-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
- .cra_blocksize = DES3_EDE_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .cipher = {
- .cia_min_keysize = DES3_EDE_KEY_SIZE,
- .cia_max_keysize = DES3_EDE_KEY_SIZE,
- .cia_setkey = des3_ede_set_key,
- .cia_encrypt = sparc_des3_ede_encrypt,
- .cia_decrypt = sparc_des3_ede_decrypt
+ }, {
+ .cra_name = "des3_ede",
+ .cra_driver_name = "des3_ede-sparc64",
+ .cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .cra_flags = CRYPTO_ALG_TYPE_CIPHER,
+ .cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
+ .cra_alignmask = 7,
+ .cra_module = THIS_MODULE,
+ .cra_u = {
+ .cipher = {
+ .cia_min_keysize = DES3_EDE_KEY_SIZE,
+ .cia_max_keysize = DES3_EDE_KEY_SIZE,
+ .cia_setkey = des3_ede_set_key,
+ .cia_encrypt = sparc_des3_ede_encrypt,
+ .cia_decrypt = sparc_des3_ede_decrypt
+ }
}
}
-}, {
- .cra_name = "ecb(des3_ede)",
- .cra_driver_name = "ecb-des3_ede-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES3_EDE_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_EDE_KEY_SIZE,
- .max_keysize = DES3_EDE_KEY_SIZE,
- .setkey = des3_ede_set_key,
- .encrypt = ecb3_encrypt,
- .decrypt = ecb3_decrypt,
- },
- },
-}, {
- .cra_name = "cbc(des3_ede)",
- .cra_driver_name = "cbc-des3_ede-sparc64",
- .cra_priority = SPARC_CR_OPCODE_PRIORITY,
- .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
- .cra_blocksize = DES3_EDE_BLOCK_SIZE,
- .cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
- .cra_alignmask = 7,
- .cra_type = &crypto_blkcipher_type,
- .cra_module = THIS_MODULE,
- .cra_u = {
- .blkcipher = {
- .min_keysize = DES3_EDE_KEY_SIZE,
- .max_keysize = DES3_EDE_KEY_SIZE,
- .ivsize = DES3_EDE_BLOCK_SIZE,
- .setkey = des3_ede_set_key,
- .encrypt = cbc3_encrypt,
- .decrypt = cbc3_decrypt,
- },
- },
-} };
+};
+
+static struct skcipher_alg skcipher_algs[] = {
+ {
+ .base.cra_name = "ecb(des)",
+ .base.cra_driver_name = "ecb-des-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct des_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .setkey = des_set_key_skcipher,
+ .encrypt = ecb_encrypt,
+ .decrypt = ecb_decrypt,
+ }, {
+ .base.cra_name = "cbc(des)",
+ .base.cra_driver_name = "cbc-des-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = DES_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct des_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES_KEY_SIZE,
+ .max_keysize = DES_KEY_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .setkey = des_set_key_skcipher,
+ .encrypt = cbc_encrypt,
+ .decrypt = cbc_decrypt,
+ }, {
+ .base.cra_name = "ecb(des3_ede)",
+ .base.cra_driver_name = "ecb-des3_ede-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .setkey = des3_ede_set_key_skcipher,
+ .encrypt = ecb3_encrypt,
+ .decrypt = ecb3_decrypt,
+ }, {
+ .base.cra_name = "cbc(des3_ede)",
+ .base.cra_driver_name = "cbc-des3_ede-sparc64",
+ .base.cra_priority = SPARC_CR_OPCODE_PRIORITY,
+ .base.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+ .base.cra_ctxsize = sizeof(struct des3_ede_sparc64_ctx),
+ .base.cra_alignmask = 7,
+ .base.cra_module = THIS_MODULE,
+ .min_keysize = DES3_EDE_KEY_SIZE,
+ .max_keysize = DES3_EDE_KEY_SIZE,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .setkey = des3_ede_set_key_skcipher,
+ .encrypt = cbc3_encrypt,
+ .decrypt = cbc3_decrypt,
+ }
+};
static bool __init sparc64_has_des_opcode(void)
{
@@ -503,17 +448,27 @@ static bool __init sparc64_has_des_opcode(void)
static int __init des_sparc64_mod_init(void)
{
- if (sparc64_has_des_opcode()) {
- pr_info("Using sparc64 des opcodes optimized DES implementation\n");
- return crypto_register_algs(algs, ARRAY_SIZE(algs));
+ int err;
+
+ if (!sparc64_has_des_opcode()) {
+ pr_info("sparc64 des opcodes not available.\n");
+ return -ENODEV;
}
- pr_info("sparc64 des opcodes not available.\n");
- return -ENODEV;
+ pr_info("Using sparc64 des opcodes optimized DES implementation\n");
+ err = crypto_register_algs(cipher_algs, ARRAY_SIZE(cipher_algs));
+ if (err)
+ return err;
+ err = crypto_register_skciphers(skcipher_algs,
+ ARRAY_SIZE(skcipher_algs));
+ if (err)
+ crypto_unregister_algs(cipher_algs, ARRAY_SIZE(cipher_algs));
+ return err;
}
static void __exit des_sparc64_mod_fini(void)
{
- crypto_unregister_algs(algs, ARRAY_SIZE(algs));
+ crypto_unregister_algs(cipher_algs, ARRAY_SIZE(cipher_algs));
+ crypto_unregister_skciphers(skcipher_algs, ARRAY_SIZE(skcipher_algs));
}
module_init(des_sparc64_mod_init);
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index a8275fea4b70..9b4506373353 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1673,9 +1673,9 @@ void __init setup_per_cpu_areas(void)
pcpu_alloc_bootmem,
pcpu_free_bootmem);
if (rc)
- pr_warning("PERCPU: %s allocator failed (%d), "
- "falling back to page size\n",
- pcpu_fc_names[pcpu_chosen_fc], rc);
+ pr_warn("PERCPU: %s allocator failed (%d), "
+ "falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE,
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S
index 61afd787bd0c..7ec79918b566 100644
--- a/arch/sparc/kernel/vmlinux.lds.S
+++ b/arch/sparc/kernel/vmlinux.lds.S
@@ -67,7 +67,7 @@ SECTIONS
.data1 : {
*(.data1)
}
- RW_DATA_SECTION(SMP_CACHE_BYTES, 0, THREAD_SIZE)
+ RW_DATA(SMP_CACHE_BYTES, 0, THREAD_SIZE)
/* End of data section */
_edata = .;
@@ -78,7 +78,6 @@ SECTIONS
__stop___fixup = .;
}
EXCEPTION_TABLE(16)
- NOTES
. = ALIGN(PAGE_SIZE);
__init_begin = ALIGN(PAGE_SIZE);
diff --git a/arch/um/include/asm/common.lds.S b/arch/um/include/asm/common.lds.S
index d7086b985f27..7145ce699982 100644
--- a/arch/um/include/asm/common.lds.S
+++ b/arch/um/include/asm/common.lds.S
@@ -9,14 +9,13 @@
_sdata = .;
PROVIDE (sdata = .);
- RODATA
+ RO_DATA(4096)
.unprotected : { *(.unprotected) }
. = ALIGN(4096);
PROVIDE (_unprotected_end = .);
. = ALIGN(4096);
- NOTES
EXCEPTION_TABLE(0)
BUG_TABLE
diff --git a/arch/unicore32/kernel/vmlinux.lds.S b/arch/unicore32/kernel/vmlinux.lds.S
index 7abf90537cd5..6fb320b337ef 100644
--- a/arch/unicore32/kernel/vmlinux.lds.S
+++ b/arch/unicore32/kernel/vmlinux.lds.S
@@ -43,12 +43,11 @@ SECTIONS
_etext = .;
_sdata = .;
- RO_DATA_SECTION(PAGE_SIZE)
- RW_DATA_SECTION(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ RO_DATA(PAGE_SIZE)
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
_edata = .;
EXCEPTION_TABLE(L1_CACHE_BYTES)
- NOTES
BSS_SECTION(0, 0, 0)
_end = .;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ef85139553f..58b31ee198d9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -24,7 +24,7 @@ config X86_64
depends on 64BIT
# Options that are inherently 64-bit kernel only:
select ARCH_HAS_GIGANTIC_PAGE
- select ARCH_SUPPORTS_INT128
+ select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
select ARCH_USE_CMPXCHG_LOCKREF
select HAVE_ARCH_SOFT_DIRTY
select MODULES_USE_ELF_RELA
@@ -73,7 +73,6 @@ config X86
select ARCH_HAS_PMEM_API if X86_64
select ARCH_HAS_PTE_DEVMAP if X86_64
select ARCH_HAS_PTE_SPECIAL
- select ARCH_HAS_REFCOUNT
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_UACCESS_MCSAFE if X86_64 && X86_MCE
select ARCH_HAS_SET_MEMORY
@@ -158,6 +157,7 @@ config X86
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
+ select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS
select HAVE_EISA
@@ -932,36 +932,6 @@ config GART_IOMMU
If unsure, say Y.
-config CALGARY_IOMMU
- bool "IBM Calgary IOMMU support"
- select IOMMU_HELPER
- select SWIOTLB
- depends on X86_64 && PCI
- ---help---
- Support for hardware IOMMUs in IBM's xSeries x366 and x460
- systems. Needed to run systems with more than 3GB of memory
- properly with 32-bit PCI devices that do not support DAC
- (Double Address Cycle). Calgary also supports bus level
- isolation, where all DMAs pass through the IOMMU. This
- prevents them from going anywhere except their intended
- destination. This catches hard-to-find kernel bugs and
- mis-behaving drivers and devices that do not use the DMA-API
- properly to set up their DMA buffers. The IOMMU can be
- turned off at boot time with the iommu=off parameter.
- Normally the kernel will make the right choice by itself.
- If unsure, say Y.
-
-config CALGARY_IOMMU_ENABLED_BY_DEFAULT
- def_bool y
- prompt "Should Calgary be enabled by default?"
- depends on CALGARY_IOMMU
- ---help---
- Should Calgary be enabled by default? if you choose 'y', Calgary
- will be used (if it exists). If you choose 'n', Calgary will not be
- used even if it exists. If you choose 'n' and would like to use
- Calgary anyway, pass 'iommu=calgary' on the kernel command line.
- If unsure, say Y.
-
config MAXSMP
bool "Enable Maximum number of SMP Processors and NUMA Nodes"
depends on X86_64 && SMP && DEBUG_KERNEL
@@ -1000,8 +970,8 @@ config NR_CPUS_RANGE_END
config NR_CPUS_RANGE_END
int
depends on X86_64
- default 8192 if SMP && ( MAXSMP || CPUMASK_OFFSTACK)
- default 512 if SMP && (!MAXSMP && !CPUMASK_OFFSTACK)
+ default 8192 if SMP && CPUMASK_OFFSTACK
+ default 512 if SMP && !CPUMASK_OFFSTACK
default 1 if !SMP
config NR_CPUS_DEFAULT
@@ -1254,6 +1224,24 @@ config X86_VSYSCALL_EMULATION
Disabling this option saves about 7K of kernel size and
possibly 4K of additional runtime pagetable memory.
+config X86_IOPL_IOPERM
+ bool "IOPERM and IOPL Emulation"
+ default y
+ ---help---
+ This enables the ioperm() and iopl() syscalls which are necessary
+ for legacy applications.
+
+ Legacy IOPL support is an overbroad mechanism which allows user
+ space aside of accessing all 65536 I/O ports also to disable
+ interrupts. To gain this access the caller needs CAP_SYS_RAWIO
+ capabilities and permission from potentially active security
+ modules.
+
+ The emulation restricts the functionality of the syscall to
+ only allowing the full range I/O port access, but prevents the
+ ability to disable interrupts from user space which would be
+ granted if the hardware IOPL mechanism would be used.
+
config TOSHIBA
tristate "Toshiba Laptop support"
depends on X86_32
@@ -1492,6 +1480,7 @@ config X86_PAE
config X86_5LEVEL
bool "Enable 5-level page tables support"
+ default y
select DYNAMIC_MEMORY_LAYOUT
select SPARSEMEM_VMEMMAP
depends on X86_64
@@ -1751,7 +1740,7 @@ config X86_RESERVE_LOW
config MATH_EMULATION
bool
depends on MODIFY_LDT_SYSCALL
- prompt "Math emulation" if X86_32
+ prompt "Math emulation" if X86_32 && (M486SX || MELAN)
---help---
Linux can emulate a math coprocessor (used for floating point
operations) if you don't have one. 486DX and Pentium processors have
@@ -1880,16 +1869,16 @@ config X86_SMAP
If unsure, say Y.
-config X86_INTEL_UMIP
+config X86_UMIP
def_bool y
- depends on CPU_SUP_INTEL
- prompt "Intel User Mode Instruction Prevention" if EXPERT
+ depends on CPU_SUP_INTEL || CPU_SUP_AMD
+ prompt "User Mode Instruction Prevention" if EXPERT
---help---
- The User Mode Instruction Prevention (UMIP) is a security
- feature in newer Intel processors. If enabled, a general
- protection fault is issued if the SGDT, SLDT, SIDT, SMSW
- or STR instructions are executed in user mode. These instructions
- unnecessarily expose information about the hardware state.
+ User Mode Instruction Prevention (UMIP) is a security feature in
+ some x86 processors. If enabled, a general protection fault is
+ issued if the SGDT, SLDT, SIDT, SMSW or STR instructions are
+ executed in user mode. These instructions unnecessarily expose
+ information about the hardware state.
The vast majority of applications do not use these instructions.
For the very few that do, software emulation is provided in
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8e29c991ba3e..af9c967782f6 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -50,12 +50,19 @@ choice
See each option's help text for additional details. If you don't know
what to do, choose "486".
+config M486SX
+ bool "486SX"
+ depends on X86_32
+ ---help---
+ Select this for an 486-class CPU without an FPU such as
+ AMD/Cyrix/IBM/Intel SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5S.
+
config M486
- bool "486"
+ bool "486DX"
depends on X86_32
---help---
Select this for an 486-class CPU such as AMD/Cyrix/IBM/Intel
- 486DX/DX2/DX4 or SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
+ 486DX/DX2/DX4 and UMC U5D.
config M586
bool "586/K5/5x86/6x86/6x86MX"
@@ -312,20 +319,20 @@ config X86_L1_CACHE_SHIFT
int
default "7" if MPENTIUM4 || MPSC
default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MATOM || MVIAC7 || X86_GENERIC || GENERIC_CPU
- default "4" if MELAN || M486 || MGEODEGX1
+ default "4" if MELAN || M486SX || M486 || MGEODEGX1
default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
config X86_F00F_BUG
def_bool y
- depends on M586MMX || M586TSC || M586 || M486
+ depends on M586MMX || M586TSC || M586 || M486SX || M486
config X86_INVD_BUG
def_bool y
- depends on M486
+ depends on M486SX || M486
config X86_ALIGNMENT_16
def_bool y
- depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
+ depends on MWINCHIP3D || MWINCHIPC6 || MCYRIXIII || MELAN || MK6 || M586MMX || M586TSC || M586 || M486SX || M486 || MVIAC3_2 || MGEODEGX1
config X86_INTEL_USERCOPY
def_bool y
@@ -378,7 +385,7 @@ config X86_MINIMUM_CPU_FAMILY
config X86_DEBUGCTLMSR
def_bool y
- depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486) && !UML
+ depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486SX || M486) && !UML
menuconfig PROCESSOR_SELECT
bool "Supported processor vendors" if EXPERT
@@ -402,7 +409,7 @@ config CPU_SUP_INTEL
config CPU_SUP_CYRIX_32
default y
bool "Support Cyrix processors" if PROCESSOR_SELECT
- depends on M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
+ depends on M486SX || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for Cyrix processors
@@ -470,7 +477,7 @@ config CPU_SUP_TRANSMETA_32
config CPU_SUP_UMC_32
default y
bool "Support UMC processors" if PROCESSOR_SELECT
- depends on M486 || (EXPERT && !64BIT)
+ depends on M486SX || M486 || (EXPERT && !64BIT)
---help---
This enables detection, tunings and quirks for UMC processors
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bf9cd83de777..409c00f74e60 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -316,10 +316,6 @@ config UNWINDER_FRAME_POINTER
unwinder, but the kernel text size will grow by ~3% and the kernel's
overall performance will degrade by roughly 5-10%.
- This option is recommended if you want to use the livepatch
- consistency model, as this is currently the only way to get a
- reliable stack trace (CONFIG_HAVE_RELIABLE_STACKTRACE).
-
config UNWINDER_GUESS
bool "Guess unwinder"
depends on EXPERT
diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu
index 1f5faf8606b4..cd3056759880 100644
--- a/arch/x86/Makefile_32.cpu
+++ b/arch/x86/Makefile_32.cpu
@@ -10,6 +10,7 @@ else
tune = $(call cc-option,-mcpu=$(1),$(2))
endif
+cflags-$(CONFIG_M486SX) += -march=i486
cflags-$(CONFIG_M486) += -march=i486
cflags-$(CONFIG_M586) += -march=i586
cflags-$(CONFIG_M586TSC) += -march=i586
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index e2839b5c246c..95410d6ee2ff 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -67,6 +67,7 @@ clean-files += cpustr.h
KBUILD_CFLAGS := $(REALMODE_CFLAGS) -D_SETUP
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
+KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
GCOV_PROFILE := n
UBSAN_SANITIZE := n
@@ -87,7 +88,7 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
SETUP_OBJS = $(addprefix $(obj)/,$(setup-y))
-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) [ABCDGRSTVW] \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|kernel_info\|_end\|_ehead\|_text\|z_.*\)$$/\#define ZO_\2 0x\1/p'
quiet_cmd_zoffset = ZOFFSET $@
cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 6b84afdd7538..aa976adb7094 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -38,6 +38,7 @@ KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector)
KBUILD_CFLAGS += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS += $(call cc-disable-warning, gnu)
KBUILD_CFLAGS += -Wno-pointer-sign
+KBUILD_CFLAGS += $(call cc-option,-fmacro-prefix-map=$(srctree)/=)
KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__
GCOV_PROFILE := n
@@ -72,8 +73,8 @@ $(obj)/../voffset.h: vmlinux FORCE
$(obj)/misc.o: $(obj)/../voffset.h
-vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
- $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
+vmlinux-objs-y := $(obj)/vmlinux.lds $(obj)/kernel_info.o $(obj)/head_$(BITS).o \
+ $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/error.o \
$(obj)/piggy.o $(obj)/cpuflags.o
vmlinux-objs-$(CONFIG_EARLY_PRINTK) += $(obj)/early_serial_console.o
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index 82bc60c8acb2..72b08fde6de6 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -554,7 +554,11 @@ setup_e820(struct boot_params *params, struct setup_data *e820ext, u32 e820ext_s
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY:
- e820_type = E820_TYPE_RAM;
+ if (efi_soft_reserve_enabled() &&
+ (d->attribute & EFI_MEMORY_SP))
+ e820_type = E820_TYPE_SOFT_RESERVED;
+ else
+ e820_type = E820_TYPE_RAM;
break;
case EFI_ACPI_MEMORY_NVS:
@@ -782,6 +786,9 @@ efi_main(struct efi_config *c, struct boot_params *boot_params)
/* Ask the firmware to clear memory on unclean shutdown */
efi_enable_reset_attack_mitigation(sys_table);
+
+ efi_random_get_seed(sys_table);
+
efi_retrieve_tpm2_eventlog(sys_table);
setup_graphics(boot_params);
diff --git a/arch/x86/boot/compressed/efi_stub_32.S b/arch/x86/boot/compressed/efi_stub_32.S
index 257e341fd2c8..ed6c351d34ed 100644
--- a/arch/x86/boot/compressed/efi_stub_32.S
+++ b/arch/x86/boot/compressed/efi_stub_32.S
@@ -24,7 +24,7 @@
*/
.text
-ENTRY(efi_call_phys)
+SYM_FUNC_START(efi_call_phys)
/*
* 0. The function can only be called in Linux kernel. So CS has been
* set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
@@ -77,7 +77,7 @@ ENTRY(efi_call_phys)
movl saved_return_addr(%edx), %ecx
pushl %ecx
ret
-ENDPROC(efi_call_phys)
+SYM_FUNC_END(efi_call_phys)
.previous
.data
diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S
index bff9ab7c6317..593913692d16 100644
--- a/arch/x86/boot/compressed/efi_thunk_64.S
+++ b/arch/x86/boot/compressed/efi_thunk_64.S
@@ -23,7 +23,7 @@
.code64
.text
-ENTRY(efi64_thunk)
+SYM_FUNC_START(efi64_thunk)
push %rbp
push %rbx
@@ -97,14 +97,14 @@ ENTRY(efi64_thunk)
pop %rbx
pop %rbp
ret
-ENDPROC(efi64_thunk)
+SYM_FUNC_END(efi64_thunk)
-ENTRY(efi_exit32)
+SYM_FUNC_START_LOCAL(efi_exit32)
movq func_rt_ptr(%rip), %rax
push %rax
mov %rdi, %rax
ret
-ENDPROC(efi_exit32)
+SYM_FUNC_END(efi_exit32)
.code32
/*
@@ -112,7 +112,7 @@ ENDPROC(efi_exit32)
*
* The stack should represent the 32-bit calling convention.
*/
-ENTRY(efi_enter32)
+SYM_FUNC_START_LOCAL(efi_enter32)
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %es
@@ -172,20 +172,23 @@ ENTRY(efi_enter32)
btsl $X86_CR0_PG_BIT, %eax
movl %eax, %cr0
lret
-ENDPROC(efi_enter32)
+SYM_FUNC_END(efi_enter32)
.data
.balign 8
- .global efi32_boot_gdt
-efi32_boot_gdt: .word 0
- .quad 0
+SYM_DATA_START(efi32_boot_gdt)
+ .word 0
+ .quad 0
+SYM_DATA_END(efi32_boot_gdt)
+
+SYM_DATA_START_LOCAL(save_gdt)
+ .word 0
+ .quad 0
+SYM_DATA_END(save_gdt)
-save_gdt: .word 0
- .quad 0
-func_rt_ptr: .quad 0
+SYM_DATA_LOCAL(func_rt_ptr, .quad 0)
- .global efi_gdt64
-efi_gdt64:
+SYM_DATA_START(efi_gdt64)
.word efi_gdt64_end - efi_gdt64
.long 0 /* Filled out by user */
.word 0
@@ -194,4 +197,4 @@ efi_gdt64:
.quad 0x00cf92000000ffff /* __KERNEL_DS */
.quad 0x0080890000000000 /* TS descriptor */
.quad 0x0000000000000000 /* TS continued */
-efi_gdt64_end:
+SYM_DATA_END_LABEL(efi_gdt64, SYM_L_LOCAL, efi_gdt64_end)
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 5e30eaaf8576..f2dfd6d083ef 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -61,7 +61,7 @@
.hidden _egot
__HEAD
-ENTRY(startup_32)
+SYM_FUNC_START(startup_32)
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -142,14 +142,14 @@ ENTRY(startup_32)
*/
leal .Lrelocated(%ebx), %eax
jmp *%eax
-ENDPROC(startup_32)
+SYM_FUNC_END(startup_32)
#ifdef CONFIG_EFI_STUB
/*
* We don't need the return address, so set up the stack so efi_main() can find
* its arguments.
*/
-ENTRY(efi_pe_entry)
+SYM_FUNC_START(efi_pe_entry)
add $0x4, %esp
call 1f
@@ -174,9 +174,9 @@ ENTRY(efi_pe_entry)
pushl %eax
pushl %ecx
jmp 2f /* Skip efi_config initialization */
-ENDPROC(efi_pe_entry)
+SYM_FUNC_END(efi_pe_entry)
-ENTRY(efi32_stub_entry)
+SYM_FUNC_START(efi32_stub_entry)
add $0x4, %esp
popl %ecx
popl %edx
@@ -205,11 +205,11 @@ fail:
movl BP_code32_start(%esi), %eax
leal startup_32(%eax), %eax
jmp *%eax
-ENDPROC(efi32_stub_entry)
+SYM_FUNC_END(efi32_stub_entry)
#endif
.text
-.Lrelocated:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
/*
* Clear BSS (stack is currently empty)
@@ -260,6 +260,7 @@ ENDPROC(efi32_stub_entry)
*/
xorl %ebx, %ebx
jmp *%eax
+SYM_FUNC_END(.Lrelocated)
#ifdef CONFIG_EFI_STUB
.data
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d98cd483377e..58a512e33d8d 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -45,7 +45,7 @@
__HEAD
.code32
-ENTRY(startup_32)
+SYM_FUNC_START(startup_32)
/*
* 32bit entry is 0 and it is ABI so immutable!
* If we come here directly from a bootloader,
@@ -222,11 +222,11 @@ ENTRY(startup_32)
/* Jump from 32bit compatibility mode into 64bit mode. */
lret
-ENDPROC(startup_32)
+SYM_FUNC_END(startup_32)
#ifdef CONFIG_EFI_MIXED
.org 0x190
-ENTRY(efi32_stub_entry)
+SYM_FUNC_START(efi32_stub_entry)
add $0x4, %esp /* Discard return address */
popl %ecx
popl %edx
@@ -245,12 +245,12 @@ ENTRY(efi32_stub_entry)
movl %eax, efi_config(%ebp)
jmp startup_32
-ENDPROC(efi32_stub_entry)
+SYM_FUNC_END(efi32_stub_entry)
#endif
.code64
.org 0x200
-ENTRY(startup_64)
+SYM_CODE_START(startup_64)
/*
* 64bit entry is 0x200 and it is ABI so immutable!
* We come here either from startup_32 or directly from a
@@ -442,11 +442,12 @@ trampoline_return:
*/
leaq .Lrelocated(%rbx), %rax
jmp *%rax
+SYM_CODE_END(startup_64)
#ifdef CONFIG_EFI_STUB
/* The entry point for the PE/COFF executable is efi_pe_entry. */
-ENTRY(efi_pe_entry)
+SYM_FUNC_START(efi_pe_entry)
movq %rcx, efi64_config(%rip) /* Handle */
movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */
@@ -495,10 +496,10 @@ fail:
movl BP_code32_start(%esi), %eax
leaq startup_64(%rax), %rax
jmp *%rax
-ENDPROC(efi_pe_entry)
+SYM_FUNC_END(efi_pe_entry)
.org 0x390
-ENTRY(efi64_stub_entry)
+SYM_FUNC_START(efi64_stub_entry)
movq %rdi, efi64_config(%rip) /* Handle */
movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */
@@ -507,11 +508,11 @@ ENTRY(efi64_stub_entry)
movq %rdx, %rsi
jmp handover_entry
-ENDPROC(efi64_stub_entry)
+SYM_FUNC_END(efi64_stub_entry)
#endif
.text
-.Lrelocated:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
/*
* Clear BSS (stack is currently empty)
@@ -540,6 +541,7 @@ ENDPROC(efi64_stub_entry)
* Jump to the decompressed kernel.
*/
jmp *%rax
+SYM_FUNC_END(.Lrelocated)
/*
* Adjust the global offset table
@@ -570,7 +572,7 @@ ENDPROC(efi64_stub_entry)
* ECX contains the base address of the trampoline memory.
* Non zero RDX means trampoline needs to enable 5-level paging.
*/
-ENTRY(trampoline_32bit_src)
+SYM_CODE_START(trampoline_32bit_src)
/* Set up data and stack segments */
movl $__KERNEL_DS, %eax
movl %eax, %ds
@@ -633,11 +635,13 @@ ENTRY(trampoline_32bit_src)
movl %eax, %cr0
lret
+SYM_CODE_END(trampoline_32bit_src)
.code64
-.Lpaging_enabled:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
/* Return from the trampoline */
jmp *%rdi
+SYM_FUNC_END(.Lpaging_enabled)
/*
* The trampoline code has a size limit.
@@ -647,20 +651,22 @@ ENTRY(trampoline_32bit_src)
.org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
.code32
-.Lno_longmode:
+SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
1:
hlt
jmp 1b
+SYM_FUNC_END(.Lno_longmode)
#include "../../kernel/verify_cpu.S"
.data
-gdt64:
+SYM_DATA_START_LOCAL(gdt64)
.word gdt_end - gdt
.quad 0
+SYM_DATA_END(gdt64)
.balign 8
-gdt:
+SYM_DATA_START_LOCAL(gdt)
.word gdt_end - gdt
.long gdt
.word 0
@@ -669,25 +675,24 @@ gdt:
.quad 0x00cf92000000ffff /* __KERNEL_DS */
.quad 0x0080890000000000 /* TS descriptor */
.quad 0x0000000000000000 /* TS continued */
-gdt_end:
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
#ifdef CONFIG_EFI_STUB
-efi_config:
- .quad 0
+SYM_DATA_LOCAL(efi_config, .quad 0)
#ifdef CONFIG_EFI_MIXED
- .global efi32_config
-efi32_config:
+SYM_DATA_START(efi32_config)
.fill 5,8,0
.quad efi64_thunk
.byte 0
+SYM_DATA_END(efi32_config)
#endif
- .global efi64_config
-efi64_config:
+SYM_DATA_START(efi64_config)
.fill 5,8,0
.quad efi_call
.byte 1
+SYM_DATA_END(efi64_config)
#endif /* CONFIG_EFI_STUB */
/*
@@ -695,23 +700,21 @@ efi64_config:
*/
.bss
.balign 4
-boot_heap:
- .fill BOOT_HEAP_SIZE, 1, 0
-boot_stack:
+SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0)
+
+SYM_DATA_START_LOCAL(boot_stack)
.fill BOOT_STACK_SIZE, 1, 0
-boot_stack_end:
+SYM_DATA_END_LABEL(boot_stack, SYM_L_LOCAL, boot_stack_end)
/*
* Space for page tables (not in .bss so not zeroed)
*/
.section ".pgtable","a",@nobits
.balign 4096
-pgtable:
- .fill BOOT_PGT_SIZE, 1, 0
+SYM_DATA_LOCAL(pgtable, .fill BOOT_PGT_SIZE, 1, 0)
/*
* The page table is going to be used instead of page table in the trampoline
* memory.
*/
-top_pgtable:
- .fill PAGE_SIZE, 1, 0
+SYM_DATA_LOCAL(top_pgtable, .fill PAGE_SIZE, 1, 0)
diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
index 2e53c056ba20..d7408af55738 100644
--- a/arch/x86/boot/compressed/kaslr.c
+++ b/arch/x86/boot/compressed/kaslr.c
@@ -132,8 +132,14 @@ char *skip_spaces(const char *str)
#include "../../../../lib/ctype.c"
#include "../../../../lib/cmdline.c"
+enum parse_mode {
+ PARSE_MEMMAP,
+ PARSE_EFI,
+};
+
static int
-parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
+parse_memmap(char *p, unsigned long long *start, unsigned long long *size,
+ enum parse_mode mode)
{
char *oldp;
@@ -156,8 +162,29 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
*start = memparse(p + 1, &p);
return 0;
case '@':
- /* memmap=nn@ss specifies usable region, should be skipped */
- *size = 0;
+ if (mode == PARSE_MEMMAP) {
+ /*
+ * memmap=nn@ss specifies usable region, should
+ * be skipped
+ */
+ *size = 0;
+ } else {
+ unsigned long long flags;
+
+ /*
+ * efi_fake_mem=nn@ss:attr the attr specifies
+ * flags that might imply a soft-reservation.
+ */
+ *start = memparse(p + 1, &p);
+ if (p && *p == ':') {
+ p++;
+ if (kstrtoull(p, 0, &flags) < 0)
+ *size = 0;
+ else if (flags & EFI_MEMORY_SP)
+ return 0;
+ }
+ *size = 0;
+ }
/* Fall through */
default:
/*
@@ -172,7 +199,7 @@ parse_memmap(char *p, unsigned long long *start, unsigned long long *size)
return -EINVAL;
}
-static void mem_avoid_memmap(char *str)
+static void mem_avoid_memmap(enum parse_mode mode, char *str)
{
static int i;
@@ -187,7 +214,7 @@ static void mem_avoid_memmap(char *str)
if (k)
*k++ = 0;
- rc = parse_memmap(str, &start, &size);
+ rc = parse_memmap(str, &start, &size, mode);
if (rc < 0)
break;
str = k;
@@ -238,7 +265,6 @@ static void parse_gb_huge_pages(char *param, char *val)
}
}
-
static void handle_mem_options(void)
{
char *args = (char *)get_cmd_line_ptr();
@@ -271,7 +297,7 @@ static void handle_mem_options(void)
}
if (!strcmp(param, "memmap")) {
- mem_avoid_memmap(val);
+ mem_avoid_memmap(PARSE_MEMMAP, val);
} else if (strstr(param, "hugepages")) {
parse_gb_huge_pages(param, val);
} else if (!strcmp(param, "mem")) {
@@ -284,6 +310,8 @@ static void handle_mem_options(void)
goto out;
mem_limit = mem_size;
+ } else if (!strcmp(param, "efi_fake_mem")) {
+ mem_avoid_memmap(PARSE_EFI, val);
}
}
@@ -459,6 +487,18 @@ static bool mem_avoid_overlap(struct mem_vector *img,
is_overlapping = true;
}
+ if (ptr->type == SETUP_INDIRECT &&
+ ((struct setup_indirect *)ptr->data)->type != SETUP_INDIRECT) {
+ avoid.start = ((struct setup_indirect *)ptr->data)->addr;
+ avoid.size = ((struct setup_indirect *)ptr->data)->len;
+
+ if (mem_overlaps(img, &avoid) && (avoid.start < earliest)) {
+ *overlap = avoid;
+ earliest = overlap->start;
+ is_overlapping = true;
+ }
+ }
+
ptr = (struct setup_data *)(unsigned long)ptr->next;
}
@@ -760,6 +800,10 @@ process_efi_entries(unsigned long minimum, unsigned long image_size)
if (md->type != EFI_CONVENTIONAL_MEMORY)
continue;
+ if (efi_soft_reserve_enabled() &&
+ (md->attribute & EFI_MEMORY_SP))
+ continue;
+
if (efi_mirror_found &&
!(md->attribute & EFI_MEMORY_MORE_RELIABLE))
continue;
diff --git a/arch/x86/boot/compressed/kernel_info.S b/arch/x86/boot/compressed/kernel_info.S
new file mode 100644
index 000000000000..f818ee8fba38
--- /dev/null
+++ b/arch/x86/boot/compressed/kernel_info.S
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/bootparam.h>
+
+ .section ".rodata.kernel_info", "a"
+
+ .global kernel_info
+
+kernel_info:
+ /* Header, Linux top (structure). */
+ .ascii "LToP"
+ /* Size. */
+ .long kernel_info_var_len_data - kernel_info
+ /* Size total. */
+ .long kernel_info_end - kernel_info
+
+ /* Maximal allowed type for setup_data and setup_indirect structs. */
+ .long SETUP_TYPE_MAX
+
+kernel_info_var_len_data:
+ /* Empty for time being... */
+kernel_info_end:
diff --git a/arch/x86/boot/compressed/mem_encrypt.S b/arch/x86/boot/compressed/mem_encrypt.S
index 6afb7130a387..dd07e7b41b11 100644
--- a/arch/x86/boot/compressed/mem_encrypt.S
+++ b/arch/x86/boot/compressed/mem_encrypt.S
@@ -15,7 +15,7 @@
.text
.code32
-ENTRY(get_sev_encryption_bit)
+SYM_FUNC_START(get_sev_encryption_bit)
xor %eax, %eax
#ifdef CONFIG_AMD_MEM_ENCRYPT
@@ -65,10 +65,10 @@ ENTRY(get_sev_encryption_bit)
#endif /* CONFIG_AMD_MEM_ENCRYPT */
ret
-ENDPROC(get_sev_encryption_bit)
+SYM_FUNC_END(get_sev_encryption_bit)
.code64
-ENTRY(set_sev_encryption_mask)
+SYM_FUNC_START(set_sev_encryption_mask)
#ifdef CONFIG_AMD_MEM_ENCRYPT
push %rbp
push %rdx
@@ -90,12 +90,11 @@ ENTRY(set_sev_encryption_mask)
xor %rax, %rax
ret
-ENDPROC(set_sev_encryption_mask)
+SYM_FUNC_END(set_sev_encryption_mask)
.data
#ifdef CONFIG_AMD_MEM_ENCRYPT
.balign 8
-GLOBAL(sme_me_mask)
- .quad 0
+SYM_DATA(sme_me_mask, .quad 0)
#endif
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S
index 4c5f4f4ad035..6afd05e819d2 100644
--- a/arch/x86/boot/copy.S
+++ b/arch/x86/boot/copy.S
@@ -15,7 +15,7 @@
.code16
.text
-GLOBAL(memcpy)
+SYM_FUNC_START_NOALIGN(memcpy)
pushw %si
pushw %di
movw %ax, %di
@@ -29,9 +29,9 @@ GLOBAL(memcpy)
popw %di
popw %si
retl
-ENDPROC(memcpy)
+SYM_FUNC_END(memcpy)
-GLOBAL(memset)
+SYM_FUNC_START_NOALIGN(memset)
pushw %di
movw %ax, %di
movzbl %dl, %eax
@@ -44,22 +44,22 @@ GLOBAL(memset)
rep; stosb
popw %di
retl
-ENDPROC(memset)
+SYM_FUNC_END(memset)
-GLOBAL(copy_from_fs)
+SYM_FUNC_START_NOALIGN(copy_from_fs)
pushw %ds
pushw %fs
popw %ds
calll memcpy
popw %ds
retl
-ENDPROC(copy_from_fs)
+SYM_FUNC_END(copy_from_fs)
-GLOBAL(copy_to_fs)
+SYM_FUNC_START_NOALIGN(copy_to_fs)
pushw %es
pushw %fs
popw %es
calll memcpy
popw %es
retl
-ENDPROC(copy_to_fs)
+SYM_FUNC_END(copy_to_fs)
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index 2c11c0f45d49..97d9b6d6c1af 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -300,7 +300,7 @@ _start:
# Part 2 of the header, from the old setup.S
.ascii "HdrS" # header signature
- .word 0x020d # header version number (>= 0x0105)
+ .word 0x020f # header version number (>= 0x0105)
# or else old loadlin-1.5 will fail)
.globl realmode_swtch
realmode_swtch: .word 0, 0 # default_switch, SETUPSEG
@@ -567,6 +567,7 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr
init_size: .long INIT_SIZE # kernel initialization size
handover_offset: .long 0 # Filled in by build.c
+kernel_info_offset: .long 0 # Filled in by build.c
# End of setup header #####################################################
diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S
index c22f9a7d1aeb..cbec8bd0841f 100644
--- a/arch/x86/boot/pmjump.S
+++ b/arch/x86/boot/pmjump.S
@@ -21,7 +21,7 @@
/*
* void protected_mode_jump(u32 entrypoint, u32 bootparams);
*/
-GLOBAL(protected_mode_jump)
+SYM_FUNC_START_NOALIGN(protected_mode_jump)
movl %edx, %esi # Pointer to boot_params table
xorl %ebx, %ebx
@@ -40,13 +40,13 @@ GLOBAL(protected_mode_jump)
# Transition to 32-bit mode
.byte 0x66, 0xea # ljmpl opcode
-2: .long in_pm32 # offset
+2: .long .Lin_pm32 # offset
.word __BOOT_CS # segment
-ENDPROC(protected_mode_jump)
+SYM_FUNC_END(protected_mode_jump)
.code32
.section ".text32","ax"
-GLOBAL(in_pm32)
+SYM_FUNC_START_LOCAL_NOALIGN(.Lin_pm32)
# Set up data segments for flat 32-bit mode
movl %ecx, %ds
movl %ecx, %es
@@ -72,4 +72,4 @@ GLOBAL(in_pm32)
lldt %cx
jmpl *%eax # Jump to the 32-bit entrypoint
-ENDPROC(in_pm32)
+SYM_FUNC_END(.Lin_pm32)
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c
index a93d44e58f9c..55e669d29e54 100644
--- a/arch/x86/boot/tools/build.c
+++ b/arch/x86/boot/tools/build.c
@@ -56,6 +56,7 @@ u8 buf[SETUP_SECT_MAX*512];
unsigned long efi32_stub_entry;
unsigned long efi64_stub_entry;
unsigned long efi_pe_entry;
+unsigned long kernel_info;
unsigned long startup_64;
/*----------------------------------------------------------------------*/
@@ -321,6 +322,7 @@ static void parse_zoffset(char *fname)
PARSE_ZOFS(p, efi32_stub_entry);
PARSE_ZOFS(p, efi64_stub_entry);
PARSE_ZOFS(p, efi_pe_entry);
+ PARSE_ZOFS(p, kernel_info);
PARSE_ZOFS(p, startup_64);
p = strchr(p, '\n');
@@ -410,6 +412,9 @@ int main(int argc, char ** argv)
efi_stub_entry_update();
+ /* Update kernel_info offset. */
+ put_unaligned_le32(kernel_info, &buf[0x268]);
+
crc = partial_crc32(buf, i, crc);
if (fwrite(buf, 1, i, dest) != i)
die("Writing setup failed");
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index d0a5ffeae8df..0b9654c7a05c 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -25,7 +25,6 @@ CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_SMP=y
-CONFIG_CALGARY_IOMMU=y
CONFIG_NR_CPUS=64
CONFIG_SCHED_SMT=y
CONFIG_PREEMPT_VOLUNTARY=y
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 759b1a927826..958440eae27e 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -39,6 +39,7 @@ obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
+obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
@@ -48,6 +49,7 @@ ifeq ($(avx_supported),yes)
obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
obj-$(CONFIG_CRYPTO_TWOFISH_AVX_X86_64) += twofish-avx-x86_64.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX_X86_64) += serpent-avx-x86_64.o
+ obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
endif
# These modules require assembler to support AVX2.
@@ -70,6 +72,7 @@ serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
+blake2s-x86_64-y := blake2s-core.o blake2s-glue.o
ifeq ($(avx_supported),yes)
camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \
diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index 4434607e366d..51d46d93efbc 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -71,7 +71,7 @@
* %r8
* %r9
*/
-__load_partial:
+SYM_FUNC_START_LOCAL(__load_partial)
xor %r9d, %r9d
pxor MSG, MSG
@@ -123,7 +123,7 @@ __load_partial:
.Lld_partial_8:
ret
-ENDPROC(__load_partial)
+SYM_FUNC_END(__load_partial)
/*
* __store_partial: internal ABI
@@ -137,7 +137,7 @@ ENDPROC(__load_partial)
* %r9
* %r10
*/
-__store_partial:
+SYM_FUNC_START_LOCAL(__store_partial)
mov LEN, %r8
mov DST, %r9
@@ -181,12 +181,12 @@ __store_partial:
.Lst_partial_1:
ret
-ENDPROC(__store_partial)
+SYM_FUNC_END(__store_partial)
/*
* void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv);
*/
-ENTRY(crypto_aegis128_aesni_init)
+SYM_FUNC_START(crypto_aegis128_aesni_init)
FRAME_BEGIN
/* load IV: */
@@ -226,13 +226,13 @@ ENTRY(crypto_aegis128_aesni_init)
FRAME_END
ret
-ENDPROC(crypto_aegis128_aesni_init)
+SYM_FUNC_END(crypto_aegis128_aesni_init)
/*
* void crypto_aegis128_aesni_ad(void *state, unsigned int length,
* const void *data);
*/
-ENTRY(crypto_aegis128_aesni_ad)
+SYM_FUNC_START(crypto_aegis128_aesni_ad)
FRAME_BEGIN
cmp $0x10, LEN
@@ -378,7 +378,7 @@ ENTRY(crypto_aegis128_aesni_ad)
.Lad_out:
FRAME_END
ret
-ENDPROC(crypto_aegis128_aesni_ad)
+SYM_FUNC_END(crypto_aegis128_aesni_ad)
.macro encrypt_block a s0 s1 s2 s3 s4 i
movdq\a (\i * 0x10)(SRC), MSG
@@ -402,7 +402,7 @@ ENDPROC(crypto_aegis128_aesni_ad)
* void crypto_aegis128_aesni_enc(void *state, unsigned int length,
* const void *src, void *dst);
*/
-ENTRY(crypto_aegis128_aesni_enc)
+SYM_FUNC_START(crypto_aegis128_aesni_enc)
FRAME_BEGIN
cmp $0x10, LEN
@@ -493,13 +493,13 @@ ENTRY(crypto_aegis128_aesni_enc)
.Lenc_out:
FRAME_END
ret
-ENDPROC(crypto_aegis128_aesni_enc)
+SYM_FUNC_END(crypto_aegis128_aesni_enc)
/*
* void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
-ENTRY(crypto_aegis128_aesni_enc_tail)
+SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
FRAME_BEGIN
/* load the state: */
@@ -533,7 +533,7 @@ ENTRY(crypto_aegis128_aesni_enc_tail)
FRAME_END
ret
-ENDPROC(crypto_aegis128_aesni_enc_tail)
+SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
.macro decrypt_block a s0 s1 s2 s3 s4 i
movdq\a (\i * 0x10)(SRC), MSG
@@ -556,7 +556,7 @@ ENDPROC(crypto_aegis128_aesni_enc_tail)
* void crypto_aegis128_aesni_dec(void *state, unsigned int length,
* const void *src, void *dst);
*/
-ENTRY(crypto_aegis128_aesni_dec)
+SYM_FUNC_START(crypto_aegis128_aesni_dec)
FRAME_BEGIN
cmp $0x10, LEN
@@ -647,13 +647,13 @@ ENTRY(crypto_aegis128_aesni_dec)
.Ldec_out:
FRAME_END
ret
-ENDPROC(crypto_aegis128_aesni_dec)
+SYM_FUNC_END(crypto_aegis128_aesni_dec)
/*
* void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
* const void *src, void *dst);
*/
-ENTRY(crypto_aegis128_aesni_dec_tail)
+SYM_FUNC_START(crypto_aegis128_aesni_dec_tail)
FRAME_BEGIN
/* load the state: */
@@ -697,13 +697,13 @@ ENTRY(crypto_aegis128_aesni_dec_tail)
FRAME_END
ret
-ENDPROC(crypto_aegis128_aesni_dec_tail)
+SYM_FUNC_END(crypto_aegis128_aesni_dec_tail)
/*
* void crypto_aegis128_aesni_final(void *state, void *tag_xor,
* u64 assoclen, u64 cryptlen);
*/
-ENTRY(crypto_aegis128_aesni_final)
+SYM_FUNC_START(crypto_aegis128_aesni_final)
FRAME_BEGIN
/* load the state: */
@@ -744,4 +744,4 @@ ENTRY(crypto_aegis128_aesni_final)
FRAME_END
ret
-ENDPROC(crypto_aegis128_aesni_final)
+SYM_FUNC_END(crypto_aegis128_aesni_final)
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
index 5f6a5af9c489..ec437db1fa54 100644
--- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
+++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S
@@ -544,11 +544,11 @@ ddq_add_8:
* aes_ctr_enc_128_avx_by8(void *in, void *iv, void *keys, void *out,
* unsigned int num_bytes)
*/
-ENTRY(aes_ctr_enc_128_avx_by8)
+SYM_FUNC_START(aes_ctr_enc_128_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_128
-ENDPROC(aes_ctr_enc_128_avx_by8)
+SYM_FUNC_END(aes_ctr_enc_128_avx_by8)
/*
* routine to do AES192 CTR enc/decrypt "by8"
@@ -557,11 +557,11 @@ ENDPROC(aes_ctr_enc_128_avx_by8)
* aes_ctr_enc_192_avx_by8(void *in, void *iv, void *keys, void *out,
* unsigned int num_bytes)
*/
-ENTRY(aes_ctr_enc_192_avx_by8)
+SYM_FUNC_START(aes_ctr_enc_192_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_192
-ENDPROC(aes_ctr_enc_192_avx_by8)
+SYM_FUNC_END(aes_ctr_enc_192_avx_by8)
/*
* routine to do AES256 CTR enc/decrypt "by8"
@@ -570,8 +570,8 @@ ENDPROC(aes_ctr_enc_192_avx_by8)
* aes_ctr_enc_256_avx_by8(void *in, void *iv, void *keys, void *out,
* unsigned int num_bytes)
*/
-ENTRY(aes_ctr_enc_256_avx_by8)
+SYM_FUNC_START(aes_ctr_enc_256_avx_by8)
/* call the aes main loop */
do_aes_ctrmain KEY_256
-ENDPROC(aes_ctr_enc_256_avx_by8)
+SYM_FUNC_END(aes_ctr_enc_256_avx_by8)
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index e40bdf024ba7..d28503f99f58 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1592,7 +1592,7 @@ _esb_loop_\@:
* poly = x^128 + x^127 + x^126 + x^121 + 1
*
*****************************************************************************/
-ENTRY(aesni_gcm_dec)
+SYM_FUNC_START(aesni_gcm_dec)
FUNC_SAVE
GCM_INIT %arg6, arg7, arg8, arg9
@@ -1600,7 +1600,7 @@ ENTRY(aesni_gcm_dec)
GCM_COMPLETE arg10, arg11
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_dec)
+SYM_FUNC_END(aesni_gcm_dec)
/*****************************************************************************
@@ -1680,7 +1680,7 @@ ENDPROC(aesni_gcm_dec)
*
* poly = x^128 + x^127 + x^126 + x^121 + 1
***************************************************************************/
-ENTRY(aesni_gcm_enc)
+SYM_FUNC_START(aesni_gcm_enc)
FUNC_SAVE
GCM_INIT %arg6, arg7, arg8, arg9
@@ -1689,7 +1689,7 @@ ENTRY(aesni_gcm_enc)
GCM_COMPLETE arg10, arg11
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_enc)
+SYM_FUNC_END(aesni_gcm_enc)
/*****************************************************************************
* void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
@@ -1702,12 +1702,12 @@ ENDPROC(aesni_gcm_enc)
* const u8 *aad, // Additional Authentication Data (AAD)
* u64 aad_len) // Length of AAD in bytes.
*/
-ENTRY(aesni_gcm_init)
+SYM_FUNC_START(aesni_gcm_init)
FUNC_SAVE
GCM_INIT %arg3, %arg4,%arg5, %arg6
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_init)
+SYM_FUNC_END(aesni_gcm_init)
/*****************************************************************************
* void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
@@ -1717,12 +1717,12 @@ ENDPROC(aesni_gcm_init)
* const u8 *in, // Plaintext input
* u64 plaintext_len, // Length of data in bytes for encryption.
*/
-ENTRY(aesni_gcm_enc_update)
+SYM_FUNC_START(aesni_gcm_enc_update)
FUNC_SAVE
GCM_ENC_DEC enc
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_enc_update)
+SYM_FUNC_END(aesni_gcm_enc_update)
/*****************************************************************************
* void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
@@ -1732,12 +1732,12 @@ ENDPROC(aesni_gcm_enc_update)
* const u8 *in, // Plaintext input
* u64 plaintext_len, // Length of data in bytes for encryption.
*/
-ENTRY(aesni_gcm_dec_update)
+SYM_FUNC_START(aesni_gcm_dec_update)
FUNC_SAVE
GCM_ENC_DEC dec
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_dec_update)
+SYM_FUNC_END(aesni_gcm_dec_update)
/*****************************************************************************
* void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
@@ -1747,19 +1747,18 @@ ENDPROC(aesni_gcm_dec_update)
* u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
* // 12 or 8.
*/
-ENTRY(aesni_gcm_finalize)
+SYM_FUNC_START(aesni_gcm_finalize)
FUNC_SAVE
GCM_COMPLETE %arg3 %arg4
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_finalize)
+SYM_FUNC_END(aesni_gcm_finalize)
#endif
-.align 4
-_key_expansion_128:
-_key_expansion_256a:
+SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128)
+SYM_FUNC_START_LOCAL(_key_expansion_256a)
pshufd $0b11111111, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
@@ -1769,11 +1768,10 @@ _key_expansion_256a:
movaps %xmm0, (TKEYP)
add $0x10, TKEYP
ret
-ENDPROC(_key_expansion_128)
-ENDPROC(_key_expansion_256a)
+SYM_FUNC_END(_key_expansion_256a)
+SYM_FUNC_END_ALIAS(_key_expansion_128)
-.align 4
-_key_expansion_192a:
+SYM_FUNC_START_LOCAL(_key_expansion_192a)
pshufd $0b01010101, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
@@ -1795,10 +1793,9 @@ _key_expansion_192a:
movaps %xmm1, 0x10(TKEYP)
add $0x20, TKEYP
ret
-ENDPROC(_key_expansion_192a)
+SYM_FUNC_END(_key_expansion_192a)
-.align 4
-_key_expansion_192b:
+SYM_FUNC_START_LOCAL(_key_expansion_192b)
pshufd $0b01010101, %xmm1, %xmm1
shufps $0b00010000, %xmm0, %xmm4
pxor %xmm4, %xmm0
@@ -1815,10 +1812,9 @@ _key_expansion_192b:
movaps %xmm0, (TKEYP)
add $0x10, TKEYP
ret
-ENDPROC(_key_expansion_192b)
+SYM_FUNC_END(_key_expansion_192b)
-.align 4
-_key_expansion_256b:
+SYM_FUNC_START_LOCAL(_key_expansion_256b)
pshufd $0b10101010, %xmm1, %xmm1
shufps $0b00010000, %xmm2, %xmm4
pxor %xmm4, %xmm2
@@ -1828,13 +1824,13 @@ _key_expansion_256b:
movaps %xmm2, (TKEYP)
add $0x10, TKEYP
ret
-ENDPROC(_key_expansion_256b)
+SYM_FUNC_END(_key_expansion_256b)
/*
* int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
* unsigned int key_len)
*/
-ENTRY(aesni_set_key)
+SYM_FUNC_START(aesni_set_key)
FRAME_BEGIN
#ifndef __x86_64__
pushl KEYP
@@ -1943,12 +1939,12 @@ ENTRY(aesni_set_key)
#endif
FRAME_END
ret
-ENDPROC(aesni_set_key)
+SYM_FUNC_END(aesni_set_key)
/*
* void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
*/
-ENTRY(aesni_enc)
+SYM_FUNC_START(aesni_enc)
FRAME_BEGIN
#ifndef __x86_64__
pushl KEYP
@@ -1967,7 +1963,7 @@ ENTRY(aesni_enc)
#endif
FRAME_END
ret
-ENDPROC(aesni_enc)
+SYM_FUNC_END(aesni_enc)
/*
* _aesni_enc1: internal ABI
@@ -1981,8 +1977,7 @@ ENDPROC(aesni_enc)
* KEY
* TKEYP (T1)
*/
-.align 4
-_aesni_enc1:
+SYM_FUNC_START_LOCAL(_aesni_enc1)
movaps (KEYP), KEY # key
mov KEYP, TKEYP
pxor KEY, STATE # round 0
@@ -2025,7 +2020,7 @@ _aesni_enc1:
movaps 0x70(TKEYP), KEY
AESENCLAST KEY STATE
ret
-ENDPROC(_aesni_enc1)
+SYM_FUNC_END(_aesni_enc1)
/*
* _aesni_enc4: internal ABI
@@ -2045,8 +2040,7 @@ ENDPROC(_aesni_enc1)
* KEY
* TKEYP (T1)
*/
-.align 4
-_aesni_enc4:
+SYM_FUNC_START_LOCAL(_aesni_enc4)
movaps (KEYP), KEY # key
mov KEYP, TKEYP
pxor KEY, STATE1 # round 0
@@ -2134,12 +2128,12 @@ _aesni_enc4:
AESENCLAST KEY STATE3
AESENCLAST KEY STATE4
ret
-ENDPROC(_aesni_enc4)
+SYM_FUNC_END(_aesni_enc4)
/*
* void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
*/
-ENTRY(aesni_dec)
+SYM_FUNC_START(aesni_dec)
FRAME_BEGIN
#ifndef __x86_64__
pushl KEYP
@@ -2159,7 +2153,7 @@ ENTRY(aesni_dec)
#endif
FRAME_END
ret
-ENDPROC(aesni_dec)
+SYM_FUNC_END(aesni_dec)
/*
* _aesni_dec1: internal ABI
@@ -2173,8 +2167,7 @@ ENDPROC(aesni_dec)
* KEY
* TKEYP (T1)
*/
-.align 4
-_aesni_dec1:
+SYM_FUNC_START_LOCAL(_aesni_dec1)
movaps (KEYP), KEY # key
mov KEYP, TKEYP
pxor KEY, STATE # round 0
@@ -2217,7 +2210,7 @@ _aesni_dec1:
movaps 0x70(TKEYP), KEY
AESDECLAST KEY STATE
ret
-ENDPROC(_aesni_dec1)
+SYM_FUNC_END(_aesni_dec1)
/*
* _aesni_dec4: internal ABI
@@ -2237,8 +2230,7 @@ ENDPROC(_aesni_dec1)
* KEY
* TKEYP (T1)
*/
-.align 4
-_aesni_dec4:
+SYM_FUNC_START_LOCAL(_aesni_dec4)
movaps (KEYP), KEY # key
mov KEYP, TKEYP
pxor KEY, STATE1 # round 0
@@ -2326,13 +2318,13 @@ _aesni_dec4:
AESDECLAST KEY STATE3
AESDECLAST KEY STATE4
ret
-ENDPROC(_aesni_dec4)
+SYM_FUNC_END(_aesni_dec4)
/*
* void aesni_ecb_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
* size_t len)
*/
-ENTRY(aesni_ecb_enc)
+SYM_FUNC_START(aesni_ecb_enc)
FRAME_BEGIN
#ifndef __x86_64__
pushl LEN
@@ -2386,13 +2378,13 @@ ENTRY(aesni_ecb_enc)
#endif
FRAME_END
ret
-ENDPROC(aesni_ecb_enc)
+SYM_FUNC_END(aesni_ecb_enc)
/*
* void aesni_ecb_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
* size_t len);
*/
-ENTRY(aesni_ecb_dec)
+SYM_FUNC_START(aesni_ecb_dec)
FRAME_BEGIN
#ifndef __x86_64__
pushl LEN
@@ -2447,13 +2439,13 @@ ENTRY(aesni_ecb_dec)
#endif
FRAME_END
ret
-ENDPROC(aesni_ecb_dec)
+SYM_FUNC_END(aesni_ecb_dec)
/*
* void aesni_cbc_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
* size_t len, u8 *iv)
*/
-ENTRY(aesni_cbc_enc)
+SYM_FUNC_START(aesni_cbc_enc)
FRAME_BEGIN
#ifndef __x86_64__
pushl IVP
@@ -2491,13 +2483,13 @@ ENTRY(aesni_cbc_enc)
#endif
FRAME_END
ret
-ENDPROC(aesni_cbc_enc)
+SYM_FUNC_END(aesni_cbc_enc)
/*
* void aesni_cbc_dec(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
* size_t len, u8 *iv)
*/
-ENTRY(aesni_cbc_dec)
+SYM_FUNC_START(aesni_cbc_dec)
FRAME_BEGIN
#ifndef __x86_64__
pushl IVP
@@ -2584,7 +2576,7 @@ ENTRY(aesni_cbc_dec)
#endif
FRAME_END
ret
-ENDPROC(aesni_cbc_dec)
+SYM_FUNC_END(aesni_cbc_dec)
#ifdef __x86_64__
.pushsection .rodata
@@ -2604,8 +2596,7 @@ ENDPROC(aesni_cbc_dec)
* INC: == 1, in little endian
* BSWAP_MASK == endian swapping mask
*/
-.align 4
-_aesni_inc_init:
+SYM_FUNC_START_LOCAL(_aesni_inc_init)
movaps .Lbswap_mask, BSWAP_MASK
movaps IV, CTR
PSHUFB_XMM BSWAP_MASK CTR
@@ -2613,7 +2604,7 @@ _aesni_inc_init:
MOVQ_R64_XMM TCTR_LOW INC
MOVQ_R64_XMM CTR TCTR_LOW
ret
-ENDPROC(_aesni_inc_init)
+SYM_FUNC_END(_aesni_inc_init)
/*
* _aesni_inc: internal ABI
@@ -2630,8 +2621,7 @@ ENDPROC(_aesni_inc_init)
* CTR: == output IV, in little endian
* TCTR_LOW: == lower qword of CTR
*/
-.align 4
-_aesni_inc:
+SYM_FUNC_START_LOCAL(_aesni_inc)
paddq INC, CTR
add $1, TCTR_LOW
jnc .Linc_low
@@ -2642,13 +2632,13 @@ _aesni_inc:
movaps CTR, IV
PSHUFB_XMM BSWAP_MASK IV
ret
-ENDPROC(_aesni_inc)
+SYM_FUNC_END(_aesni_inc)
/*
* void aesni_ctr_enc(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
* size_t len, u8 *iv)
*/
-ENTRY(aesni_ctr_enc)
+SYM_FUNC_START(aesni_ctr_enc)
FRAME_BEGIN
cmp $16, LEN
jb .Lctr_enc_just_ret
@@ -2705,7 +2695,7 @@ ENTRY(aesni_ctr_enc)
.Lctr_enc_just_ret:
FRAME_END
ret
-ENDPROC(aesni_ctr_enc)
+SYM_FUNC_END(aesni_ctr_enc)
/*
* _aesni_gf128mul_x_ble: internal ABI
@@ -2729,7 +2719,7 @@ ENDPROC(aesni_ctr_enc)
* void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, const u8 *dst, u8 *src,
* bool enc, u8 *iv)
*/
-ENTRY(aesni_xts_crypt8)
+SYM_FUNC_START(aesni_xts_crypt8)
FRAME_BEGIN
cmpb $0, %cl
movl $0, %ecx
@@ -2833,6 +2823,6 @@ ENTRY(aesni_xts_crypt8)
FRAME_END
ret
-ENDPROC(aesni_xts_crypt8)
+SYM_FUNC_END(aesni_xts_crypt8)
#endif
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index 91c039ab5699..bfa1c0b3e5b4 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -1775,12 +1775,12 @@ _initial_blocks_done\@:
# const u8 *aad, /* Additional Authentication Data (AAD)*/
# u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
#############################################################
-ENTRY(aesni_gcm_init_avx_gen2)
+SYM_FUNC_START(aesni_gcm_init_avx_gen2)
FUNC_SAVE
INIT GHASH_MUL_AVX, PRECOMPUTE_AVX
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_init_avx_gen2)
+SYM_FUNC_END(aesni_gcm_init_avx_gen2)
###############################################################################
#void aesni_gcm_enc_update_avx_gen2(
@@ -1790,7 +1790,7 @@ ENDPROC(aesni_gcm_init_avx_gen2)
# const u8 *in, /* Plaintext input */
# u64 plaintext_len) /* Length of data in Bytes for encryption. */
###############################################################################
-ENTRY(aesni_gcm_enc_update_avx_gen2)
+SYM_FUNC_START(aesni_gcm_enc_update_avx_gen2)
FUNC_SAVE
mov keysize, %eax
cmp $32, %eax
@@ -1809,7 +1809,7 @@ key_256_enc_update:
GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, ENC, 13
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_enc_update_avx_gen2)
+SYM_FUNC_END(aesni_gcm_enc_update_avx_gen2)
###############################################################################
#void aesni_gcm_dec_update_avx_gen2(
@@ -1819,7 +1819,7 @@ ENDPROC(aesni_gcm_enc_update_avx_gen2)
# const u8 *in, /* Ciphertext input */
# u64 plaintext_len) /* Length of data in Bytes for encryption. */
###############################################################################
-ENTRY(aesni_gcm_dec_update_avx_gen2)
+SYM_FUNC_START(aesni_gcm_dec_update_avx_gen2)
FUNC_SAVE
mov keysize,%eax
cmp $32, %eax
@@ -1838,7 +1838,7 @@ key_256_dec_update:
GCM_ENC_DEC INITIAL_BLOCKS_AVX, GHASH_8_ENCRYPT_8_PARALLEL_AVX, GHASH_LAST_8_AVX, GHASH_MUL_AVX, DEC, 13
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_dec_update_avx_gen2)
+SYM_FUNC_END(aesni_gcm_dec_update_avx_gen2)
###############################################################################
#void aesni_gcm_finalize_avx_gen2(
@@ -1848,7 +1848,7 @@ ENDPROC(aesni_gcm_dec_update_avx_gen2)
# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
# Valid values are 16 (most likely), 12 or 8. */
###############################################################################
-ENTRY(aesni_gcm_finalize_avx_gen2)
+SYM_FUNC_START(aesni_gcm_finalize_avx_gen2)
FUNC_SAVE
mov keysize,%eax
cmp $32, %eax
@@ -1867,7 +1867,7 @@ key_256_finalize:
GCM_COMPLETE GHASH_MUL_AVX, 13, arg3, arg4
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_finalize_avx_gen2)
+SYM_FUNC_END(aesni_gcm_finalize_avx_gen2)
#endif /* CONFIG_AS_AVX */
@@ -2746,12 +2746,12 @@ _initial_blocks_done\@:
# const u8 *aad, /* Additional Authentication Data (AAD)*/
# u64 aad_len) /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */
#############################################################
-ENTRY(aesni_gcm_init_avx_gen4)
+SYM_FUNC_START(aesni_gcm_init_avx_gen4)
FUNC_SAVE
INIT GHASH_MUL_AVX2, PRECOMPUTE_AVX2
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_init_avx_gen4)
+SYM_FUNC_END(aesni_gcm_init_avx_gen4)
###############################################################################
#void aesni_gcm_enc_avx_gen4(
@@ -2761,7 +2761,7 @@ ENDPROC(aesni_gcm_init_avx_gen4)
# const u8 *in, /* Plaintext input */
# u64 plaintext_len) /* Length of data in Bytes for encryption. */
###############################################################################
-ENTRY(aesni_gcm_enc_update_avx_gen4)
+SYM_FUNC_START(aesni_gcm_enc_update_avx_gen4)
FUNC_SAVE
mov keysize,%eax
cmp $32, %eax
@@ -2780,7 +2780,7 @@ key_256_enc_update4:
GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, ENC, 13
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_enc_update_avx_gen4)
+SYM_FUNC_END(aesni_gcm_enc_update_avx_gen4)
###############################################################################
#void aesni_gcm_dec_update_avx_gen4(
@@ -2790,7 +2790,7 @@ ENDPROC(aesni_gcm_enc_update_avx_gen4)
# const u8 *in, /* Ciphertext input */
# u64 plaintext_len) /* Length of data in Bytes for encryption. */
###############################################################################
-ENTRY(aesni_gcm_dec_update_avx_gen4)
+SYM_FUNC_START(aesni_gcm_dec_update_avx_gen4)
FUNC_SAVE
mov keysize,%eax
cmp $32, %eax
@@ -2809,7 +2809,7 @@ key_256_dec_update4:
GCM_ENC_DEC INITIAL_BLOCKS_AVX2, GHASH_8_ENCRYPT_8_PARALLEL_AVX2, GHASH_LAST_8_AVX2, GHASH_MUL_AVX2, DEC, 13
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_dec_update_avx_gen4)
+SYM_FUNC_END(aesni_gcm_dec_update_avx_gen4)
###############################################################################
#void aesni_gcm_finalize_avx_gen4(
@@ -2819,7 +2819,7 @@ ENDPROC(aesni_gcm_dec_update_avx_gen4)
# u64 auth_tag_len)# /* Authenticated Tag Length in bytes.
# Valid values are 16 (most likely), 12 or 8. */
###############################################################################
-ENTRY(aesni_gcm_finalize_avx_gen4)
+SYM_FUNC_START(aesni_gcm_finalize_avx_gen4)
FUNC_SAVE
mov keysize,%eax
cmp $32, %eax
@@ -2838,6 +2838,6 @@ key_256_finalize4:
GCM_COMPLETE GHASH_MUL_AVX2, 13, arg3, arg4
FUNC_RESTORE
ret
-ENDPROC(aesni_gcm_finalize_avx_gen4)
+SYM_FUNC_END(aesni_gcm_finalize_avx_gen4)
#endif /* CONFIG_AS_AVX2 */
diff --git a/arch/x86/crypto/blake2s-core.S b/arch/x86/crypto/blake2s-core.S
new file mode 100644
index 000000000000..24910b766bdd
--- /dev/null
+++ b/arch/x86/crypto/blake2s-core.S
@@ -0,0 +1,258 @@
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (C) 2017-2019 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ */
+
+#include <linux/linkage.h>
+
+.section .rodata.cst32.BLAKE2S_IV, "aM", @progbits, 32
+.align 32
+IV: .octa 0xA54FF53A3C6EF372BB67AE856A09E667
+ .octa 0x5BE0CD191F83D9AB9B05688C510E527F
+.section .rodata.cst16.ROT16, "aM", @progbits, 16
+.align 16
+ROT16: .octa 0x0D0C0F0E09080B0A0504070601000302
+.section .rodata.cst16.ROR328, "aM", @progbits, 16
+.align 16
+ROR328: .octa 0x0C0F0E0D080B0A090407060500030201
+.section .rodata.cst64.BLAKE2S_SIGMA, "aM", @progbits, 160
+.align 64
+SIGMA:
+.byte 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
+.byte 14, 4, 9, 13, 10, 8, 15, 6, 5, 1, 0, 11, 3, 12, 2, 7
+.byte 11, 12, 5, 15, 8, 0, 2, 13, 9, 10, 3, 7, 4, 14, 6, 1
+.byte 7, 3, 13, 11, 9, 1, 12, 14, 15, 2, 5, 4, 8, 6, 10, 0
+.byte 9, 5, 2, 10, 0, 7, 4, 15, 3, 14, 11, 6, 13, 1, 12, 8
+.byte 2, 6, 0, 8, 12, 10, 11, 3, 1, 4, 7, 15, 9, 13, 5, 14
+.byte 12, 1, 14, 4, 5, 15, 13, 10, 8, 0, 6, 9, 11, 7, 3, 2
+.byte 13, 7, 12, 3, 11, 14, 1, 9, 2, 5, 15, 8, 10, 0, 4, 6
+.byte 6, 14, 11, 0, 15, 9, 3, 8, 10, 12, 13, 1, 5, 2, 7, 4
+.byte 10, 8, 7, 1, 2, 4, 6, 5, 13, 15, 9, 3, 0, 11, 14, 12
+#ifdef CONFIG_AS_AVX512
+.section .rodata.cst64.BLAKE2S_SIGMA2, "aM", @progbits, 640
+.align 64
+SIGMA2:
+.long 0, 2, 4, 6, 1, 3, 5, 7, 14, 8, 10, 12, 15, 9, 11, 13
+.long 8, 2, 13, 15, 10, 9, 12, 3, 6, 4, 0, 14, 5, 11, 1, 7
+.long 11, 13, 8, 6, 5, 10, 14, 3, 2, 4, 12, 15, 1, 0, 7, 9
+.long 11, 10, 7, 0, 8, 15, 1, 13, 3, 6, 2, 12, 4, 14, 9, 5
+.long 4, 10, 9, 14, 15, 0, 11, 8, 1, 7, 3, 13, 2, 5, 6, 12
+.long 2, 11, 4, 15, 14, 3, 10, 8, 13, 6, 5, 7, 0, 12, 1, 9
+.long 4, 8, 15, 9, 14, 11, 13, 5, 3, 2, 1, 12, 6, 10, 7, 0
+.long 6, 13, 0, 14, 12, 2, 1, 11, 15, 4, 5, 8, 7, 9, 3, 10
+.long 15, 5, 4, 13, 10, 7, 3, 11, 12, 2, 0, 6, 9, 8, 1, 14
+.long 8, 7, 14, 11, 13, 15, 0, 12, 10, 4, 5, 6, 3, 2, 1, 9
+#endif /* CONFIG_AS_AVX512 */
+
+.text
+#ifdef CONFIG_AS_SSSE3
+SYM_FUNC_START(blake2s_compress_ssse3)
+ testq %rdx,%rdx
+ je .Lendofloop
+ movdqu (%rdi),%xmm0
+ movdqu 0x10(%rdi),%xmm1
+ movdqa ROT16(%rip),%xmm12
+ movdqa ROR328(%rip),%xmm13
+ movdqu 0x20(%rdi),%xmm14
+ movq %rcx,%xmm15
+ leaq SIGMA+0xa0(%rip),%r8
+ jmp .Lbeginofloop
+ .align 32
+.Lbeginofloop:
+ movdqa %xmm0,%xmm10
+ movdqa %xmm1,%xmm11
+ paddq %xmm15,%xmm14
+ movdqa IV(%rip),%xmm2
+ movdqa %xmm14,%xmm3
+ pxor IV+0x10(%rip),%xmm3
+ leaq SIGMA(%rip),%rcx
+.Lroundloop:
+ movzbl (%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ movzbl 0x1(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ movzbl 0x2(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ movzbl 0x3(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ punpckldq %xmm5,%xmm4
+ punpckldq %xmm7,%xmm6
+ punpcklqdq %xmm6,%xmm4
+ paddd %xmm4,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm12,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0xc,%xmm1
+ pslld $0x14,%xmm8
+ por %xmm8,%xmm1
+ movzbl 0x4(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ movzbl 0x5(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ movzbl 0x6(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ movzbl 0x7(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ punpckldq %xmm6,%xmm5
+ punpckldq %xmm4,%xmm7
+ punpcklqdq %xmm7,%xmm5
+ paddd %xmm5,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm13,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0x7,%xmm1
+ pslld $0x19,%xmm8
+ por %xmm8,%xmm1
+ pshufd $0x93,%xmm0,%xmm0
+ pshufd $0x4e,%xmm3,%xmm3
+ pshufd $0x39,%xmm2,%xmm2
+ movzbl 0x8(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ movzbl 0x9(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ movzbl 0xa(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ movzbl 0xb(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ punpckldq %xmm7,%xmm6
+ punpckldq %xmm5,%xmm4
+ punpcklqdq %xmm4,%xmm6
+ paddd %xmm6,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm12,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0xc,%xmm1
+ pslld $0x14,%xmm8
+ por %xmm8,%xmm1
+ movzbl 0xc(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm7
+ movzbl 0xd(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm4
+ movzbl 0xe(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm5
+ movzbl 0xf(%rcx),%eax
+ movd (%rsi,%rax,4),%xmm6
+ punpckldq %xmm4,%xmm7
+ punpckldq %xmm6,%xmm5
+ punpcklqdq %xmm5,%xmm7
+ paddd %xmm7,%xmm0
+ paddd %xmm1,%xmm0
+ pxor %xmm0,%xmm3
+ pshufb %xmm13,%xmm3
+ paddd %xmm3,%xmm2
+ pxor %xmm2,%xmm1
+ movdqa %xmm1,%xmm8
+ psrld $0x7,%xmm1
+ pslld $0x19,%xmm8
+ por %xmm8,%xmm1
+ pshufd $0x39,%xmm0,%xmm0
+ pshufd $0x4e,%xmm3,%xmm3
+ pshufd $0x93,%xmm2,%xmm2
+ addq $0x10,%rcx
+ cmpq %r8,%rcx
+ jnz .Lroundloop
+ pxor %xmm2,%xmm0
+ pxor %xmm3,%xmm1
+ pxor %xmm10,%xmm0
+ pxor %xmm11,%xmm1
+ addq $0x40,%rsi
+ decq %rdx
+ jnz .Lbeginofloop
+ movdqu %xmm0,(%rdi)
+ movdqu %xmm1,0x10(%rdi)
+ movdqu %xmm14,0x20(%rdi)
+.Lendofloop:
+ ret
+SYM_FUNC_END(blake2s_compress_ssse3)
+#endif /* CONFIG_AS_SSSE3 */
+
+#ifdef CONFIG_AS_AVX512
+SYM_FUNC_START(blake2s_compress_avx512)
+ vmovdqu (%rdi),%xmm0
+ vmovdqu 0x10(%rdi),%xmm1
+ vmovdqu 0x20(%rdi),%xmm4
+ vmovq %rcx,%xmm5
+ vmovdqa IV(%rip),%xmm14
+ vmovdqa IV+16(%rip),%xmm15
+ jmp .Lblake2s_compress_avx512_mainloop
+.align 32
+.Lblake2s_compress_avx512_mainloop:
+ vmovdqa %xmm0,%xmm10
+ vmovdqa %xmm1,%xmm11
+ vpaddq %xmm5,%xmm4,%xmm4
+ vmovdqa %xmm14,%xmm2
+ vpxor %xmm15,%xmm4,%xmm3
+ vmovdqu (%rsi),%ymm6
+ vmovdqu 0x20(%rsi),%ymm7
+ addq $0x40,%rsi
+ leaq SIGMA2(%rip),%rax
+ movb $0xa,%cl
+.Lblake2s_compress_avx512_roundloop:
+ addq $0x40,%rax
+ vmovdqa -0x40(%rax),%ymm8
+ vmovdqa -0x20(%rax),%ymm9
+ vpermi2d %ymm7,%ymm6,%ymm8
+ vpermi2d %ymm7,%ymm6,%ymm9
+ vmovdqa %ymm8,%ymm6
+ vmovdqa %ymm9,%ymm7
+ vpaddd %xmm8,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x10,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0xc,%xmm1,%xmm1
+ vextracti128 $0x1,%ymm8,%xmm8
+ vpaddd %xmm8,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x8,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0x7,%xmm1,%xmm1
+ vpshufd $0x93,%xmm0,%xmm0
+ vpshufd $0x4e,%xmm3,%xmm3
+ vpshufd $0x39,%xmm2,%xmm2
+ vpaddd %xmm9,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x10,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0xc,%xmm1,%xmm1
+ vextracti128 $0x1,%ymm9,%xmm9
+ vpaddd %xmm9,%xmm0,%xmm0
+ vpaddd %xmm1,%xmm0,%xmm0
+ vpxor %xmm0,%xmm3,%xmm3
+ vprord $0x8,%xmm3,%xmm3
+ vpaddd %xmm3,%xmm2,%xmm2
+ vpxor %xmm2,%xmm1,%xmm1
+ vprord $0x7,%xmm1,%xmm1
+ vpshufd $0x39,%xmm0,%xmm0
+ vpshufd $0x4e,%xmm3,%xmm3
+ vpshufd $0x93,%xmm2,%xmm2
+ decb %cl
+ jne .Lblake2s_compress_avx512_roundloop
+ vpxor %xmm10,%xmm0,%xmm0
+ vpxor %xmm11,%xmm1,%xmm1
+ vpxor %xmm2,%xmm0,%xmm0
+ vpxor %xmm3,%xmm1,%xmm1
+ decq %rdx
+ jne .Lblake2s_compress_avx512_mainloop
+ vmovdqu %xmm0,(%rdi)
+ vmovdqu %xmm1,0x10(%rdi)
+ vmovdqu %xmm4,0x20(%rdi)
+ vzeroupper
+ retq
+SYM_FUNC_END(blake2s_compress_avx512)
+#endif /* CONFIG_AS_AVX512 */
diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
new file mode 100644
index 000000000000..4a37ba7cdbe5
--- /dev/null
+++ b/arch/x86/crypto/blake2s-glue.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <crypto/internal/blake2s.h>
+#include <crypto/internal/simd.h>
+#include <crypto/internal/hash.h>
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cpufeature.h>
+#include <asm/fpu/api.h>
+#include <asm/processor.h>
+#include <asm/simd.h>
+
+asmlinkage void blake2s_compress_ssse3(struct blake2s_state *state,
+ const u8 *block, const size_t nblocks,
+ const u32 inc);
+asmlinkage void blake2s_compress_avx512(struct blake2s_state *state,
+ const u8 *block, const size_t nblocks,
+ const u32 inc);
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_ssse3);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(blake2s_use_avx512);
+
+void blake2s_compress_arch(struct blake2s_state *state,
+ const u8 *block, size_t nblocks,
+ const u32 inc)
+{
+ /* SIMD disables preemption, so relax after processing each page. */
+ BUILD_BUG_ON(PAGE_SIZE / BLAKE2S_BLOCK_SIZE < 8);
+
+ if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
+ blake2s_compress_generic(state, block, nblocks, inc);
+ return;
+ }
+
+ for (;;) {
+ const size_t blocks = min_t(size_t, nblocks,
+ PAGE_SIZE / BLAKE2S_BLOCK_SIZE);
+
+ kernel_fpu_begin();
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ static_branch_likely(&blake2s_use_avx512))
+ blake2s_compress_avx512(state, block, blocks, inc);
+ else
+ blake2s_compress_ssse3(state, block, blocks, inc);
+ kernel_fpu_end();
+
+ nblocks -= blocks;
+ if (!nblocks)
+ break;
+ block += blocks * BLAKE2S_BLOCK_SIZE;
+ }
+}
+EXPORT_SYMBOL(blake2s_compress_arch);
+
+static int crypto_blake2s_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen)
+{
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+
+ if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE) {
+ crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+ return -EINVAL;
+ }
+
+ memcpy(tctx->key, key, keylen);
+ tctx->keylen = keylen;
+
+ return 0;
+}
+
+static int crypto_blake2s_init(struct shash_desc *desc)
+{
+ struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+ struct blake2s_state *state = shash_desc_ctx(desc);
+ const int outlen = crypto_shash_digestsize(desc->tfm);
+
+ if (tctx->keylen)
+ blake2s_init_key(state, outlen, tctx->key, tctx->keylen);
+ else
+ blake2s_init(state, outlen);
+
+ return 0;
+}
+
+static int crypto_blake2s_update(struct shash_desc *desc, const u8 *in,
+ unsigned int inlen)
+{
+ struct blake2s_state *state = shash_desc_ctx(desc);
+ const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+
+ if (unlikely(!inlen))
+ return 0;
+ if (inlen > fill) {
+ memcpy(state->buf + state->buflen, in, fill);
+ blake2s_compress_arch(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
+ state->buflen = 0;
+ in += fill;
+ inlen -= fill;
+ }
+ if (inlen > BLAKE2S_BLOCK_SIZE) {
+ const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+ /* Hash one less (full) block than strictly possible */
+ blake2s_compress_arch(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
+ in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+ inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+ }
+ memcpy(state->buf + state->buflen, in, inlen);
+ state->buflen += inlen;
+
+ return 0;
+}
+
+static int crypto_blake2s_final(struct shash_desc *desc, u8 *out)
+{
+ struct blake2s_state *state = shash_desc_ctx(desc);
+
+ blake2s_set_lastblock(state);
+ memset(state->buf + state->buflen, 0,
+ BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+ blake2s_compress_arch(state, state->buf, 1, state->buflen);
+ cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+ memcpy(out, state->h, state->outlen);
+ memzero_explicit(state, sizeof(*state));
+
+ return 0;
+}
+
+static struct shash_alg blake2s_algs[] = {{
+ .base.cra_name = "blake2s-128",
+ .base.cra_driver_name = "blake2s-128-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_128_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-160",
+ .base.cra_driver_name = "blake2s-160-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_160_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-224",
+ .base.cra_driver_name = "blake2s-224-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_224_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}, {
+ .base.cra_name = "blake2s-256",
+ .base.cra_driver_name = "blake2s-256-x86",
+ .base.cra_flags = CRYPTO_ALG_OPTIONAL_KEY,
+ .base.cra_ctxsize = sizeof(struct blake2s_tfm_ctx),
+ .base.cra_priority = 200,
+ .base.cra_blocksize = BLAKE2S_BLOCK_SIZE,
+ .base.cra_module = THIS_MODULE,
+
+ .digestsize = BLAKE2S_256_HASH_SIZE,
+ .setkey = crypto_blake2s_setkey,
+ .init = crypto_blake2s_init,
+ .update = crypto_blake2s_update,
+ .final = crypto_blake2s_final,
+ .descsize = sizeof(struct blake2s_state),
+}};
+
+static int __init blake2s_mod_init(void)
+{
+ if (!boot_cpu_has(X86_FEATURE_SSSE3))
+ return 0;
+
+ static_branch_enable(&blake2s_use_ssse3);
+
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM |
+ XFEATURE_MASK_AVX512, NULL))
+ static_branch_enable(&blake2s_use_avx512);
+
+ return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+static void __exit blake2s_mod_exit(void)
+{
+ if (boot_cpu_has(X86_FEATURE_SSSE3))
+ crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+}
+
+module_init(blake2s_mod_init);
+module_exit(blake2s_mod_exit);
+
+MODULE_ALIAS_CRYPTO("blake2s-128");
+MODULE_ALIAS_CRYPTO("blake2s-128-x86");
+MODULE_ALIAS_CRYPTO("blake2s-160");
+MODULE_ALIAS_CRYPTO("blake2s-160-x86");
+MODULE_ALIAS_CRYPTO("blake2s-224");
+MODULE_ALIAS_CRYPTO("blake2s-224-x86");
+MODULE_ALIAS_CRYPTO("blake2s-256");
+MODULE_ALIAS_CRYPTO("blake2s-256-x86");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/crypto/blowfish-x86_64-asm_64.S b/arch/x86/crypto/blowfish-x86_64-asm_64.S
index 330db7a48af8..4222ac6d6584 100644
--- a/arch/x86/crypto/blowfish-x86_64-asm_64.S
+++ b/arch/x86/crypto/blowfish-x86_64-asm_64.S
@@ -103,7 +103,7 @@
bswapq RX0; \
xorq RX0, (RIO);
-ENTRY(__blowfish_enc_blk)
+SYM_FUNC_START(__blowfish_enc_blk)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -139,9 +139,9 @@ ENTRY(__blowfish_enc_blk)
.L__enc_xor:
xor_block();
ret;
-ENDPROC(__blowfish_enc_blk)
+SYM_FUNC_END(__blowfish_enc_blk)
-ENTRY(blowfish_dec_blk)
+SYM_FUNC_START(blowfish_dec_blk)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -171,7 +171,7 @@ ENTRY(blowfish_dec_blk)
movq %r11, %r12;
ret;
-ENDPROC(blowfish_dec_blk)
+SYM_FUNC_END(blowfish_dec_blk)
/**********************************************************************
4-way blowfish, four blocks parallel
@@ -283,7 +283,7 @@ ENDPROC(blowfish_dec_blk)
bswapq RX3; \
xorq RX3, 24(RIO);
-ENTRY(__blowfish_enc_blk_4way)
+SYM_FUNC_START(__blowfish_enc_blk_4way)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -330,9 +330,9 @@ ENTRY(__blowfish_enc_blk_4way)
popq %rbx;
popq %r12;
ret;
-ENDPROC(__blowfish_enc_blk_4way)
+SYM_FUNC_END(__blowfish_enc_blk_4way)
-ENTRY(blowfish_dec_blk_4way)
+SYM_FUNC_START(blowfish_dec_blk_4way)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -365,4 +365,4 @@ ENTRY(blowfish_dec_blk_4way)
popq %r12;
ret;
-ENDPROC(blowfish_dec_blk_4way)
+SYM_FUNC_END(blowfish_dec_blk_4way)
diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index a14af6eb09cb..d01ddd73de65 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -189,20 +189,20 @@
* larger and would only be 0.5% faster (on sandy-bridge).
*/
.align 8
-roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
+SYM_FUNC_START_LOCAL(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
roundsm16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
%xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15,
%rcx, (%r9));
ret;
-ENDPROC(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
+SYM_FUNC_END(roundsm16_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
.align 8
-roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
+SYM_FUNC_START_LOCAL(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
roundsm16(%xmm4, %xmm5, %xmm6, %xmm7, %xmm0, %xmm1, %xmm2, %xmm3,
%xmm12, %xmm13, %xmm14, %xmm15, %xmm8, %xmm9, %xmm10, %xmm11,
%rax, (%r9));
ret;
-ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
+SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
/*
* IN/OUT:
@@ -722,7 +722,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.text
.align 8
-__camellia_enc_blk16:
+SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 256 bytes
@@ -806,10 +806,10 @@ __camellia_enc_blk16:
%xmm15, %rax, %rcx, 24);
jmp .Lenc_done;
-ENDPROC(__camellia_enc_blk16)
+SYM_FUNC_END(__camellia_enc_blk16)
.align 8
-__camellia_dec_blk16:
+SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 256 bytes
@@ -891,9 +891,9 @@ __camellia_dec_blk16:
((key_table + (24) * 8) + 4)(CTX));
jmp .Ldec_max24;
-ENDPROC(__camellia_dec_blk16)
+SYM_FUNC_END(__camellia_dec_blk16)
-ENTRY(camellia_ecb_enc_16way)
+SYM_FUNC_START(camellia_ecb_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -916,9 +916,9 @@ ENTRY(camellia_ecb_enc_16way)
FRAME_END
ret;
-ENDPROC(camellia_ecb_enc_16way)
+SYM_FUNC_END(camellia_ecb_enc_16way)
-ENTRY(camellia_ecb_dec_16way)
+SYM_FUNC_START(camellia_ecb_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -946,9 +946,9 @@ ENTRY(camellia_ecb_dec_16way)
FRAME_END
ret;
-ENDPROC(camellia_ecb_dec_16way)
+SYM_FUNC_END(camellia_ecb_dec_16way)
-ENTRY(camellia_cbc_dec_16way)
+SYM_FUNC_START(camellia_cbc_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -997,7 +997,7 @@ ENTRY(camellia_cbc_dec_16way)
FRAME_END
ret;
-ENDPROC(camellia_cbc_dec_16way)
+SYM_FUNC_END(camellia_cbc_dec_16way)
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
@@ -1005,7 +1005,7 @@ ENDPROC(camellia_cbc_dec_16way)
vpslldq $8, tmp, tmp; \
vpsubq tmp, x, x;
-ENTRY(camellia_ctr_16way)
+SYM_FUNC_START(camellia_ctr_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -1110,7 +1110,7 @@ ENTRY(camellia_ctr_16way)
FRAME_END
ret;
-ENDPROC(camellia_ctr_16way)
+SYM_FUNC_END(camellia_ctr_16way)
#define gf128mul_x_ble(iv, mask, tmp) \
vpsrad $31, iv, tmp; \
@@ -1120,7 +1120,7 @@ ENDPROC(camellia_ctr_16way)
vpxor tmp, iv, iv;
.align 8
-camellia_xts_crypt_16way:
+SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -1254,9 +1254,9 @@ camellia_xts_crypt_16way:
FRAME_END
ret;
-ENDPROC(camellia_xts_crypt_16way)
+SYM_FUNC_END(camellia_xts_crypt_16way)
-ENTRY(camellia_xts_enc_16way)
+SYM_FUNC_START(camellia_xts_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -1268,9 +1268,9 @@ ENTRY(camellia_xts_enc_16way)
leaq __camellia_enc_blk16, %r9;
jmp camellia_xts_crypt_16way;
-ENDPROC(camellia_xts_enc_16way)
+SYM_FUNC_END(camellia_xts_enc_16way)
-ENTRY(camellia_xts_dec_16way)
+SYM_FUNC_START(camellia_xts_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -1286,4 +1286,4 @@ ENTRY(camellia_xts_dec_16way)
leaq __camellia_dec_blk16, %r9;
jmp camellia_xts_crypt_16way;
-ENDPROC(camellia_xts_dec_16way)
+SYM_FUNC_END(camellia_xts_dec_16way)
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 4be4c7c3ba27..563ef6e83cdd 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -223,20 +223,20 @@
* larger and would only marginally faster.
*/
.align 8
-roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd:
+SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
%ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
%rcx, (%r9));
ret;
-ENDPROC(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
+SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
.align 8
-roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab:
+SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
%ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
%rax, (%r9));
ret;
-ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
+SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
/*
* IN/OUT:
@@ -760,7 +760,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.text
.align 8
-__camellia_enc_blk32:
+SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 512 bytes
@@ -844,10 +844,10 @@ __camellia_enc_blk32:
%ymm15, %rax, %rcx, 24);
jmp .Lenc_done;
-ENDPROC(__camellia_enc_blk32)
+SYM_FUNC_END(__camellia_enc_blk32)
.align 8
-__camellia_dec_blk32:
+SYM_FUNC_START_LOCAL(__camellia_dec_blk32)
/* input:
* %rdi: ctx, CTX
* %rax: temporary storage, 512 bytes
@@ -929,9 +929,9 @@ __camellia_dec_blk32:
((key_table + (24) * 8) + 4)(CTX));
jmp .Ldec_max24;
-ENDPROC(__camellia_dec_blk32)
+SYM_FUNC_END(__camellia_dec_blk32)
-ENTRY(camellia_ecb_enc_32way)
+SYM_FUNC_START(camellia_ecb_enc_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (32 blocks)
@@ -958,9 +958,9 @@ ENTRY(camellia_ecb_enc_32way)
FRAME_END
ret;
-ENDPROC(camellia_ecb_enc_32way)
+SYM_FUNC_END(camellia_ecb_enc_32way)
-ENTRY(camellia_ecb_dec_32way)
+SYM_FUNC_START(camellia_ecb_dec_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (32 blocks)
@@ -992,9 +992,9 @@ ENTRY(camellia_ecb_dec_32way)
FRAME_END
ret;
-ENDPROC(camellia_ecb_dec_32way)
+SYM_FUNC_END(camellia_ecb_dec_32way)
-ENTRY(camellia_cbc_dec_32way)
+SYM_FUNC_START(camellia_cbc_dec_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (32 blocks)
@@ -1060,7 +1060,7 @@ ENTRY(camellia_cbc_dec_32way)
FRAME_END
ret;
-ENDPROC(camellia_cbc_dec_32way)
+SYM_FUNC_END(camellia_cbc_dec_32way)
#define inc_le128(x, minus_one, tmp) \
vpcmpeqq minus_one, x, tmp; \
@@ -1076,7 +1076,7 @@ ENDPROC(camellia_cbc_dec_32way)
vpslldq $8, tmp1, tmp1; \
vpsubq tmp1, x, x;
-ENTRY(camellia_ctr_32way)
+SYM_FUNC_START(camellia_ctr_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (32 blocks)
@@ -1200,7 +1200,7 @@ ENTRY(camellia_ctr_32way)
FRAME_END
ret;
-ENDPROC(camellia_ctr_32way)
+SYM_FUNC_END(camellia_ctr_32way)
#define gf128mul_x_ble(iv, mask, tmp) \
vpsrad $31, iv, tmp; \
@@ -1222,7 +1222,7 @@ ENDPROC(camellia_ctr_32way)
vpxor tmp1, iv, iv;
.align 8
-camellia_xts_crypt_32way:
+SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (32 blocks)
@@ -1367,9 +1367,9 @@ camellia_xts_crypt_32way:
FRAME_END
ret;
-ENDPROC(camellia_xts_crypt_32way)
+SYM_FUNC_END(camellia_xts_crypt_32way)
-ENTRY(camellia_xts_enc_32way)
+SYM_FUNC_START(camellia_xts_enc_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (32 blocks)
@@ -1382,9 +1382,9 @@ ENTRY(camellia_xts_enc_32way)
leaq __camellia_enc_blk32, %r9;
jmp camellia_xts_crypt_32way;
-ENDPROC(camellia_xts_enc_32way)
+SYM_FUNC_END(camellia_xts_enc_32way)
-ENTRY(camellia_xts_dec_32way)
+SYM_FUNC_START(camellia_xts_dec_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (32 blocks)
@@ -1400,4 +1400,4 @@ ENTRY(camellia_xts_dec_32way)
leaq __camellia_dec_blk32, %r9;
jmp camellia_xts_crypt_32way;
-ENDPROC(camellia_xts_dec_32way)
+SYM_FUNC_END(camellia_xts_dec_32way)
diff --git a/arch/x86/crypto/camellia-x86_64-asm_64.S b/arch/x86/crypto/camellia-x86_64-asm_64.S
index 23528bc18fc6..1372e6408850 100644
--- a/arch/x86/crypto/camellia-x86_64-asm_64.S
+++ b/arch/x86/crypto/camellia-x86_64-asm_64.S
@@ -175,7 +175,7 @@
bswapq RAB0; \
movq RAB0, 4*2(RIO);
-ENTRY(__camellia_enc_blk)
+SYM_FUNC_START(__camellia_enc_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -220,9 +220,9 @@ ENTRY(__camellia_enc_blk)
movq RR12, %r12;
ret;
-ENDPROC(__camellia_enc_blk)
+SYM_FUNC_END(__camellia_enc_blk)
-ENTRY(camellia_dec_blk)
+SYM_FUNC_START(camellia_dec_blk)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -258,7 +258,7 @@ ENTRY(camellia_dec_blk)
movq RR12, %r12;
ret;
-ENDPROC(camellia_dec_blk)
+SYM_FUNC_END(camellia_dec_blk)
/**********************************************************************
2-way camellia
@@ -409,7 +409,7 @@ ENDPROC(camellia_dec_blk)
bswapq RAB1; \
movq RAB1, 12*2(RIO);
-ENTRY(__camellia_enc_blk_2way)
+SYM_FUNC_START(__camellia_enc_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -456,9 +456,9 @@ ENTRY(__camellia_enc_blk_2way)
movq RR12, %r12;
popq %rbx;
ret;
-ENDPROC(__camellia_enc_blk_2way)
+SYM_FUNC_END(__camellia_enc_blk_2way)
-ENTRY(camellia_dec_blk_2way)
+SYM_FUNC_START(camellia_dec_blk_2way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -496,4 +496,4 @@ ENTRY(camellia_dec_blk_2way)
movq RR12, %r12;
movq RXOR, %rbx;
ret;
-ENDPROC(camellia_dec_blk_2way)
+SYM_FUNC_END(camellia_dec_blk_2way)
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index dc55c3332fcc..8a6181b08b59 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -209,7 +209,7 @@
.text
.align 16
-__cast5_enc_blk16:
+SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
/* input:
* %rdi: ctx
* RL1: blocks 1 and 2
@@ -280,10 +280,10 @@ __cast5_enc_blk16:
outunpack_blocks(RR4, RL4, RTMP, RX, RKM);
ret;
-ENDPROC(__cast5_enc_blk16)
+SYM_FUNC_END(__cast5_enc_blk16)
.align 16
-__cast5_dec_blk16:
+SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
/* input:
* %rdi: ctx
* RL1: encrypted blocks 1 and 2
@@ -357,9 +357,9 @@ __cast5_dec_blk16:
.L__skip_dec:
vpsrldq $4, RKR, RKR;
jmp .L__dec_tail;
-ENDPROC(__cast5_dec_blk16)
+SYM_FUNC_END(__cast5_dec_blk16)
-ENTRY(cast5_ecb_enc_16way)
+SYM_FUNC_START(cast5_ecb_enc_16way)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -394,9 +394,9 @@ ENTRY(cast5_ecb_enc_16way)
popq %r15;
FRAME_END
ret;
-ENDPROC(cast5_ecb_enc_16way)
+SYM_FUNC_END(cast5_ecb_enc_16way)
-ENTRY(cast5_ecb_dec_16way)
+SYM_FUNC_START(cast5_ecb_dec_16way)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -432,9 +432,9 @@ ENTRY(cast5_ecb_dec_16way)
popq %r15;
FRAME_END
ret;
-ENDPROC(cast5_ecb_dec_16way)
+SYM_FUNC_END(cast5_ecb_dec_16way)
-ENTRY(cast5_cbc_dec_16way)
+SYM_FUNC_START(cast5_cbc_dec_16way)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -484,9 +484,9 @@ ENTRY(cast5_cbc_dec_16way)
popq %r12;
FRAME_END
ret;
-ENDPROC(cast5_cbc_dec_16way)
+SYM_FUNC_END(cast5_cbc_dec_16way)
-ENTRY(cast5_ctr_16way)
+SYM_FUNC_START(cast5_ctr_16way)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -560,4 +560,4 @@ ENTRY(cast5_ctr_16way)
popq %r12;
FRAME_END
ret;
-ENDPROC(cast5_ctr_16way)
+SYM_FUNC_END(cast5_ctr_16way)
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index 4f0a7cdb94d9..932a3ce32a88 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -247,7 +247,7 @@
.text
.align 8
-__cast6_enc_blk8:
+SYM_FUNC_START_LOCAL(__cast6_enc_blk8)
/* input:
* %rdi: ctx
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
@@ -292,10 +292,10 @@ __cast6_enc_blk8:
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
ret;
-ENDPROC(__cast6_enc_blk8)
+SYM_FUNC_END(__cast6_enc_blk8)
.align 8
-__cast6_dec_blk8:
+SYM_FUNC_START_LOCAL(__cast6_dec_blk8)
/* input:
* %rdi: ctx
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
@@ -339,9 +339,9 @@ __cast6_dec_blk8:
outunpack_blocks(RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
ret;
-ENDPROC(__cast6_dec_blk8)
+SYM_FUNC_END(__cast6_dec_blk8)
-ENTRY(cast6_ecb_enc_8way)
+SYM_FUNC_START(cast6_ecb_enc_8way)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -362,9 +362,9 @@ ENTRY(cast6_ecb_enc_8way)
popq %r15;
FRAME_END
ret;
-ENDPROC(cast6_ecb_enc_8way)
+SYM_FUNC_END(cast6_ecb_enc_8way)
-ENTRY(cast6_ecb_dec_8way)
+SYM_FUNC_START(cast6_ecb_dec_8way)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -385,9 +385,9 @@ ENTRY(cast6_ecb_dec_8way)
popq %r15;
FRAME_END
ret;
-ENDPROC(cast6_ecb_dec_8way)
+SYM_FUNC_END(cast6_ecb_dec_8way)
-ENTRY(cast6_cbc_dec_8way)
+SYM_FUNC_START(cast6_cbc_dec_8way)
/* input:
* %rdi: ctx
* %rsi: dst
@@ -411,9 +411,9 @@ ENTRY(cast6_cbc_dec_8way)
popq %r12;
FRAME_END
ret;
-ENDPROC(cast6_cbc_dec_8way)
+SYM_FUNC_END(cast6_cbc_dec_8way)
-ENTRY(cast6_ctr_8way)
+SYM_FUNC_START(cast6_ctr_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -439,9 +439,9 @@ ENTRY(cast6_ctr_8way)
popq %r12;
FRAME_END
ret;
-ENDPROC(cast6_ctr_8way)
+SYM_FUNC_END(cast6_ctr_8way)
-ENTRY(cast6_xts_enc_8way)
+SYM_FUNC_START(cast6_xts_enc_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -466,9 +466,9 @@ ENTRY(cast6_xts_enc_8way)
popq %r15;
FRAME_END
ret;
-ENDPROC(cast6_xts_enc_8way)
+SYM_FUNC_END(cast6_xts_enc_8way)
-ENTRY(cast6_xts_dec_8way)
+SYM_FUNC_START(cast6_xts_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -493,4 +493,4 @@ ENTRY(cast6_xts_dec_8way)
popq %r15;
FRAME_END
ret;
-ENDPROC(cast6_xts_dec_8way)
+SYM_FUNC_END(cast6_xts_dec_8way)
diff --git a/arch/x86/crypto/chacha-avx2-x86_64.S b/arch/x86/crypto/chacha-avx2-x86_64.S
index 831e4434fc20..ee9a40ab4109 100644
--- a/arch/x86/crypto/chacha-avx2-x86_64.S
+++ b/arch/x86/crypto/chacha-avx2-x86_64.S
@@ -34,7 +34,7 @@ CTR4BL: .octa 0x00000000000000000000000000000002
.text
-ENTRY(chacha_2block_xor_avx2)
+SYM_FUNC_START(chacha_2block_xor_avx2)
# %rdi: Input state matrix, s
# %rsi: up to 2 data blocks output, o
# %rdx: up to 2 data blocks input, i
@@ -224,9 +224,9 @@ ENTRY(chacha_2block_xor_avx2)
lea -8(%r10),%rsp
jmp .Ldone2
-ENDPROC(chacha_2block_xor_avx2)
+SYM_FUNC_END(chacha_2block_xor_avx2)
-ENTRY(chacha_4block_xor_avx2)
+SYM_FUNC_START(chacha_4block_xor_avx2)
# %rdi: Input state matrix, s
# %rsi: up to 4 data blocks output, o
# %rdx: up to 4 data blocks input, i
@@ -529,9 +529,9 @@ ENTRY(chacha_4block_xor_avx2)
lea -8(%r10),%rsp
jmp .Ldone4
-ENDPROC(chacha_4block_xor_avx2)
+SYM_FUNC_END(chacha_4block_xor_avx2)
-ENTRY(chacha_8block_xor_avx2)
+SYM_FUNC_START(chacha_8block_xor_avx2)
# %rdi: Input state matrix, s
# %rsi: up to 8 data blocks output, o
# %rdx: up to 8 data blocks input, i
@@ -1018,4 +1018,4 @@ ENTRY(chacha_8block_xor_avx2)
jmp .Ldone8
-ENDPROC(chacha_8block_xor_avx2)
+SYM_FUNC_END(chacha_8block_xor_avx2)
diff --git a/arch/x86/crypto/chacha-avx512vl-x86_64.S b/arch/x86/crypto/chacha-avx512vl-x86_64.S
index 848f9c75fd4f..bb193fde123a 100644
--- a/arch/x86/crypto/chacha-avx512vl-x86_64.S
+++ b/arch/x86/crypto/chacha-avx512vl-x86_64.S
@@ -24,7 +24,7 @@ CTR8BL: .octa 0x00000003000000020000000100000000
.text
-ENTRY(chacha_2block_xor_avx512vl)
+SYM_FUNC_START(chacha_2block_xor_avx512vl)
# %rdi: Input state matrix, s
# %rsi: up to 2 data blocks output, o
# %rdx: up to 2 data blocks input, i
@@ -187,9 +187,9 @@ ENTRY(chacha_2block_xor_avx512vl)
jmp .Ldone2
-ENDPROC(chacha_2block_xor_avx512vl)
+SYM_FUNC_END(chacha_2block_xor_avx512vl)
-ENTRY(chacha_4block_xor_avx512vl)
+SYM_FUNC_START(chacha_4block_xor_avx512vl)
# %rdi: Input state matrix, s
# %rsi: up to 4 data blocks output, o
# %rdx: up to 4 data blocks input, i
@@ -453,9 +453,9 @@ ENTRY(chacha_4block_xor_avx512vl)
jmp .Ldone4
-ENDPROC(chacha_4block_xor_avx512vl)
+SYM_FUNC_END(chacha_4block_xor_avx512vl)
-ENTRY(chacha_8block_xor_avx512vl)
+SYM_FUNC_START(chacha_8block_xor_avx512vl)
# %rdi: Input state matrix, s
# %rsi: up to 8 data blocks output, o
# %rdx: up to 8 data blocks input, i
@@ -833,4 +833,4 @@ ENTRY(chacha_8block_xor_avx512vl)
jmp .Ldone8
-ENDPROC(chacha_8block_xor_avx512vl)
+SYM_FUNC_END(chacha_8block_xor_avx512vl)
diff --git a/arch/x86/crypto/chacha-ssse3-x86_64.S b/arch/x86/crypto/chacha-ssse3-x86_64.S
index 2d86c7d6dc88..a38ab2512a6f 100644
--- a/arch/x86/crypto/chacha-ssse3-x86_64.S
+++ b/arch/x86/crypto/chacha-ssse3-x86_64.S
@@ -33,7 +33,7 @@ CTRINC: .octa 0x00000003000000020000000100000000
*
* Clobbers: %r8d, %xmm4-%xmm7
*/
-chacha_permute:
+SYM_FUNC_START_LOCAL(chacha_permute)
movdqa ROT8(%rip),%xmm4
movdqa ROT16(%rip),%xmm5
@@ -109,9 +109,9 @@ chacha_permute:
jnz .Ldoubleround
ret
-ENDPROC(chacha_permute)
+SYM_FUNC_END(chacha_permute)
-ENTRY(chacha_block_xor_ssse3)
+SYM_FUNC_START(chacha_block_xor_ssse3)
# %rdi: Input state matrix, s
# %rsi: up to 1 data block output, o
# %rdx: up to 1 data block input, i
@@ -197,9 +197,9 @@ ENTRY(chacha_block_xor_ssse3)
lea -8(%r10),%rsp
jmp .Ldone
-ENDPROC(chacha_block_xor_ssse3)
+SYM_FUNC_END(chacha_block_xor_ssse3)
-ENTRY(hchacha_block_ssse3)
+SYM_FUNC_START(hchacha_block_ssse3)
# %rdi: Input state matrix, s
# %rsi: output (8 32-bit words)
# %edx: nrounds
@@ -218,9 +218,9 @@ ENTRY(hchacha_block_ssse3)
FRAME_END
ret
-ENDPROC(hchacha_block_ssse3)
+SYM_FUNC_END(hchacha_block_ssse3)
-ENTRY(chacha_4block_xor_ssse3)
+SYM_FUNC_START(chacha_4block_xor_ssse3)
# %rdi: Input state matrix, s
# %rsi: up to 4 data blocks output, o
# %rdx: up to 4 data blocks input, i
@@ -788,4 +788,4 @@ ENTRY(chacha_4block_xor_ssse3)
jmp .Ldone4
-ENDPROC(chacha_4block_xor_ssse3)
+SYM_FUNC_END(chacha_4block_xor_ssse3)
diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 388f95a4ec24..a94e30b6f941 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -7,7 +7,7 @@
*/
#include <crypto/algapi.h>
-#include <crypto/chacha.h>
+#include <crypto/internal/chacha.h>
#include <crypto/internal/simd.h>
#include <crypto/internal/skcipher.h>
#include <linux/kernel.h>
@@ -21,24 +21,24 @@ asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
asmlinkage void chacha_4block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void hchacha_block_ssse3(const u32 *state, u32 *out, int nrounds);
-#ifdef CONFIG_AS_AVX2
+
asmlinkage void chacha_2block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx2(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
-static bool chacha_use_avx2;
-#ifdef CONFIG_AS_AVX512
+
asmlinkage void chacha_2block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
unsigned int len, int nrounds);
-static bool chacha_use_avx512vl;
-#endif
-#endif
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
{
@@ -49,9 +49,8 @@ static unsigned int chacha_advance(unsigned int len, unsigned int maxblocks)
static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
unsigned int bytes, int nrounds)
{
-#ifdef CONFIG_AS_AVX2
-#ifdef CONFIG_AS_AVX512
- if (chacha_use_avx512vl) {
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ static_branch_likely(&chacha_use_avx512vl)) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
chacha_8block_xor_avx512vl(state, dst, src, bytes,
nrounds);
@@ -79,8 +78,9 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
return;
}
}
-#endif
- if (chacha_use_avx2) {
+
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ static_branch_likely(&chacha_use_avx2)) {
while (bytes >= CHACHA_BLOCK_SIZE * 8) {
chacha_8block_xor_avx2(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 8;
@@ -104,7 +104,7 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
return;
}
}
-#endif
+
while (bytes >= CHACHA_BLOCK_SIZE * 4) {
chacha_4block_xor_ssse3(state, dst, src, bytes, nrounds);
bytes -= CHACHA_BLOCK_SIZE * 4;
@@ -123,37 +123,76 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
}
}
-static int chacha_simd_stream_xor(struct skcipher_walk *walk,
+void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
+{
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
+ hchacha_block_generic(state, stream, nrounds);
+ } else {
+ kernel_fpu_begin();
+ hchacha_block_ssse3(state, stream, nrounds);
+ kernel_fpu_end();
+ }
+}
+EXPORT_SYMBOL(hchacha_block_arch);
+
+void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv)
+{
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+ chacha_init_generic(state, key, iv);
+}
+EXPORT_SYMBOL(chacha_init_arch);
+
+void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
+ int nrounds)
+{
+ state = PTR_ALIGN(state, CHACHA_STATE_ALIGN);
+
+ if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
+ bytes <= CHACHA_BLOCK_SIZE)
+ return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
+ kernel_fpu_begin();
+ chacha_dosimd(state, dst, src, bytes, nrounds);
+ kernel_fpu_end();
+}
+EXPORT_SYMBOL(chacha_crypt_arch);
+
+static int chacha_simd_stream_xor(struct skcipher_request *req,
const struct chacha_ctx *ctx, const u8 *iv)
{
u32 *state, state_buf[16 + 2] __aligned(8);
- int next_yield = 4096; /* bytes until next FPU yield */
- int err = 0;
+ struct skcipher_walk walk;
+ int err;
+
+ err = skcipher_walk_virt(&walk, req, false);
BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
- crypto_chacha_init(state, ctx, iv);
-
- while (walk->nbytes > 0) {
- unsigned int nbytes = walk->nbytes;
+ chacha_init_generic(state, ctx->key, iv);
- if (nbytes < walk->total) {
- nbytes = round_down(nbytes, walk->stride);
- next_yield -= nbytes;
- }
+ while (walk.nbytes > 0) {
+ unsigned int nbytes = walk.nbytes;
- chacha_dosimd(state, walk->dst.virt.addr, walk->src.virt.addr,
- nbytes, ctx->nrounds);
+ if (nbytes < walk.total)
+ nbytes = round_down(nbytes, walk.stride);
- if (next_yield <= 0) {
- /* temporarily allow preemption */
- kernel_fpu_end();
+ if (!static_branch_likely(&chacha_use_simd) ||
+ !crypto_simd_usable()) {
+ chacha_crypt_generic(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ } else {
kernel_fpu_begin();
- next_yield = 4096;
+ chacha_dosimd(state, walk.dst.virt.addr,
+ walk.src.virt.addr, nbytes,
+ ctx->nrounds);
+ kernel_fpu_end();
}
-
- err = skcipher_walk_done(walk, walk->nbytes - nbytes);
+ err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
}
return err;
@@ -163,55 +202,34 @@ static int chacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- int err;
-
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_chacha_crypt(req);
- err = skcipher_walk_virt(&walk, req, true);
- if (err)
- return err;
-
- kernel_fpu_begin();
- err = chacha_simd_stream_xor(&walk, ctx, req->iv);
- kernel_fpu_end();
- return err;
+ return chacha_simd_stream_xor(req, ctx, req->iv);
}
static int xchacha_simd(struct skcipher_request *req)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm);
- struct skcipher_walk walk;
- struct chacha_ctx subctx;
u32 *state, state_buf[16 + 2] __aligned(8);
+ struct chacha_ctx subctx;
u8 real_iv[16];
- int err;
-
- if (req->cryptlen <= CHACHA_BLOCK_SIZE || !crypto_simd_usable())
- return crypto_xchacha_crypt(req);
-
- err = skcipher_walk_virt(&walk, req, true);
- if (err)
- return err;
BUILD_BUG_ON(CHACHA_STATE_ALIGN != 16);
state = PTR_ALIGN(state_buf + 0, CHACHA_STATE_ALIGN);
- crypto_chacha_init(state, ctx, req->iv);
-
- kernel_fpu_begin();
-
- hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+ chacha_init_generic(state, ctx->key, req->iv);
+
+ if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
+ kernel_fpu_begin();
+ hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+ kernel_fpu_end();
+ } else {
+ hchacha_block_generic(state, subctx.key, ctx->nrounds);
+ }
subctx.nrounds = ctx->nrounds;
memcpy(&real_iv[0], req->iv + 24, 8);
memcpy(&real_iv[8], req->iv + 16, 8);
- err = chacha_simd_stream_xor(&walk, &subctx, real_iv);
-
- kernel_fpu_end();
-
- return err;
+ return chacha_simd_stream_xor(req, &subctx, real_iv);
}
static struct skcipher_alg algs[] = {
@@ -227,7 +245,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = CHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = chacha_simd,
.decrypt = chacha_simd,
}, {
@@ -242,7 +260,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha20_setkey,
+ .setkey = chacha20_setkey,
.encrypt = xchacha_simd,
.decrypt = xchacha_simd,
}, {
@@ -257,7 +275,7 @@ static struct skcipher_alg algs[] = {
.max_keysize = CHACHA_KEY_SIZE,
.ivsize = XCHACHA_IV_SIZE,
.chunksize = CHACHA_BLOCK_SIZE,
- .setkey = crypto_chacha12_setkey,
+ .setkey = chacha12_setkey,
.encrypt = xchacha_simd,
.decrypt = xchacha_simd,
},
@@ -266,24 +284,28 @@ static struct skcipher_alg algs[] = {
static int __init chacha_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_SSSE3))
- return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
- chacha_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
-#ifdef CONFIG_AS_AVX512
- chacha_use_avx512vl = chacha_use_avx2 &&
- boot_cpu_has(X86_FEATURE_AVX512VL) &&
- boot_cpu_has(X86_FEATURE_AVX512BW); /* kmovq */
-#endif
-#endif
+ return 0;
+
+ static_branch_enable(&chacha_use_simd);
+
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
+ static_branch_enable(&chacha_use_avx2);
+
+ if (IS_ENABLED(CONFIG_AS_AVX512) &&
+ boot_cpu_has(X86_FEATURE_AVX512VL) &&
+ boot_cpu_has(X86_FEATURE_AVX512BW)) /* kmovq */
+ static_branch_enable(&chacha_use_avx512vl);
+ }
return crypto_register_skciphers(algs, ARRAY_SIZE(algs));
}
static void __exit chacha_simd_mod_fini(void)
{
- crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
+ if (boot_cpu_has(X86_FEATURE_SSSE3))
+ crypto_unregister_skciphers(algs, ARRAY_SIZE(algs));
}
module_init(chacha_simd_mod_init);
diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pclmul_asm.S
index 1c099dc08cc3..9fd28ff65bc2 100644
--- a/arch/x86/crypto/crc32-pclmul_asm.S
+++ b/arch/x86/crypto/crc32-pclmul_asm.S
@@ -103,7 +103,7 @@
* size_t len, uint crc32)
*/
-ENTRY(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
+SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
movdqa (BUF), %xmm1
movdqa 0x10(BUF), %xmm2
movdqa 0x20(BUF), %xmm3
@@ -238,4 +238,4 @@ fold_64:
PEXTRD 0x01, %xmm1, %eax
ret
-ENDPROC(crc32_pclmul_le_16)
+SYM_FUNC_END(crc32_pclmul_le_16)
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index d9b734d0c8cc..0e6690e3618c 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -74,7 +74,7 @@
# unsigned int crc_pcl(u8 *buffer, int len, unsigned int crc_init);
.text
-ENTRY(crc_pcl)
+SYM_FUNC_START(crc_pcl)
#define bufp %rdi
#define bufp_dw %edi
#define bufp_w %di
@@ -311,7 +311,7 @@ do_return:
popq %rdi
popq %rbx
ret
-ENDPROC(crc_pcl)
+SYM_FUNC_END(crc_pcl)
.section .rodata, "a", @progbits
################################################################
diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
index 3d873e67749d..b2533d63030e 100644
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -95,7 +95,7 @@
# Assumes len >= 16.
#
.align 16
-ENTRY(crc_t10dif_pcl)
+SYM_FUNC_START(crc_t10dif_pcl)
movdqa .Lbswap_mask(%rip), BSWAP_MASK
@@ -280,7 +280,7 @@ ENTRY(crc_t10dif_pcl)
jge .Lfold_16_bytes_loop # 32 <= len <= 255
add $16, len
jmp .Lhandle_partial_segment # 17 <= len <= 31
-ENDPROC(crc_t10dif_pcl)
+SYM_FUNC_END(crc_t10dif_pcl)
.section .rodata, "a", @progbits
.align 16
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
new file mode 100644
index 000000000000..a52a3fb15727
--- /dev/null
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -0,0 +1,2475 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/*
+ * Copyright (c) 2017 Armando Faz <armfazh@ic.unicamp.br>. All Rights Reserved.
+ * Copyright (C) 2018-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ * Copyright (C) 2018 Samuel Neves <sneves@dei.uc.pt>. All Rights Reserved.
+ */
+
+#include <crypto/curve25519.h>
+#include <crypto/internal/kpp.h>
+
+#include <linux/types.h>
+#include <linux/jump_label.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include <asm/cpufeature.h>
+#include <asm/processor.h>
+
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_bmi2);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(curve25519_use_adx);
+
+enum { NUM_WORDS_ELTFP25519 = 4 };
+typedef __aligned(32) u64 eltfp25519_1w[NUM_WORDS_ELTFP25519];
+typedef __aligned(32) u64 eltfp25519_1w_buffer[2 * NUM_WORDS_ELTFP25519];
+
+#define mul_eltfp25519_1w_adx(c, a, b) do { \
+ mul_256x256_integer_adx(m.buffer, a, b); \
+ red_eltfp25519_1w_adx(c, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_1w_bmi2(c, a, b) do { \
+ mul_256x256_integer_bmi2(m.buffer, a, b); \
+ red_eltfp25519_1w_bmi2(c, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_1w_adx(a) do { \
+ sqr_256x256_integer_adx(m.buffer, a); \
+ red_eltfp25519_1w_adx(a, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_1w_bmi2(a) do { \
+ sqr_256x256_integer_bmi2(m.buffer, a); \
+ red_eltfp25519_1w_bmi2(a, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_2w_adx(c, a, b) do { \
+ mul2_256x256_integer_adx(m.buffer, a, b); \
+ red_eltfp25519_2w_adx(c, m.buffer); \
+} while (0)
+
+#define mul_eltfp25519_2w_bmi2(c, a, b) do { \
+ mul2_256x256_integer_bmi2(m.buffer, a, b); \
+ red_eltfp25519_2w_bmi2(c, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_2w_adx(a) do { \
+ sqr2_256x256_integer_adx(m.buffer, a); \
+ red_eltfp25519_2w_adx(a, m.buffer); \
+} while (0)
+
+#define sqr_eltfp25519_2w_bmi2(a) do { \
+ sqr2_256x256_integer_bmi2(m.buffer, a); \
+ red_eltfp25519_2w_bmi2(a, m.buffer); \
+} while (0)
+
+#define sqrn_eltfp25519_1w_adx(a, times) do { \
+ int ____counter = (times); \
+ while (____counter-- > 0) \
+ sqr_eltfp25519_1w_adx(a); \
+} while (0)
+
+#define sqrn_eltfp25519_1w_bmi2(a, times) do { \
+ int ____counter = (times); \
+ while (____counter-- > 0) \
+ sqr_eltfp25519_1w_bmi2(a); \
+} while (0)
+
+#define copy_eltfp25519_1w(C, A) do { \
+ (C)[0] = (A)[0]; \
+ (C)[1] = (A)[1]; \
+ (C)[2] = (A)[2]; \
+ (C)[3] = (A)[3]; \
+} while (0)
+
+#define setzero_eltfp25519_1w(C) do { \
+ (C)[0] = 0; \
+ (C)[1] = 0; \
+ (C)[2] = 0; \
+ (C)[3] = 0; \
+} while (0)
+
+__aligned(32) static const u64 table_ladder_8k[252 * NUM_WORDS_ELTFP25519] = {
+ /* 1 */ 0xfffffffffffffff3UL, 0xffffffffffffffffUL,
+ 0xffffffffffffffffUL, 0x5fffffffffffffffUL,
+ /* 2 */ 0x6b8220f416aafe96UL, 0x82ebeb2b4f566a34UL,
+ 0xd5a9a5b075a5950fUL, 0x5142b2cf4b2488f4UL,
+ /* 3 */ 0x6aaebc750069680cUL, 0x89cf7820a0f99c41UL,
+ 0x2a58d9183b56d0f4UL, 0x4b5aca80e36011a4UL,
+ /* 4 */ 0x329132348c29745dUL, 0xf4a2e616e1642fd7UL,
+ 0x1e45bb03ff67bc34UL, 0x306912d0f42a9b4aUL,
+ /* 5 */ 0xff886507e6af7154UL, 0x04f50e13dfeec82fUL,
+ 0xaa512fe82abab5ceUL, 0x174e251a68d5f222UL,
+ /* 6 */ 0xcf96700d82028898UL, 0x1743e3370a2c02c5UL,
+ 0x379eec98b4e86eaaUL, 0x0c59888a51e0482eUL,
+ /* 7 */ 0xfbcbf1d699b5d189UL, 0xacaef0d58e9fdc84UL,
+ 0xc1c20d06231f7614UL, 0x2938218da274f972UL,
+ /* 8 */ 0xf6af49beff1d7f18UL, 0xcc541c22387ac9c2UL,
+ 0x96fcc9ef4015c56bUL, 0x69c1627c690913a9UL,
+ /* 9 */ 0x7a86fd2f4733db0eUL, 0xfdb8c4f29e087de9UL,
+ 0x095e4b1a8ea2a229UL, 0x1ad7a7c829b37a79UL,
+ /* 10 */ 0x342d89cad17ea0c0UL, 0x67bedda6cced2051UL,
+ 0x19ca31bf2bb42f74UL, 0x3df7b4c84980acbbUL,
+ /* 11 */ 0xa8c6444dc80ad883UL, 0xb91e440366e3ab85UL,
+ 0xc215cda00164f6d8UL, 0x3d867c6ef247e668UL,
+ /* 12 */ 0xc7dd582bcc3e658cUL, 0xfd2c4748ee0e5528UL,
+ 0xa0fd9b95cc9f4f71UL, 0x7529d871b0675ddfUL,
+ /* 13 */ 0xb8f568b42d3cbd78UL, 0x1233011b91f3da82UL,
+ 0x2dce6ccd4a7c3b62UL, 0x75e7fc8e9e498603UL,
+ /* 14 */ 0x2f4f13f1fcd0b6ecUL, 0xf1a8ca1f29ff7a45UL,
+ 0xc249c1a72981e29bUL, 0x6ebe0dbb8c83b56aUL,
+ /* 15 */ 0x7114fa8d170bb222UL, 0x65a2dcd5bf93935fUL,
+ 0xbdc41f68b59c979aUL, 0x2f0eef79a2ce9289UL,
+ /* 16 */ 0x42ecbf0c083c37ceUL, 0x2930bc09ec496322UL,
+ 0xf294b0c19cfeac0dUL, 0x3780aa4bedfabb80UL,
+ /* 17 */ 0x56c17d3e7cead929UL, 0xe7cb4beb2e5722c5UL,
+ 0x0ce931732dbfe15aUL, 0x41b883c7621052f8UL,
+ /* 18 */ 0xdbf75ca0c3d25350UL, 0x2936be086eb1e351UL,
+ 0xc936e03cb4a9b212UL, 0x1d45bf82322225aaUL,
+ /* 19 */ 0xe81ab1036a024cc5UL, 0xe212201c304c9a72UL,
+ 0xc5d73fba6832b1fcUL, 0x20ffdb5a4d839581UL,
+ /* 20 */ 0xa283d367be5d0fadUL, 0x6c2b25ca8b164475UL,
+ 0x9d4935467caaf22eUL, 0x5166408eee85ff49UL,
+ /* 21 */ 0x3c67baa2fab4e361UL, 0xb3e433c67ef35cefUL,
+ 0x5259729241159b1cUL, 0x6a621892d5b0ab33UL,
+ /* 22 */ 0x20b74a387555cdcbUL, 0x532aa10e1208923fUL,
+ 0xeaa17b7762281dd1UL, 0x61ab3443f05c44bfUL,
+ /* 23 */ 0x257a6c422324def8UL, 0x131c6c1017e3cf7fUL,
+ 0x23758739f630a257UL, 0x295a407a01a78580UL,
+ /* 24 */ 0xf8c443246d5da8d9UL, 0x19d775450c52fa5dUL,
+ 0x2afcfc92731bf83dUL, 0x7d10c8e81b2b4700UL,
+ /* 25 */ 0xc8e0271f70baa20bUL, 0x993748867ca63957UL,
+ 0x5412efb3cb7ed4bbUL, 0x3196d36173e62975UL,
+ /* 26 */ 0xde5bcad141c7dffcUL, 0x47cc8cd2b395c848UL,
+ 0xa34cd942e11af3cbUL, 0x0256dbf2d04ecec2UL,
+ /* 27 */ 0x875ab7e94b0e667fUL, 0xcad4dd83c0850d10UL,
+ 0x47f12e8f4e72c79fUL, 0x5f1a87bb8c85b19bUL,
+ /* 28 */ 0x7ae9d0b6437f51b8UL, 0x12c7ce5518879065UL,
+ 0x2ade09fe5cf77aeeUL, 0x23a05a2f7d2c5627UL,
+ /* 29 */ 0x5908e128f17c169aUL, 0xf77498dd8ad0852dUL,
+ 0x74b4c4ceab102f64UL, 0x183abadd10139845UL,
+ /* 30 */ 0xb165ba8daa92aaacUL, 0xd5c5ef9599386705UL,
+ 0xbe2f8f0cf8fc40d1UL, 0x2701e635ee204514UL,
+ /* 31 */ 0x629fa80020156514UL, 0xf223868764a8c1ceUL,
+ 0x5b894fff0b3f060eUL, 0x60d9944cf708a3faUL,
+ /* 32 */ 0xaeea001a1c7a201fUL, 0xebf16a633ee2ce63UL,
+ 0x6f7709594c7a07e1UL, 0x79b958150d0208cbUL,
+ /* 33 */ 0x24b55e5301d410e7UL, 0xe3a34edff3fdc84dUL,
+ 0xd88768e4904032d8UL, 0x131384427b3aaeecUL,
+ /* 34 */ 0x8405e51286234f14UL, 0x14dc4739adb4c529UL,
+ 0xb8a2b5b250634ffdUL, 0x2fe2a94ad8a7ff93UL,
+ /* 35 */ 0xec5c57efe843faddUL, 0x2843ce40f0bb9918UL,
+ 0xa4b561d6cf3d6305UL, 0x743629bde8fb777eUL,
+ /* 36 */ 0x343edd46bbaf738fUL, 0xed981828b101a651UL,
+ 0xa401760b882c797aUL, 0x1fc223e28dc88730UL,
+ /* 37 */ 0x48604e91fc0fba0eUL, 0xb637f78f052c6fa4UL,
+ 0x91ccac3d09e9239cUL, 0x23f7eed4437a687cUL,
+ /* 38 */ 0x5173b1118d9bd800UL, 0x29d641b63189d4a7UL,
+ 0xfdbf177988bbc586UL, 0x2959894fcad81df5UL,
+ /* 39 */ 0xaebc8ef3b4bbc899UL, 0x4148995ab26992b9UL,
+ 0x24e20b0134f92cfbUL, 0x40d158894a05dee8UL,
+ /* 40 */ 0x46b00b1185af76f6UL, 0x26bac77873187a79UL,
+ 0x3dc0bf95ab8fff5fUL, 0x2a608bd8945524d7UL,
+ /* 41 */ 0x26449588bd446302UL, 0x7c4bc21c0388439cUL,
+ 0x8e98a4f383bd11b2UL, 0x26218d7bc9d876b9UL,
+ /* 42 */ 0xe3081542997c178aUL, 0x3c2d29a86fb6606fUL,
+ 0x5c217736fa279374UL, 0x7dde05734afeb1faUL,
+ /* 43 */ 0x3bf10e3906d42babUL, 0xe4f7803e1980649cUL,
+ 0xe6053bf89595bf7aUL, 0x394faf38da245530UL,
+ /* 44 */ 0x7a8efb58896928f4UL, 0xfbc778e9cc6a113cUL,
+ 0x72670ce330af596fUL, 0x48f222a81d3d6cf7UL,
+ /* 45 */ 0xf01fce410d72caa7UL, 0x5a20ecc7213b5595UL,
+ 0x7bc21165c1fa1483UL, 0x07f89ae31da8a741UL,
+ /* 46 */ 0x05d2c2b4c6830ff9UL, 0xd43e330fc6316293UL,
+ 0xa5a5590a96d3a904UL, 0x705edb91a65333b6UL,
+ /* 47 */ 0x048ee15e0bb9a5f7UL, 0x3240cfca9e0aaf5dUL,
+ 0x8f4b71ceedc4a40bUL, 0x621c0da3de544a6dUL,
+ /* 48 */ 0x92872836a08c4091UL, 0xce8375b010c91445UL,
+ 0x8a72eb524f276394UL, 0x2667fcfa7ec83635UL,
+ /* 49 */ 0x7f4c173345e8752aUL, 0x061b47feee7079a5UL,
+ 0x25dd9afa9f86ff34UL, 0x3780cef5425dc89cUL,
+ /* 50 */ 0x1a46035a513bb4e9UL, 0x3e1ef379ac575adaUL,
+ 0xc78c5f1c5fa24b50UL, 0x321a967634fd9f22UL,
+ /* 51 */ 0x946707b8826e27faUL, 0x3dca84d64c506fd0UL,
+ 0xc189218075e91436UL, 0x6d9284169b3b8484UL,
+ /* 52 */ 0x3a67e840383f2ddfUL, 0x33eec9a30c4f9b75UL,
+ 0x3ec7c86fa783ef47UL, 0x26ec449fbac9fbc4UL,
+ /* 53 */ 0x5c0f38cba09b9e7dUL, 0x81168cc762a3478cUL,
+ 0x3e23b0d306fc121cUL, 0x5a238aa0a5efdcddUL,
+ /* 54 */ 0x1ba26121c4ea43ffUL, 0x36f8c77f7c8832b5UL,
+ 0x88fbea0b0adcf99aUL, 0x5ca9938ec25bebf9UL,
+ /* 55 */ 0xd5436a5e51fccda0UL, 0x1dbc4797c2cd893bUL,
+ 0x19346a65d3224a08UL, 0x0f5034e49b9af466UL,
+ /* 56 */ 0xf23c3967a1e0b96eUL, 0xe58b08fa867a4d88UL,
+ 0xfb2fabc6a7341679UL, 0x2a75381eb6026946UL,
+ /* 57 */ 0xc80a3be4c19420acUL, 0x66b1f6c681f2b6dcUL,
+ 0x7cf7036761e93388UL, 0x25abbbd8a660a4c4UL,
+ /* 58 */ 0x91ea12ba14fd5198UL, 0x684950fc4a3cffa9UL,
+ 0xf826842130f5ad28UL, 0x3ea988f75301a441UL,
+ /* 59 */ 0xc978109a695f8c6fUL, 0x1746eb4a0530c3f3UL,
+ 0x444d6d77b4459995UL, 0x75952b8c054e5cc7UL,
+ /* 60 */ 0xa3703f7915f4d6aaUL, 0x66c346202f2647d8UL,
+ 0xd01469df811d644bUL, 0x77fea47d81a5d71fUL,
+ /* 61 */ 0xc5e9529ef57ca381UL, 0x6eeeb4b9ce2f881aUL,
+ 0xb6e91a28e8009bd6UL, 0x4b80be3e9afc3fecUL,
+ /* 62 */ 0x7e3773c526aed2c5UL, 0x1b4afcb453c9a49dUL,
+ 0xa920bdd7baffb24dUL, 0x7c54699f122d400eUL,
+ /* 63 */ 0xef46c8e14fa94bc8UL, 0xe0b074ce2952ed5eUL,
+ 0xbea450e1dbd885d5UL, 0x61b68649320f712cUL,
+ /* 64 */ 0x8a485f7309ccbdd1UL, 0xbd06320d7d4d1a2dUL,
+ 0x25232973322dbef4UL, 0x445dc4758c17f770UL,
+ /* 65 */ 0xdb0434177cc8933cUL, 0xed6fe82175ea059fUL,
+ 0x1efebefdc053db34UL, 0x4adbe867c65daf99UL,
+ /* 66 */ 0x3acd71a2a90609dfUL, 0xe5e991856dd04050UL,
+ 0x1ec69b688157c23cUL, 0x697427f6885cfe4dUL,
+ /* 67 */ 0xd7be7b9b65e1a851UL, 0xa03d28d522c536ddUL,
+ 0x28399d658fd2b645UL, 0x49e5b7e17c2641e1UL,
+ /* 68 */ 0x6f8c3a98700457a4UL, 0x5078f0a25ebb6778UL,
+ 0xd13c3ccbc382960fUL, 0x2e003258a7df84b1UL,
+ /* 69 */ 0x8ad1f39be6296a1cUL, 0xc1eeaa652a5fbfb2UL,
+ 0x33ee0673fd26f3cbUL, 0x59256173a69d2cccUL,
+ /* 70 */ 0x41ea07aa4e18fc41UL, 0xd9fc19527c87a51eUL,
+ 0xbdaacb805831ca6fUL, 0x445b652dc916694fUL,
+ /* 71 */ 0xce92a3a7f2172315UL, 0x1edc282de11b9964UL,
+ 0xa1823aafe04c314aUL, 0x790a2d94437cf586UL,
+ /* 72 */ 0x71c447fb93f6e009UL, 0x8922a56722845276UL,
+ 0xbf70903b204f5169UL, 0x2f7a89891ba319feUL,
+ /* 73 */ 0x02a08eb577e2140cUL, 0xed9a4ed4427bdcf4UL,
+ 0x5253ec44e4323cd1UL, 0x3e88363c14e9355bUL,
+ /* 74 */ 0xaa66c14277110b8cUL, 0x1ae0391610a23390UL,
+ 0x2030bd12c93fc2a2UL, 0x3ee141579555c7abUL,
+ /* 75 */ 0x9214de3a6d6e7d41UL, 0x3ccdd88607f17efeUL,
+ 0x674f1288f8e11217UL, 0x5682250f329f93d0UL,
+ /* 76 */ 0x6cf00b136d2e396eUL, 0x6e4cf86f1014debfUL,
+ 0x5930b1b5bfcc4e83UL, 0x047069b48aba16b6UL,
+ /* 77 */ 0x0d4ce4ab69b20793UL, 0xb24db91a97d0fb9eUL,
+ 0xcdfa50f54e00d01dUL, 0x221b1085368bddb5UL,
+ /* 78 */ 0xe7e59468b1e3d8d2UL, 0x53c56563bd122f93UL,
+ 0xeee8a903e0663f09UL, 0x61efa662cbbe3d42UL,
+ /* 79 */ 0x2cf8ddddde6eab2aUL, 0x9bf80ad51435f231UL,
+ 0x5deadacec9f04973UL, 0x29275b5d41d29b27UL,
+ /* 80 */ 0xcfde0f0895ebf14fUL, 0xb9aab96b054905a7UL,
+ 0xcae80dd9a1c420fdUL, 0x0a63bf2f1673bbc7UL,
+ /* 81 */ 0x092f6e11958fbc8cUL, 0x672a81e804822fadUL,
+ 0xcac8351560d52517UL, 0x6f3f7722c8f192f8UL,
+ /* 82 */ 0xf8ba90ccc2e894b7UL, 0x2c7557a438ff9f0dUL,
+ 0x894d1d855ae52359UL, 0x68e122157b743d69UL,
+ /* 83 */ 0xd87e5570cfb919f3UL, 0x3f2cdecd95798db9UL,
+ 0x2121154710c0a2ceUL, 0x3c66a115246dc5b2UL,
+ /* 84 */ 0xcbedc562294ecb72UL, 0xba7143c36a280b16UL,
+ 0x9610c2efd4078b67UL, 0x6144735d946a4b1eUL,
+ /* 85 */ 0x536f111ed75b3350UL, 0x0211db8c2041d81bUL,
+ 0xf93cb1000e10413cUL, 0x149dfd3c039e8876UL,
+ /* 86 */ 0xd479dde46b63155bUL, 0xb66e15e93c837976UL,
+ 0xdafde43b1f13e038UL, 0x5fafda1a2e4b0b35UL,
+ /* 87 */ 0x3600bbdf17197581UL, 0x3972050bbe3cd2c2UL,
+ 0x5938906dbdd5be86UL, 0x34fce5e43f9b860fUL,
+ /* 88 */ 0x75a8a4cd42d14d02UL, 0x828dabc53441df65UL,
+ 0x33dcabedd2e131d3UL, 0x3ebad76fb814d25fUL,
+ /* 89 */ 0xd4906f566f70e10fUL, 0x5d12f7aa51690f5aUL,
+ 0x45adb16e76cefcf2UL, 0x01f768aead232999UL,
+ /* 90 */ 0x2b6cc77b6248febdUL, 0x3cd30628ec3aaffdUL,
+ 0xce1c0b80d4ef486aUL, 0x4c3bff2ea6f66c23UL,
+ /* 91 */ 0x3f2ec4094aeaeb5fUL, 0x61b19b286e372ca7UL,
+ 0x5eefa966de2a701dUL, 0x23b20565de55e3efUL,
+ /* 92 */ 0xe301ca5279d58557UL, 0x07b2d4ce27c2874fUL,
+ 0xa532cd8a9dcf1d67UL, 0x2a52fee23f2bff56UL,
+ /* 93 */ 0x8624efb37cd8663dUL, 0xbbc7ac20ffbd7594UL,
+ 0x57b85e9c82d37445UL, 0x7b3052cb86a6ec66UL,
+ /* 94 */ 0x3482f0ad2525e91eUL, 0x2cb68043d28edca0UL,
+ 0xaf4f6d052e1b003aUL, 0x185f8c2529781b0aUL,
+ /* 95 */ 0xaa41de5bd80ce0d6UL, 0x9407b2416853e9d6UL,
+ 0x563ec36e357f4c3aUL, 0x4cc4b8dd0e297bceUL,
+ /* 96 */ 0xa2fc1a52ffb8730eUL, 0x1811f16e67058e37UL,
+ 0x10f9a366cddf4ee1UL, 0x72f4a0c4a0b9f099UL,
+ /* 97 */ 0x8c16c06f663f4ea7UL, 0x693b3af74e970fbaUL,
+ 0x2102e7f1d69ec345UL, 0x0ba53cbc968a8089UL,
+ /* 98 */ 0xca3d9dc7fea15537UL, 0x4c6824bb51536493UL,
+ 0xb9886314844006b1UL, 0x40d2a72ab454cc60UL,
+ /* 99 */ 0x5936a1b712570975UL, 0x91b9d648debda657UL,
+ 0x3344094bb64330eaUL, 0x006ba10d12ee51d0UL,
+ /* 100 */ 0x19228468f5de5d58UL, 0x0eb12f4c38cc05b0UL,
+ 0xa1039f9dd5601990UL, 0x4502d4ce4fff0e0bUL,
+ /* 101 */ 0xeb2054106837c189UL, 0xd0f6544c6dd3b93cUL,
+ 0x40727064c416d74fUL, 0x6e15c6114b502ef0UL,
+ /* 102 */ 0x4df2a398cfb1a76bUL, 0x11256c7419f2f6b1UL,
+ 0x4a497962066e6043UL, 0x705b3aab41355b44UL,
+ /* 103 */ 0x365ef536d797b1d8UL, 0x00076bd622ddf0dbUL,
+ 0x3bbf33b0e0575a88UL, 0x3777aa05c8e4ca4dUL,
+ /* 104 */ 0x392745c85578db5fUL, 0x6fda4149dbae5ae2UL,
+ 0xb1f0b00b8adc9867UL, 0x09963437d36f1da3UL,
+ /* 105 */ 0x7e824e90a5dc3853UL, 0xccb5f6641f135cbdUL,
+ 0x6736d86c87ce8fccUL, 0x625f3ce26604249fUL,
+ /* 106 */ 0xaf8ac8059502f63fUL, 0x0c05e70a2e351469UL,
+ 0x35292e9c764b6305UL, 0x1a394360c7e23ac3UL,
+ /* 107 */ 0xd5c6d53251183264UL, 0x62065abd43c2b74fUL,
+ 0xb5fbf5d03b973f9bUL, 0x13a3da3661206e5eUL,
+ /* 108 */ 0xc6bd5837725d94e5UL, 0x18e30912205016c5UL,
+ 0x2088ce1570033c68UL, 0x7fba1f495c837987UL,
+ /* 109 */ 0x5a8c7423f2f9079dUL, 0x1735157b34023fc5UL,
+ 0xe4f9b49ad2fab351UL, 0x6691ff72c878e33cUL,
+ /* 110 */ 0x122c2adedc5eff3eUL, 0xf8dd4bf1d8956cf4UL,
+ 0xeb86205d9e9e5bdaUL, 0x049b92b9d975c743UL,
+ /* 111 */ 0xa5379730b0f6c05aUL, 0x72a0ffacc6f3a553UL,
+ 0xb0032c34b20dcd6dUL, 0x470e9dbc88d5164aUL,
+ /* 112 */ 0xb19cf10ca237c047UL, 0xb65466711f6c81a2UL,
+ 0xb3321bd16dd80b43UL, 0x48c14f600c5fbe8eUL,
+ /* 113 */ 0x66451c264aa6c803UL, 0xb66e3904a4fa7da6UL,
+ 0xd45f19b0b3128395UL, 0x31602627c3c9bc10UL,
+ /* 114 */ 0x3120dc4832e4e10dUL, 0xeb20c46756c717f7UL,
+ 0x00f52e3f67280294UL, 0x566d4fc14730c509UL,
+ /* 115 */ 0x7e3a5d40fd837206UL, 0xc1e926dc7159547aUL,
+ 0x216730fba68d6095UL, 0x22e8c3843f69cea7UL,
+ /* 116 */ 0x33d074e8930e4b2bUL, 0xb6e4350e84d15816UL,
+ 0x5534c26ad6ba2365UL, 0x7773c12f89f1f3f3UL,
+ /* 117 */ 0x8cba404da57962aaUL, 0x5b9897a81999ce56UL,
+ 0x508e862f121692fcUL, 0x3a81907fa093c291UL,
+ /* 118 */ 0x0dded0ff4725a510UL, 0x10d8cc10673fc503UL,
+ 0x5b9d151c9f1f4e89UL, 0x32a5c1d5cb09a44cUL,
+ /* 119 */ 0x1e0aa442b90541fbUL, 0x5f85eb7cc1b485dbUL,
+ 0xbee595ce8a9df2e5UL, 0x25e496c722422236UL,
+ /* 120 */ 0x5edf3c46cd0fe5b9UL, 0x34e75a7ed2a43388UL,
+ 0xe488de11d761e352UL, 0x0e878a01a085545cUL,
+ /* 121 */ 0xba493c77e021bb04UL, 0x2b4d1843c7df899aUL,
+ 0x9ea37a487ae80d67UL, 0x67a9958011e41794UL,
+ /* 122 */ 0x4b58051a6697b065UL, 0x47e33f7d8d6ba6d4UL,
+ 0xbb4da8d483ca46c1UL, 0x68becaa181c2db0dUL,
+ /* 123 */ 0x8d8980e90b989aa5UL, 0xf95eb14a2c93c99bUL,
+ 0x51c6c7c4796e73a2UL, 0x6e228363b5efb569UL,
+ /* 124 */ 0xc6bbc0b02dd624c8UL, 0x777eb47dec8170eeUL,
+ 0x3cde15a004cfafa9UL, 0x1dc6bc087160bf9bUL,
+ /* 125 */ 0x2e07e043eec34002UL, 0x18e9fc677a68dc7fUL,
+ 0xd8da03188bd15b9aUL, 0x48fbc3bb00568253UL,
+ /* 126 */ 0x57547d4cfb654ce1UL, 0xd3565b82a058e2adUL,
+ 0xf63eaf0bbf154478UL, 0x47531ef114dfbb18UL,
+ /* 127 */ 0xe1ec630a4278c587UL, 0x5507d546ca8e83f3UL,
+ 0x85e135c63adc0c2bUL, 0x0aa7efa85682844eUL,
+ /* 128 */ 0x72691ba8b3e1f615UL, 0x32b4e9701fbe3ffaUL,
+ 0x97b6d92e39bb7868UL, 0x2cfe53dea02e39e8UL,
+ /* 129 */ 0x687392cd85cd52b0UL, 0x27ff66c910e29831UL,
+ 0x97134556a9832d06UL, 0x269bb0360a84f8a0UL,
+ /* 130 */ 0x706e55457643f85cUL, 0x3734a48c9b597d1bUL,
+ 0x7aee91e8c6efa472UL, 0x5cd6abc198a9d9e0UL,
+ /* 131 */ 0x0e04de06cb3ce41aUL, 0xd8c6eb893402e138UL,
+ 0x904659bb686e3772UL, 0x7215c371746ba8c8UL,
+ /* 132 */ 0xfd12a97eeae4a2d9UL, 0x9514b7516394f2c5UL,
+ 0x266fd5809208f294UL, 0x5c847085619a26b9UL,
+ /* 133 */ 0x52985410fed694eaUL, 0x3c905b934a2ed254UL,
+ 0x10bb47692d3be467UL, 0x063b3d2d69e5e9e1UL,
+ /* 134 */ 0x472726eedda57debUL, 0xefb6c4ae10f41891UL,
+ 0x2b1641917b307614UL, 0x117c554fc4f45b7cUL,
+ /* 135 */ 0xc07cf3118f9d8812UL, 0x01dbd82050017939UL,
+ 0xd7e803f4171b2827UL, 0x1015e87487d225eaUL,
+ /* 136 */ 0xc58de3fed23acc4dUL, 0x50db91c294a7be2dUL,
+ 0x0b94d43d1c9cf457UL, 0x6b1640fa6e37524aUL,
+ /* 137 */ 0x692f346c5fda0d09UL, 0x200b1c59fa4d3151UL,
+ 0xb8c46f760777a296UL, 0x4b38395f3ffdfbcfUL,
+ /* 138 */ 0x18d25e00be54d671UL, 0x60d50582bec8aba6UL,
+ 0x87ad8f263b78b982UL, 0x50fdf64e9cda0432UL,
+ /* 139 */ 0x90f567aac578dcf0UL, 0xef1e9b0ef2a3133bUL,
+ 0x0eebba9242d9de71UL, 0x15473c9bf03101c7UL,
+ /* 140 */ 0x7c77e8ae56b78095UL, 0xb678e7666e6f078eUL,
+ 0x2da0b9615348ba1fUL, 0x7cf931c1ff733f0bUL,
+ /* 141 */ 0x26b357f50a0a366cUL, 0xe9708cf42b87d732UL,
+ 0xc13aeea5f91cb2c0UL, 0x35d90c991143bb4cUL,
+ /* 142 */ 0x47c1c404a9a0d9dcUL, 0x659e58451972d251UL,
+ 0x3875a8c473b38c31UL, 0x1fbd9ed379561f24UL,
+ /* 143 */ 0x11fabc6fd41ec28dUL, 0x7ef8dfe3cd2a2dcaUL,
+ 0x72e73b5d8c404595UL, 0x6135fa4954b72f27UL,
+ /* 144 */ 0xccfc32a2de24b69cUL, 0x3f55698c1f095d88UL,
+ 0xbe3350ed5ac3f929UL, 0x5e9bf806ca477eebUL,
+ /* 145 */ 0xe9ce8fb63c309f68UL, 0x5376f63565e1f9f4UL,
+ 0xd1afcfb35a6393f1UL, 0x6632a1ede5623506UL,
+ /* 146 */ 0x0b7d6c390c2ded4cUL, 0x56cb3281df04cb1fUL,
+ 0x66305a1249ecc3c7UL, 0x5d588b60a38ca72aUL,
+ /* 147 */ 0xa6ecbf78e8e5f42dUL, 0x86eeb44b3c8a3eecUL,
+ 0xec219c48fbd21604UL, 0x1aaf1af517c36731UL,
+ /* 148 */ 0xc306a2836769bde7UL, 0x208280622b1e2adbUL,
+ 0x8027f51ffbff94a6UL, 0x76cfa1ce1124f26bUL,
+ /* 149 */ 0x18eb00562422abb6UL, 0xf377c4d58f8c29c3UL,
+ 0x4dbbc207f531561aUL, 0x0253b7f082128a27UL,
+ /* 150 */ 0x3d1f091cb62c17e0UL, 0x4860e1abd64628a9UL,
+ 0x52d17436309d4253UL, 0x356f97e13efae576UL,
+ /* 151 */ 0xd351e11aa150535bUL, 0x3e6b45bb1dd878ccUL,
+ 0x0c776128bed92c98UL, 0x1d34ae93032885b8UL,
+ /* 152 */ 0x4ba0488ca85ba4c3UL, 0x985348c33c9ce6ceUL,
+ 0x66124c6f97bda770UL, 0x0f81a0290654124aUL,
+ /* 153 */ 0x9ed09ca6569b86fdUL, 0x811009fd18af9a2dUL,
+ 0xff08d03f93d8c20aUL, 0x52a148199faef26bUL,
+ /* 154 */ 0x3e03f9dc2d8d1b73UL, 0x4205801873961a70UL,
+ 0xc0d987f041a35970UL, 0x07aa1f15a1c0d549UL,
+ /* 155 */ 0xdfd46ce08cd27224UL, 0x6d0a024f934e4239UL,
+ 0x808a7a6399897b59UL, 0x0a4556e9e13d95a2UL,
+ /* 156 */ 0xd21a991fe9c13045UL, 0x9b0e8548fe7751b8UL,
+ 0x5da643cb4bf30035UL, 0x77db28d63940f721UL,
+ /* 157 */ 0xfc5eeb614adc9011UL, 0x5229419ae8c411ebUL,
+ 0x9ec3e7787d1dcf74UL, 0x340d053e216e4cb5UL,
+ /* 158 */ 0xcac7af39b48df2b4UL, 0xc0faec2871a10a94UL,
+ 0x140a69245ca575edUL, 0x0cf1c37134273a4cUL,
+ /* 159 */ 0xc8ee306ac224b8a5UL, 0x57eaee7ccb4930b0UL,
+ 0xa1e806bdaacbe74fUL, 0x7d9a62742eeb657dUL,
+ /* 160 */ 0x9eb6b6ef546c4830UL, 0x885cca1fddb36e2eUL,
+ 0xe6b9f383ef0d7105UL, 0x58654fef9d2e0412UL,
+ /* 161 */ 0xa905c4ffbe0e8e26UL, 0x942de5df9b31816eUL,
+ 0x497d723f802e88e1UL, 0x30684dea602f408dUL,
+ /* 162 */ 0x21e5a278a3e6cb34UL, 0xaefb6e6f5b151dc4UL,
+ 0xb30b8e049d77ca15UL, 0x28c3c9cf53b98981UL,
+ /* 163 */ 0x287fb721556cdd2aUL, 0x0d317ca897022274UL,
+ 0x7468c7423a543258UL, 0x4a7f11464eb5642fUL,
+ /* 164 */ 0xa237a4774d193aa6UL, 0xd865986ea92129a1UL,
+ 0x24c515ecf87c1a88UL, 0x604003575f39f5ebUL,
+ /* 165 */ 0x47b9f189570a9b27UL, 0x2b98cede465e4b78UL,
+ 0x026df551dbb85c20UL, 0x74fcd91047e21901UL,
+ /* 166 */ 0x13e2a90a23c1bfa3UL, 0x0cb0074e478519f6UL,
+ 0x5ff1cbbe3af6cf44UL, 0x67fe5438be812dbeUL,
+ /* 167 */ 0xd13cf64fa40f05b0UL, 0x054dfb2f32283787UL,
+ 0x4173915b7f0d2aeaUL, 0x482f144f1f610d4eUL,
+ /* 168 */ 0xf6210201b47f8234UL, 0x5d0ae1929e70b990UL,
+ 0xdcd7f455b049567cUL, 0x7e93d0f1f0916f01UL,
+ /* 169 */ 0xdd79cbf18a7db4faUL, 0xbe8391bf6f74c62fUL,
+ 0x027145d14b8291bdUL, 0x585a73ea2cbf1705UL,
+ /* 170 */ 0x485ca03e928a0db2UL, 0x10fc01a5742857e7UL,
+ 0x2f482edbd6d551a7UL, 0x0f0433b5048fdb8aUL,
+ /* 171 */ 0x60da2e8dd7dc6247UL, 0x88b4c9d38cd4819aUL,
+ 0x13033ac001f66697UL, 0x273b24fe3b367d75UL,
+ /* 172 */ 0xc6e8f66a31b3b9d4UL, 0x281514a494df49d5UL,
+ 0xd1726fdfc8b23da7UL, 0x4b3ae7d103dee548UL,
+ /* 173 */ 0xc6256e19ce4b9d7eUL, 0xff5c5cf186e3c61cUL,
+ 0xacc63ca34b8ec145UL, 0x74621888fee66574UL,
+ /* 174 */ 0x956f409645290a1eUL, 0xef0bf8e3263a962eUL,
+ 0xed6a50eb5ec2647bUL, 0x0694283a9dca7502UL,
+ /* 175 */ 0x769b963643a2dcd1UL, 0x42b7c8ea09fc5353UL,
+ 0x4f002aee13397eabUL, 0x63005e2c19b7d63aUL,
+ /* 176 */ 0xca6736da63023beaUL, 0x966c7f6db12a99b7UL,
+ 0xace09390c537c5e1UL, 0x0b696063a1aa89eeUL,
+ /* 177 */ 0xebb03e97288c56e5UL, 0x432a9f9f938c8be8UL,
+ 0xa6a5a93d5b717f71UL, 0x1a5fb4c3e18f9d97UL,
+ /* 178 */ 0x1c94e7ad1c60cdceUL, 0xee202a43fc02c4a0UL,
+ 0x8dafe4d867c46a20UL, 0x0a10263c8ac27b58UL,
+ /* 179 */ 0xd0dea9dfe4432a4aUL, 0x856af87bbe9277c5UL,
+ 0xce8472acc212c71aUL, 0x6f151b6d9bbb1e91UL,
+ /* 180 */ 0x26776c527ceed56aUL, 0x7d211cb7fbf8faecUL,
+ 0x37ae66a6fd4609ccUL, 0x1f81b702d2770c42UL,
+ /* 181 */ 0x2fb0b057eac58392UL, 0xe1dd89fe29744e9dUL,
+ 0xc964f8eb17beb4f8UL, 0x29571073c9a2d41eUL,
+ /* 182 */ 0xa948a18981c0e254UL, 0x2df6369b65b22830UL,
+ 0xa33eb2d75fcfd3c6UL, 0x078cd6ec4199a01fUL,
+ /* 183 */ 0x4a584a41ad900d2fUL, 0x32142b78e2c74c52UL,
+ 0x68c4e8338431c978UL, 0x7f69ea9008689fc2UL,
+ /* 184 */ 0x52f2c81e46a38265UL, 0xfd78072d04a832fdUL,
+ 0x8cd7d5fa25359e94UL, 0x4de71b7454cc29d2UL,
+ /* 185 */ 0x42eb60ad1eda6ac9UL, 0x0aad37dfdbc09c3aUL,
+ 0x81004b71e33cc191UL, 0x44e6be345122803cUL,
+ /* 186 */ 0x03fe8388ba1920dbUL, 0xf5d57c32150db008UL,
+ 0x49c8c4281af60c29UL, 0x21edb518de701aeeUL,
+ /* 187 */ 0x7fb63e418f06dc99UL, 0xa4460d99c166d7b8UL,
+ 0x24dd5248ce520a83UL, 0x5ec3ad712b928358UL,
+ /* 188 */ 0x15022a5fbd17930fUL, 0xa4f64a77d82570e3UL,
+ 0x12bc8d6915783712UL, 0x498194c0fc620abbUL,
+ /* 189 */ 0x38a2d9d255686c82UL, 0x785c6bd9193e21f0UL,
+ 0xe4d5c81ab24a5484UL, 0x56307860b2e20989UL,
+ /* 190 */ 0x429d55f78b4d74c4UL, 0x22f1834643350131UL,
+ 0x1e60c24598c71fffUL, 0x59f2f014979983efUL,
+ /* 191 */ 0x46a47d56eb494a44UL, 0x3e22a854d636a18eUL,
+ 0xb346e15274491c3bUL, 0x2ceafd4e5390cde7UL,
+ /* 192 */ 0xba8a8538be0d6675UL, 0x4b9074bb50818e23UL,
+ 0xcbdab89085d304c3UL, 0x61a24fe0e56192c4UL,
+ /* 193 */ 0xcb7615e6db525bcbUL, 0xdd7d8c35a567e4caUL,
+ 0xe6b4153acafcdd69UL, 0x2d668e097f3c9766UL,
+ /* 194 */ 0xa57e7e265ce55ef0UL, 0x5d9f4e527cd4b967UL,
+ 0xfbc83606492fd1e5UL, 0x090d52beb7c3f7aeUL,
+ /* 195 */ 0x09b9515a1e7b4d7cUL, 0x1f266a2599da44c0UL,
+ 0xa1c49548e2c55504UL, 0x7ef04287126f15ccUL,
+ /* 196 */ 0xfed1659dbd30ef15UL, 0x8b4ab9eec4e0277bUL,
+ 0x884d6236a5df3291UL, 0x1fd96ea6bf5cf788UL,
+ /* 197 */ 0x42a161981f190d9aUL, 0x61d849507e6052c1UL,
+ 0x9fe113bf285a2cd5UL, 0x7c22d676dbad85d8UL,
+ /* 198 */ 0x82e770ed2bfbd27dUL, 0x4c05b2ece996f5a5UL,
+ 0xcd40a9c2b0900150UL, 0x5895319213d9bf64UL,
+ /* 199 */ 0xe7cc5d703fea2e08UL, 0xb50c491258e2188cUL,
+ 0xcce30baa48205bf0UL, 0x537c659ccfa32d62UL,
+ /* 200 */ 0x37b6623a98cfc088UL, 0xfe9bed1fa4d6aca4UL,
+ 0x04d29b8e56a8d1b0UL, 0x725f71c40b519575UL,
+ /* 201 */ 0x28c7f89cd0339ce6UL, 0x8367b14469ddc18bUL,
+ 0x883ada83a6a1652cUL, 0x585f1974034d6c17UL,
+ /* 202 */ 0x89cfb266f1b19188UL, 0xe63b4863e7c35217UL,
+ 0xd88c9da6b4c0526aUL, 0x3e035c9df0954635UL,
+ /* 203 */ 0xdd9d5412fb45de9dUL, 0xdd684532e4cff40dUL,
+ 0x4b5c999b151d671cUL, 0x2d8c2cc811e7f690UL,
+ /* 204 */ 0x7f54be1d90055d40UL, 0xa464c5df464aaf40UL,
+ 0x33979624f0e917beUL, 0x2c018dc527356b30UL,
+ /* 205 */ 0xa5415024e330b3d4UL, 0x73ff3d96691652d3UL,
+ 0x94ec42c4ef9b59f1UL, 0x0747201618d08e5aUL,
+ /* 206 */ 0x4d6ca48aca411c53UL, 0x66415f2fcfa66119UL,
+ 0x9c4dd40051e227ffUL, 0x59810bc09a02f7ebUL,
+ /* 207 */ 0x2a7eb171b3dc101dUL, 0x441c5ab99ffef68eUL,
+ 0x32025c9b93b359eaUL, 0x5e8ce0a71e9d112fUL,
+ /* 208 */ 0xbfcccb92429503fdUL, 0xd271ba752f095d55UL,
+ 0x345ead5e972d091eUL, 0x18c8df11a83103baUL,
+ /* 209 */ 0x90cd949a9aed0f4cUL, 0xc5d1f4cb6660e37eUL,
+ 0xb8cac52d56c52e0bUL, 0x6e42e400c5808e0dUL,
+ /* 210 */ 0xa3b46966eeaefd23UL, 0x0c4f1f0be39ecdcaUL,
+ 0x189dc8c9d683a51dUL, 0x51f27f054c09351bUL,
+ /* 211 */ 0x4c487ccd2a320682UL, 0x587ea95bb3df1c96UL,
+ 0xc8ccf79e555cb8e8UL, 0x547dc829a206d73dUL,
+ /* 212 */ 0xb822a6cd80c39b06UL, 0xe96d54732000d4c6UL,
+ 0x28535b6f91463b4dUL, 0x228f4660e2486e1dUL,
+ /* 213 */ 0x98799538de8d3abfUL, 0x8cd8330045ebca6eUL,
+ 0x79952a008221e738UL, 0x4322e1a7535cd2bbUL,
+ /* 214 */ 0xb114c11819d1801cUL, 0x2016e4d84f3f5ec7UL,
+ 0xdd0e2df409260f4cUL, 0x5ec362c0ae5f7266UL,
+ /* 215 */ 0xc0462b18b8b2b4eeUL, 0x7cc8d950274d1afbUL,
+ 0xf25f7105436b02d2UL, 0x43bbf8dcbff9ccd3UL,
+ /* 216 */ 0xb6ad1767a039e9dfUL, 0xb0714da8f69d3583UL,
+ 0x5e55fa18b42931f5UL, 0x4ed5558f33c60961UL,
+ /* 217 */ 0x1fe37901c647a5ddUL, 0x593ddf1f8081d357UL,
+ 0x0249a4fd813fd7a6UL, 0x69acca274e9caf61UL,
+ /* 218 */ 0x047ba3ea330721c9UL, 0x83423fc20e7e1ea0UL,
+ 0x1df4c0af01314a60UL, 0x09a62dab89289527UL,
+ /* 219 */ 0xa5b325a49cc6cb00UL, 0xe94b5dc654b56cb6UL,
+ 0x3be28779adc994a0UL, 0x4296e8f8ba3a4aadUL,
+ /* 220 */ 0x328689761e451eabUL, 0x2e4d598bff59594aUL,
+ 0x49b96853d7a7084aUL, 0x4980a319601420a8UL,
+ /* 221 */ 0x9565b9e12f552c42UL, 0x8a5318db7100fe96UL,
+ 0x05c90b4d43add0d7UL, 0x538b4cd66a5d4edaUL,
+ /* 222 */ 0xf4e94fc3e89f039fUL, 0x592c9af26f618045UL,
+ 0x08a36eb5fd4b9550UL, 0x25fffaf6c2ed1419UL,
+ /* 223 */ 0x34434459cc79d354UL, 0xeeecbfb4b1d5476bUL,
+ 0xddeb34a061615d99UL, 0x5129cecceb64b773UL,
+ /* 224 */ 0xee43215894993520UL, 0x772f9c7cf14c0b3bUL,
+ 0xd2e2fce306bedad5UL, 0x715f42b546f06a97UL,
+ /* 225 */ 0x434ecdceda5b5f1aUL, 0x0da17115a49741a9UL,
+ 0x680bd77c73edad2eUL, 0x487c02354edd9041UL,
+ /* 226 */ 0xb8efeff3a70ed9c4UL, 0x56a32aa3e857e302UL,
+ 0xdf3a68bd48a2a5a0UL, 0x07f650b73176c444UL,
+ /* 227 */ 0xe38b9b1626e0ccb1UL, 0x79e053c18b09fb36UL,
+ 0x56d90319c9f94964UL, 0x1ca941e7ac9ff5c4UL,
+ /* 228 */ 0x49c4df29162fa0bbUL, 0x8488cf3282b33305UL,
+ 0x95dfda14cabb437dUL, 0x3391f78264d5ad86UL,
+ /* 229 */ 0x729ae06ae2b5095dUL, 0xd58a58d73259a946UL,
+ 0xe9834262d13921edUL, 0x27fedafaa54bb592UL,
+ /* 230 */ 0xa99dc5b829ad48bbUL, 0x5f025742499ee260UL,
+ 0x802c8ecd5d7513fdUL, 0x78ceb3ef3f6dd938UL,
+ /* 231 */ 0xc342f44f8a135d94UL, 0x7b9edb44828cdda3UL,
+ 0x9436d11a0537cfe7UL, 0x5064b164ec1ab4c8UL,
+ /* 232 */ 0x7020eccfd37eb2fcUL, 0x1f31ea3ed90d25fcUL,
+ 0x1b930d7bdfa1bb34UL, 0x5344467a48113044UL,
+ /* 233 */ 0x70073170f25e6dfbUL, 0xe385dc1a50114cc8UL,
+ 0x2348698ac8fc4f00UL, 0x2a77a55284dd40d8UL,
+ /* 234 */ 0xfe06afe0c98c6ce4UL, 0xc235df96dddfd6e4UL,
+ 0x1428d01e33bf1ed3UL, 0x785768ec9300bdafUL,
+ /* 235 */ 0x9702e57a91deb63bUL, 0x61bdb8bfe5ce8b80UL,
+ 0x645b426f3d1d58acUL, 0x4804a82227a557bcUL,
+ /* 236 */ 0x8e57048ab44d2601UL, 0x68d6501a4b3a6935UL,
+ 0xc39c9ec3f9e1c293UL, 0x4172f257d4de63e2UL,
+ /* 237 */ 0xd368b450330c6401UL, 0x040d3017418f2391UL,
+ 0x2c34bb6090b7d90dUL, 0x16f649228fdfd51fUL,
+ /* 238 */ 0xbea6818e2b928ef5UL, 0xe28ccf91cdc11e72UL,
+ 0x594aaa68e77a36cdUL, 0x313034806c7ffd0fUL,
+ /* 239 */ 0x8a9d27ac2249bd65UL, 0x19a3b464018e9512UL,
+ 0xc26ccff352b37ec7UL, 0x056f68341d797b21UL,
+ /* 240 */ 0x5e79d6757efd2327UL, 0xfabdbcb6553afe15UL,
+ 0xd3e7222c6eaf5a60UL, 0x7046c76d4dae743bUL,
+ /* 241 */ 0x660be872b18d4a55UL, 0x19992518574e1496UL,
+ 0xc103053a302bdcbbUL, 0x3ed8e9800b218e8eUL,
+ /* 242 */ 0x7b0b9239fa75e03eUL, 0xefe9fb684633c083UL,
+ 0x98a35fbe391a7793UL, 0x6065510fe2d0fe34UL,
+ /* 243 */ 0x55cb668548abad0cUL, 0xb4584548da87e527UL,
+ 0x2c43ecea0107c1ddUL, 0x526028809372de35UL,
+ /* 244 */ 0x3415c56af9213b1fUL, 0x5bee1a4d017e98dbUL,
+ 0x13f6b105b5cf709bUL, 0x5ff20e3482b29ab6UL,
+ /* 245 */ 0x0aa29c75cc2e6c90UL, 0xfc7d73ca3a70e206UL,
+ 0x899fc38fc4b5c515UL, 0x250386b124ffc207UL,
+ /* 246 */ 0x54ea28d5ae3d2b56UL, 0x9913149dd6de60ceUL,
+ 0x16694fc58f06d6c1UL, 0x46b23975eb018fc7UL,
+ /* 247 */ 0x470a6a0fb4b7b4e2UL, 0x5d92475a8f7253deUL,
+ 0xabeee5b52fbd3adbUL, 0x7fa20801a0806968UL,
+ /* 248 */ 0x76f3faf19f7714d2UL, 0xb3e840c12f4660c3UL,
+ 0x0fb4cd8df212744eUL, 0x4b065a251d3a2dd2UL,
+ /* 249 */ 0x5cebde383d77cd4aUL, 0x6adf39df882c9cb1UL,
+ 0xa2dd242eb09af759UL, 0x3147c0e50e5f6422UL,
+ /* 250 */ 0x164ca5101d1350dbUL, 0xf8d13479c33fc962UL,
+ 0xe640ce4d13e5da08UL, 0x4bdee0c45061f8baUL,
+ /* 251 */ 0xd7c46dc1a4edb1c9UL, 0x5514d7b6437fd98aUL,
+ 0x58942f6bb2a1c00bUL, 0x2dffb2ab1d70710eUL,
+ /* 252 */ 0xccdfcf2fc18b6d68UL, 0xa8ebcba8b7806167UL,
+ 0x980697f95e2937e3UL, 0x02fbba1cd0126e8cUL
+};
+
+/* c is two 512-bit products: c0[0:7]=a0[0:3]*b0[0:3] and c1[8:15]=a1[4:7]*b1[4:7]
+ * a is two 256-bit integers: a0[0:3] and a1[4:7]
+ * b is two 256-bit integers: b0[0:3] and b1[4:7]
+ */
+static void mul2_256x256_integer_adx(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "xorl %%r14d, %%r14d ;"
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+ "adox %%r10, %%r15 ;"
+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
+ "adox %%r8, %%rax ;"
+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+ "adox %%r10, %%rbx ;"
+ /******************************************/
+ "adox %%r14, %%rcx ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "adox %%r15, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rax ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rbx ;"
+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rcx ;"
+ /******************************************/
+ "adox %%r14, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "adox %%rax, %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rbx ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rcx ;"
+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%r15 ;"
+ /******************************************/
+ "adox %%r14, %%rax ;"
+ "adcx %%r14, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "adox %%rbx, %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rcx ;"
+ "movq %%rcx, 32(%0) ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rax ;"
+ "movq %%rax, 48(%0) ;"
+ /******************************************/
+ "adox %%r14, %%rbx ;"
+ "adcx %%r14, %%rbx ;"
+ "movq %%rbx, 56(%0) ;"
+
+ "movq 32(%1), %%rdx; " /* C[0] */
+ "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
+ "xorl %%r10d, %%r10d ;"
+ "movq %%r8, 64(%0);"
+ "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
+ "adox %%r10, %%r15 ;"
+ "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
+ "adox %%r8, %%rax ;"
+ "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
+ "adox %%r10, %%rbx ;"
+ /******************************************/
+ "adox %%r14, %%rcx ;"
+
+ "movq 40(%1), %%rdx; " /* C[1] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
+ "adox %%r15, %%r8 ;"
+ "movq %%r8, 72(%0);"
+ "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rax ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rbx ;"
+ "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rcx ;"
+ /******************************************/
+ "adox %%r14, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+
+ "movq 48(%1), %%rdx; " /* C[2] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
+ "adox %%rax, %%r8 ;"
+ "movq %%r8, 80(%0);"
+ "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rbx ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%rcx ;"
+ "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%r15 ;"
+ /******************************************/
+ "adox %%r14, %%rax ;"
+ "adcx %%r14, %%rax ;"
+
+ "movq 56(%1), %%rdx; " /* C[3] */
+ "xorl %%r10d, %%r10d ;"
+ "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
+ "adox %%rbx, %%r8 ;"
+ "movq %%r8, 88(%0);"
+ "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
+ "adox %%r10, %%r9 ;"
+ "adcx %%r9, %%rcx ;"
+ "movq %%rcx, 96(%0) ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
+ "adox %%r8, %%r11 ;"
+ "adcx %%r11, %%r15 ;"
+ "movq %%r15, 104(%0) ;"
+ "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
+ "adox %%r10, %%r13 ;"
+ "adcx %%r13, %%rax ;"
+ "movq %%rax, 112(%0) ;"
+ /******************************************/
+ "adox %%r14, %%rbx ;"
+ "adcx %%r14, %%rbx ;"
+ "movq %%rbx, 120(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void mul2_256x256_integer_bmi2(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+ "addq %%r10, %%r15 ;"
+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
+ "adcq %%r8, %%rax ;"
+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+ "adcq %%r10, %%rbx ;"
+ /******************************************/
+ "adcq $0, %%rcx ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "addq %%r15, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%r15 ;"
+
+ "addq %%r9, %%rax ;"
+ "adcq %%r11, %%rbx ;"
+ "adcq %%r13, %%rcx ;"
+ "adcq $0, %%r15 ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rax ;"
+
+ "addq %%r9, %%rbx ;"
+ "adcq %%r11, %%rcx ;"
+ "adcq %%r13, %%r15 ;"
+ "adcq $0, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "addq %%rbx, %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rbx ;"
+
+ "addq %%r9, %%rcx ;"
+ "movq %%rcx, 32(%0) ;"
+ "adcq %%r11, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "adcq %%r13, %%rax ;"
+ "movq %%rax, 48(%0) ;"
+ "adcq $0, %%rbx ;"
+ "movq %%rbx, 56(%0) ;"
+
+ "movq 32(%1), %%rdx; " /* C[0] */
+ "mulx 32(%2), %%r8, %%r15; " /* C[0]*D[0] */
+ "movq %%r8, 64(%0) ;"
+ "mulx 40(%2), %%r10, %%rax; " /* C[0]*D[1] */
+ "addq %%r10, %%r15 ;"
+ "mulx 48(%2), %%r8, %%rbx; " /* C[0]*D[2] */
+ "adcq %%r8, %%rax ;"
+ "mulx 56(%2), %%r10, %%rcx; " /* C[0]*D[3] */
+ "adcq %%r10, %%rbx ;"
+ /******************************************/
+ "adcq $0, %%rcx ;"
+
+ "movq 40(%1), %%rdx; " /* C[1] */
+ "mulx 32(%2), %%r8, %%r9; " /* C[1]*D[0] */
+ "addq %%r15, %%r8 ;"
+ "movq %%r8, 72(%0) ;"
+ "mulx 40(%2), %%r10, %%r11; " /* C[1]*D[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[1]*D[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 56(%2), %%r10, %%r15; " /* C[1]*D[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%r15 ;"
+
+ "addq %%r9, %%rax ;"
+ "adcq %%r11, %%rbx ;"
+ "adcq %%r13, %%rcx ;"
+ "adcq $0, %%r15 ;"
+
+ "movq 48(%1), %%rdx; " /* C[2] */
+ "mulx 32(%2), %%r8, %%r9; " /* C[2]*D[0] */
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, 80(%0) ;"
+ "mulx 40(%2), %%r10, %%r11; " /* C[2]*D[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[2]*D[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 56(%2), %%r10, %%rax; " /* C[2]*D[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rax ;"
+
+ "addq %%r9, %%rbx ;"
+ "adcq %%r11, %%rcx ;"
+ "adcq %%r13, %%r15 ;"
+ "adcq $0, %%rax ;"
+
+ "movq 56(%1), %%rdx; " /* C[3] */
+ "mulx 32(%2), %%r8, %%r9; " /* C[3]*D[0] */
+ "addq %%rbx, %%r8 ;"
+ "movq %%r8, 88(%0) ;"
+ "mulx 40(%2), %%r10, %%r11; " /* C[3]*D[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 48(%2), %%r8, %%r13; " /* C[3]*D[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 56(%2), %%r10, %%rbx; " /* C[3]*D[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rbx ;"
+
+ "addq %%r9, %%rcx ;"
+ "movq %%rcx, 96(%0) ;"
+ "adcq %%r11, %%r15 ;"
+ "movq %%r15, 104(%0) ;"
+ "adcq %%r13, %%rax ;"
+ "movq %%rax, 112(%0) ;"
+ "adcq $0, %%rbx ;"
+ "movq %%rbx, 120(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r15");
+}
+
+static void sqr2_256x256_integer_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
+ "xorl %%r15d, %%r15d;"
+ "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
+ "adcx %%r14, %%r9 ;"
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
+ "adcx %%rax, %%r10 ;"
+ "movq 24(%1), %%rdx ;" /* A[3] */
+ "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
+ "adcx %%rcx, %%r11 ;"
+ "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
+ "adcx %%rax, %%rbx ;"
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "adcx %%r15, %%r13 ;"
+ "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
+ "movq $0, %%r14 ;"
+ /******************************************/
+ "adcx %%r15, %%r14 ;"
+
+ "xorl %%r15d, %%r15d;"
+ "adox %%rax, %%r10 ;"
+ "adcx %%r8, %%r8 ;"
+ "adox %%rcx, %%r11 ;"
+ "adcx %%r9, %%r9 ;"
+ "adox %%r15, %%rbx ;"
+ "adcx %%r10, %%r10 ;"
+ "adox %%r15, %%r13 ;"
+ "adcx %%r11, %%r11 ;"
+ "adox %%r15, %%r14 ;"
+ "adcx %%rbx, %%rbx ;"
+ "adcx %%r13, %%r13 ;"
+ "adcx %%r14, %%r14 ;"
+
+ "movq (%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%rbx ;"
+ "movq %%rbx, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+
+
+ "movq 32(%1), %%rdx ;" /* B[0] */
+ "mulx 40(%1), %%r8, %%r14 ;" /* B[1]*B[0] */
+ "xorl %%r15d, %%r15d;"
+ "mulx 48(%1), %%r9, %%r10 ;" /* B[2]*B[0] */
+ "adcx %%r14, %%r9 ;"
+ "mulx 56(%1), %%rax, %%rcx ;" /* B[3]*B[0] */
+ "adcx %%rax, %%r10 ;"
+ "movq 56(%1), %%rdx ;" /* B[3] */
+ "mulx 40(%1), %%r11, %%rbx ;" /* B[1]*B[3] */
+ "adcx %%rcx, %%r11 ;"
+ "mulx 48(%1), %%rax, %%r13 ;" /* B[2]*B[3] */
+ "adcx %%rax, %%rbx ;"
+ "movq 40(%1), %%rdx ;" /* B[1] */
+ "adcx %%r15, %%r13 ;"
+ "mulx 48(%1), %%rax, %%rcx ;" /* B[2]*B[1] */
+ "movq $0, %%r14 ;"
+ /******************************************/
+ "adcx %%r15, %%r14 ;"
+
+ "xorl %%r15d, %%r15d;"
+ "adox %%rax, %%r10 ;"
+ "adcx %%r8, %%r8 ;"
+ "adox %%rcx, %%r11 ;"
+ "adcx %%r9, %%r9 ;"
+ "adox %%r15, %%rbx ;"
+ "adcx %%r10, %%r10 ;"
+ "adox %%r15, %%r13 ;"
+ "adcx %%r11, %%r11 ;"
+ "adox %%r15, %%r14 ;"
+ "adcx %%rbx, %%rbx ;"
+ "adcx %%r13, %%r13 ;"
+ "adcx %%r14, %%r14 ;"
+
+ "movq 32(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[0]^2 */
+ /*******************/
+ "movq %%rax, 64(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 72(%0) ;"
+ "movq 40(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 80(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 88(%0) ;"
+ "movq 48(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 96(%0) ;"
+ "adcq %%rcx, %%rbx ;"
+ "movq %%rbx, 104(%0) ;"
+ "movq 56(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* B[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 112(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 120(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void sqr2_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
+ "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
+ "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
+
+ "movq 16(%1), %%rdx ;" /* A[2] */
+ "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
+ "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
+
+ "addq %%rax, %%r9 ;"
+ "adcq %%rdx, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq %%r14, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "movq $0, %%r14 ;"
+ "adcq $0, %%r14 ;"
+
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
+
+ "addq %%rax, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq $0, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "adcq $0, %%r14 ;"
+
+ "shldq $1, %%r13, %%r14 ;"
+ "shldq $1, %%r15, %%r13 ;"
+ "shldq $1, %%r11, %%r15 ;"
+ "shldq $1, %%r10, %%r11 ;"
+ "shldq $1, %%r9, %%r10 ;"
+ "shldq $1, %%r8, %%r9 ;"
+ "shlq $1, %%r8 ;"
+
+ /*******************/
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+
+ "movq 40(%1), %%rdx ;" /* B[1] */
+ "mulx 32(%1), %%r8, %%r9 ;" /* B[0]*B[1] */
+ "mulx 48(%1), %%r10, %%r11 ;" /* B[2]*B[1] */
+ "mulx 56(%1), %%rcx, %%r14 ;" /* B[3]*B[1] */
+
+ "movq 48(%1), %%rdx ;" /* B[2] */
+ "mulx 56(%1), %%r15, %%r13 ;" /* B[3]*B[2] */
+ "mulx 32(%1), %%rax, %%rdx ;" /* B[0]*B[2] */
+
+ "addq %%rax, %%r9 ;"
+ "adcq %%rdx, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq %%r14, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "movq $0, %%r14 ;"
+ "adcq $0, %%r14 ;"
+
+ "movq 32(%1), %%rdx ;" /* B[0] */
+ "mulx 56(%1), %%rax, %%rcx ;" /* B[0]*B[3] */
+
+ "addq %%rax, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq $0, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "adcq $0, %%r14 ;"
+
+ "shldq $1, %%r13, %%r14 ;"
+ "shldq $1, %%r15, %%r13 ;"
+ "shldq $1, %%r11, %%r15 ;"
+ "shldq $1, %%r10, %%r11 ;"
+ "shldq $1, %%r9, %%r10 ;"
+ "shldq $1, %%r8, %%r9 ;"
+ "shlq $1, %%r8 ;"
+
+ /*******************/
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[0]^2 */
+ /*******************/
+ "movq %%rax, 64(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 72(%0) ;"
+ "movq 40(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 80(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 88(%0) ;"
+ "movq 48(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 96(%0) ;"
+ "adcq %%rcx, %%r15 ;"
+ "movq %%r15, 104(%0) ;"
+ "movq 56(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ; " /* B[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 112(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 120(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11", "%r13", "%r14", "%r15");
+}
+
+static void red_eltfp25519_2w_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx; " /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10; " /* c*C[4] */
+ "xorl %%ebx, %%ebx ;"
+ "adox (%1), %%r8 ;"
+ "mulx 40(%1), %%r9, %%r11; " /* c*C[5] */
+ "adcx %%r10, %%r9 ;"
+ "adox 8(%1), %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax; " /* c*C[6] */
+ "adcx %%r11, %%r10 ;"
+ "adox 16(%1), %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx; " /* c*C[7] */
+ "adcx %%rax, %%r11 ;"
+ "adox 24(%1), %%r11 ;"
+ /***************************************/
+ "adcx %%rbx, %%rcx ;"
+ "adox %%rbx, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcx %%rbx, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcx %%rbx, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+
+ "mulx 96(%1), %%r8, %%r10; " /* c*C[4] */
+ "xorl %%ebx, %%ebx ;"
+ "adox 64(%1), %%r8 ;"
+ "mulx 104(%1), %%r9, %%r11; " /* c*C[5] */
+ "adcx %%r10, %%r9 ;"
+ "adox 72(%1), %%r9 ;"
+ "mulx 112(%1), %%r10, %%rax; " /* c*C[6] */
+ "adcx %%r11, %%r10 ;"
+ "adox 80(%1), %%r10 ;"
+ "mulx 120(%1), %%r11, %%rcx; " /* c*C[7] */
+ "adcx %%rax, %%r11 ;"
+ "adox 88(%1), %%r11 ;"
+ /****************************************/
+ "adcx %%rbx, %%rcx ;"
+ "adox %%rbx, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
+ "movq %%r9, 40(%0) ;"
+ "adcx %%rbx, %%r10 ;"
+ "movq %%r10, 48(%0) ;"
+ "adcx %%rbx, %%r11 ;"
+ "movq %%r11, 56(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 32(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11");
+}
+
+static void red_eltfp25519_2w_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx ; " /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "addq %%r10, %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcq %%r11, %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcq %%rax, %%r11 ;"
+ /***************************************/
+ "adcq $0, %%rcx ;"
+ "addq (%1), %%r8 ;"
+ "adcq 8(%1), %%r9 ;"
+ "adcq 16(%1), %%r10 ;"
+ "adcq 24(%1), %%r11 ;"
+ "adcq $0, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+
+ "mulx 96(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "mulx 104(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "addq %%r10, %%r9 ;"
+ "mulx 112(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcq %%r11, %%r10 ;"
+ "mulx 120(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcq %%rax, %%r11 ;"
+ /****************************************/
+ "adcq $0, %%rcx ;"
+ "addq 64(%1), %%r8 ;"
+ "adcq 72(%1), %%r9 ;"
+ "adcq 80(%1), %%r10 ;"
+ "adcq 88(%1), %%r11 ;"
+ "adcq $0, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 40(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 48(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 56(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 32(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11");
+}
+
+static void mul_256x256_integer_adx(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r9; " /* A[0]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[0]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "movq %%r10, 8(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[0]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[0]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "adcx 8(%0), %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "adcx %%r15, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[1]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+ "movq $0, %%r8 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[1]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "adcx %%rax, %%r14 ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+ "adcx %%r8, %%rax ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "adcx 16(%0), %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "adcx %%r15, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[2]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+ "movq $0, %%r8 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[2]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "adcx %%rax, %%r14 ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+ "adcx %%r8, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "xorl %%r10d, %%r10d ;"
+ "adcx 24(%0), %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adox %%r9, %%r10 ;"
+ "adcx %%r15, %%r10 ;"
+ "movq %%r10, 32(%0) ;"
+ "mulx 16(%2), %%r15, %%r13; " /* A[3]*B[2] */
+ "adox %%r11, %%r15 ;"
+ "adcx %%r14, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "movq $0, %%r8 ;"
+ "mulx 24(%2), %%r14, %%rdx; " /* A[3]*B[3] */
+ "adox %%r13, %%r14 ;"
+ "adcx %%rax, %%r14 ;"
+ "movq %%r14, 48(%0) ;"
+ "movq $0, %%rax ;"
+ /******************************************/
+ "adox %%rdx, %%rax ;"
+ "adcx %%r8, %%rax ;"
+ "movq %%rax, 56(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11",
+ "%r13", "%r14", "%r15");
+}
+
+static void mul_256x256_integer_bmi2(u64 *const c, const u64 *const a,
+ const u64 *const b)
+{
+ asm volatile(
+ "movq (%1), %%rdx; " /* A[0] */
+ "mulx (%2), %%r8, %%r15; " /* A[0]*B[0] */
+ "movq %%r8, (%0) ;"
+ "mulx 8(%2), %%r10, %%rax; " /* A[0]*B[1] */
+ "addq %%r10, %%r15 ;"
+ "mulx 16(%2), %%r8, %%rbx; " /* A[0]*B[2] */
+ "adcq %%r8, %%rax ;"
+ "mulx 24(%2), %%r10, %%rcx; " /* A[0]*B[3] */
+ "adcq %%r10, %%rbx ;"
+ /******************************************/
+ "adcq $0, %%rcx ;"
+
+ "movq 8(%1), %%rdx; " /* A[1] */
+ "mulx (%2), %%r8, %%r9; " /* A[1]*B[0] */
+ "addq %%r15, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[1]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[1]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%r15; " /* A[1]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%r15 ;"
+
+ "addq %%r9, %%rax ;"
+ "adcq %%r11, %%rbx ;"
+ "adcq %%r13, %%rcx ;"
+ "adcq $0, %%r15 ;"
+
+ "movq 16(%1), %%rdx; " /* A[2] */
+ "mulx (%2), %%r8, %%r9; " /* A[2]*B[0] */
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, 16(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[2]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[2]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rax; " /* A[2]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rax ;"
+
+ "addq %%r9, %%rbx ;"
+ "adcq %%r11, %%rcx ;"
+ "adcq %%r13, %%r15 ;"
+ "adcq $0, %%rax ;"
+
+ "movq 24(%1), %%rdx; " /* A[3] */
+ "mulx (%2), %%r8, %%r9; " /* A[3]*B[0] */
+ "addq %%rbx, %%r8 ;"
+ "movq %%r8, 24(%0) ;"
+ "mulx 8(%2), %%r10, %%r11; " /* A[3]*B[1] */
+ "adcq %%r10, %%r9 ;"
+ "mulx 16(%2), %%r8, %%r13; " /* A[3]*B[2] */
+ "adcq %%r8, %%r11 ;"
+ "mulx 24(%2), %%r10, %%rbx; " /* A[3]*B[3] */
+ "adcq %%r10, %%r13 ;"
+ /******************************************/
+ "adcq $0, %%rbx ;"
+
+ "addq %%r9, %%rcx ;"
+ "movq %%rcx, 32(%0) ;"
+ "adcq %%r11, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "adcq %%r13, %%rax ;"
+ "movq %%rax, 48(%0) ;"
+ "adcq $0, %%rbx ;"
+ "movq %%rbx, 56(%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r15");
+}
+
+static void sqr_256x256_integer_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 8(%1), %%r8, %%r14 ;" /* A[1]*A[0] */
+ "xorl %%r15d, %%r15d;"
+ "mulx 16(%1), %%r9, %%r10 ;" /* A[2]*A[0] */
+ "adcx %%r14, %%r9 ;"
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[3]*A[0] */
+ "adcx %%rax, %%r10 ;"
+ "movq 24(%1), %%rdx ;" /* A[3] */
+ "mulx 8(%1), %%r11, %%rbx ;" /* A[1]*A[3] */
+ "adcx %%rcx, %%r11 ;"
+ "mulx 16(%1), %%rax, %%r13 ;" /* A[2]*A[3] */
+ "adcx %%rax, %%rbx ;"
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "adcx %%r15, %%r13 ;"
+ "mulx 16(%1), %%rax, %%rcx ;" /* A[2]*A[1] */
+ "movq $0, %%r14 ;"
+ /******************************************/
+ "adcx %%r15, %%r14 ;"
+
+ "xorl %%r15d, %%r15d;"
+ "adox %%rax, %%r10 ;"
+ "adcx %%r8, %%r8 ;"
+ "adox %%rcx, %%r11 ;"
+ "adcx %%r9, %%r9 ;"
+ "adox %%r15, %%rbx ;"
+ "adcx %%r10, %%r10 ;"
+ "adox %%r15, %%r13 ;"
+ "adcx %%r11, %%r11 ;"
+ "adox %%r15, %%r14 ;"
+ "adcx %%rbx, %%rbx ;"
+ "adcx %%r13, %%r13 ;"
+ "adcx %%r14, %%r14 ;"
+
+ "movq (%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%rbx ;"
+ "movq %%rbx, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11", "%r13", "%r14", "%r15");
+}
+
+static void sqr_256x256_integer_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movq 8(%1), %%rdx ;" /* A[1] */
+ "mulx (%1), %%r8, %%r9 ;" /* A[0]*A[1] */
+ "mulx 16(%1), %%r10, %%r11 ;" /* A[2]*A[1] */
+ "mulx 24(%1), %%rcx, %%r14 ;" /* A[3]*A[1] */
+
+ "movq 16(%1), %%rdx ;" /* A[2] */
+ "mulx 24(%1), %%r15, %%r13 ;" /* A[3]*A[2] */
+ "mulx (%1), %%rax, %%rdx ;" /* A[0]*A[2] */
+
+ "addq %%rax, %%r9 ;"
+ "adcq %%rdx, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq %%r14, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "movq $0, %%r14 ;"
+ "adcq $0, %%r14 ;"
+
+ "movq (%1), %%rdx ;" /* A[0] */
+ "mulx 24(%1), %%rax, %%rcx ;" /* A[0]*A[3] */
+
+ "addq %%rax, %%r10 ;"
+ "adcq %%rcx, %%r11 ;"
+ "adcq $0, %%r15 ;"
+ "adcq $0, %%r13 ;"
+ "adcq $0, %%r14 ;"
+
+ "shldq $1, %%r13, %%r14 ;"
+ "shldq $1, %%r15, %%r13 ;"
+ "shldq $1, %%r11, %%r15 ;"
+ "shldq $1, %%r10, %%r11 ;"
+ "shldq $1, %%r9, %%r10 ;"
+ "shldq $1, %%r8, %%r9 ;"
+ "shlq $1, %%r8 ;"
+
+ /*******************/
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[0]^2 */
+ /*******************/
+ "movq %%rax, 0(%0) ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, 8(%0) ;"
+ "movq 8(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[1]^2 */
+ "adcq %%rax, %%r9 ;"
+ "movq %%r9, 16(%0) ;"
+ "adcq %%rcx, %%r10 ;"
+ "movq %%r10, 24(%0) ;"
+ "movq 16(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[2]^2 */
+ "adcq %%rax, %%r11 ;"
+ "movq %%r11, 32(%0) ;"
+ "adcq %%rcx, %%r15 ;"
+ "movq %%r15, 40(%0) ;"
+ "movq 24(%1), %%rdx ;"
+ "mulx %%rdx, %%rax, %%rcx ;" /* A[3]^2 */
+ "adcq %%rax, %%r13 ;"
+ "movq %%r13, 48(%0) ;"
+ "adcq %%rcx, %%r14 ;"
+ "movq %%r14, 56(%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11", "%r13", "%r14", "%r15");
+}
+
+static void red_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "xorl %%ebx, %%ebx ;"
+ "adox (%1), %%r8 ;"
+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "adcx %%r10, %%r9 ;"
+ "adox 8(%1), %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcx %%r11, %%r10 ;"
+ "adox 16(%1), %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcx %%rax, %%r11 ;"
+ "adox 24(%1), %%r11 ;"
+ /***************************************/
+ "adcx %%rbx, %%rcx ;"
+ "adox %%rbx, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0, of=0 */
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rbx, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcx %%rbx, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcx %%rbx, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9",
+ "%r10", "%r11");
+}
+
+static void red_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
+{
+ asm volatile(
+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 */
+ "mulx 32(%1), %%r8, %%r10 ;" /* c*C[4] */
+ "mulx 40(%1), %%r9, %%r11 ;" /* c*C[5] */
+ "addq %%r10, %%r9 ;"
+ "mulx 48(%1), %%r10, %%rax ;" /* c*C[6] */
+ "adcq %%r11, %%r10 ;"
+ "mulx 56(%1), %%r11, %%rcx ;" /* c*C[7] */
+ "adcq %%rax, %%r11 ;"
+ /***************************************/
+ "adcq $0, %%rcx ;"
+ "addq (%1), %%r8 ;"
+ "adcq 8(%1), %%r9 ;"
+ "adcq 16(%1), %%r10 ;"
+ "adcq 24(%1), %%r11 ;"
+ "adcq $0, %%rcx ;"
+ "imul %%rdx, %%rcx ;" /* c*C[4], cf=0 */
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11");
+}
+
+static __always_inline void
+add_eltfp25519_1w_adx(u64 *const c, const u64 *const a, const u64 *const b)
+{
+ asm volatile(
+ "mov $38, %%eax ;"
+ "xorl %%ecx, %%ecx ;"
+ "movq (%2), %%r8 ;"
+ "adcx (%1), %%r8 ;"
+ "movq 8(%2), %%r9 ;"
+ "adcx 8(%1), %%r9 ;"
+ "movq 16(%2), %%r10 ;"
+ "adcx 16(%1), %%r10 ;"
+ "movq 24(%2), %%r11 ;"
+ "adcx 24(%1), %%r11 ;"
+ "cmovc %%eax, %%ecx ;"
+ "xorl %%eax, %%eax ;"
+ "adcx %%rcx, %%r8 ;"
+ "adcx %%rax, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcx %%rax, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcx %%rax, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $38, %%ecx ;"
+ "cmovc %%ecx, %%eax ;"
+ "addq %%rax, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+static __always_inline void
+add_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a, const u64 *const b)
+{
+ asm volatile(
+ "mov $38, %%eax ;"
+ "movq (%2), %%r8 ;"
+ "addq (%1), %%r8 ;"
+ "movq 8(%2), %%r9 ;"
+ "adcq 8(%1), %%r9 ;"
+ "movq 16(%2), %%r10 ;"
+ "adcq 16(%1), %%r10 ;"
+ "movq 24(%2), %%r11 ;"
+ "adcq 24(%1), %%r11 ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+static __always_inline void
+sub_eltfp25519_1w(u64 *const c, const u64 *const a, const u64 *const b)
+{
+ asm volatile(
+ "mov $38, %%eax ;"
+ "movq (%1), %%r8 ;"
+ "subq (%2), %%r8 ;"
+ "movq 8(%1), %%r9 ;"
+ "sbbq 8(%2), %%r9 ;"
+ "movq 16(%1), %%r10 ;"
+ "sbbq 16(%2), %%r10 ;"
+ "movq 24(%1), %%r11 ;"
+ "sbbq 24(%2), %%r11 ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "subq %%rcx, %%r8 ;"
+ "sbbq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "sbbq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "sbbq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%eax, %%ecx ;"
+ "subq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(b)
+ : "memory", "cc", "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11");
+}
+
+/* Multiplication by a24 = (A+2)/4 = (486662+2)/4 = 121666 */
+static __always_inline void
+mul_a24_eltfp25519_1w(u64 *const c, const u64 *const a)
+{
+ const u64 a24 = 121666;
+ asm volatile(
+ "movq %2, %%rdx ;"
+ "mulx (%1), %%r8, %%r10 ;"
+ "mulx 8(%1), %%r9, %%r11 ;"
+ "addq %%r10, %%r9 ;"
+ "mulx 16(%1), %%r10, %%rax ;"
+ "adcq %%r11, %%r10 ;"
+ "mulx 24(%1), %%r11, %%rcx ;"
+ "adcq %%rax, %%r11 ;"
+ /**************************/
+ "adcq $0, %%rcx ;"
+ "movl $38, %%edx ;" /* 2*c = 38 = 2^256 mod 2^255-19*/
+ "imul %%rdx, %%rcx ;"
+ "addq %%rcx, %%r8 ;"
+ "adcq $0, %%r9 ;"
+ "movq %%r9, 8(%0) ;"
+ "adcq $0, %%r10 ;"
+ "movq %%r10, 16(%0) ;"
+ "adcq $0, %%r11 ;"
+ "movq %%r11, 24(%0) ;"
+ "mov $0, %%ecx ;"
+ "cmovc %%edx, %%ecx ;"
+ "addq %%rcx, %%r8 ;"
+ "movq %%r8, (%0) ;"
+ :
+ : "r"(c), "r"(a), "r"(a24)
+ : "memory", "cc", "%rax", "%rcx", "%rdx", "%r8", "%r9", "%r10",
+ "%r11");
+}
+
+static void inv_eltfp25519_1w_adx(u64 *const c, const u64 *const a)
+{
+ struct {
+ eltfp25519_1w_buffer buffer;
+ eltfp25519_1w x0, x1, x2;
+ } __aligned(32) m;
+ u64 *T[4];
+
+ T[0] = m.x0;
+ T[1] = c; /* x^(-1) */
+ T[2] = m.x1;
+ T[3] = m.x2;
+
+ copy_eltfp25519_1w(T[1], a);
+ sqrn_eltfp25519_1w_adx(T[1], 1);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_adx(T[2], 2);
+ mul_eltfp25519_1w_adx(T[0], a, T[2]);
+ mul_eltfp25519_1w_adx(T[1], T[1], T[0]);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_adx(T[2], 1);
+ mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 5);
+ mul_eltfp25519_1w_adx(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 10);
+ mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
+ copy_eltfp25519_1w(T[3], T[2]);
+ sqrn_eltfp25519_1w_adx(T[3], 20);
+ mul_eltfp25519_1w_adx(T[3], T[3], T[2]);
+ sqrn_eltfp25519_1w_adx(T[3], 10);
+ mul_eltfp25519_1w_adx(T[3], T[3], T[0]);
+ copy_eltfp25519_1w(T[0], T[3]);
+ sqrn_eltfp25519_1w_adx(T[0], 50);
+ mul_eltfp25519_1w_adx(T[0], T[0], T[3]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 100);
+ mul_eltfp25519_1w_adx(T[2], T[2], T[0]);
+ sqrn_eltfp25519_1w_adx(T[2], 50);
+ mul_eltfp25519_1w_adx(T[2], T[2], T[3]);
+ sqrn_eltfp25519_1w_adx(T[2], 5);
+ mul_eltfp25519_1w_adx(T[1], T[1], T[2]);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void inv_eltfp25519_1w_bmi2(u64 *const c, const u64 *const a)
+{
+ struct {
+ eltfp25519_1w_buffer buffer;
+ eltfp25519_1w x0, x1, x2;
+ } __aligned(32) m;
+ u64 *T[5];
+
+ T[0] = m.x0;
+ T[1] = c; /* x^(-1) */
+ T[2] = m.x1;
+ T[3] = m.x2;
+
+ copy_eltfp25519_1w(T[1], a);
+ sqrn_eltfp25519_1w_bmi2(T[1], 1);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 2);
+ mul_eltfp25519_1w_bmi2(T[0], a, T[2]);
+ mul_eltfp25519_1w_bmi2(T[1], T[1], T[0]);
+ copy_eltfp25519_1w(T[2], T[1]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 1);
+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 5);
+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[2]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 10);
+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
+ copy_eltfp25519_1w(T[3], T[2]);
+ sqrn_eltfp25519_1w_bmi2(T[3], 20);
+ mul_eltfp25519_1w_bmi2(T[3], T[3], T[2]);
+ sqrn_eltfp25519_1w_bmi2(T[3], 10);
+ mul_eltfp25519_1w_bmi2(T[3], T[3], T[0]);
+ copy_eltfp25519_1w(T[0], T[3]);
+ sqrn_eltfp25519_1w_bmi2(T[0], 50);
+ mul_eltfp25519_1w_bmi2(T[0], T[0], T[3]);
+ copy_eltfp25519_1w(T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 100);
+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[0]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 50);
+ mul_eltfp25519_1w_bmi2(T[2], T[2], T[3]);
+ sqrn_eltfp25519_1w_bmi2(T[2], 5);
+ mul_eltfp25519_1w_bmi2(T[1], T[1], T[2]);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+/* Given c, a 256-bit number, fred_eltfp25519_1w updates c
+ * with a number such that 0 <= C < 2**255-19.
+ */
+static __always_inline void fred_eltfp25519_1w(u64 *const c)
+{
+ u64 tmp0 = 38, tmp1 = 19;
+ asm volatile(
+ "btrq $63, %3 ;" /* Put bit 255 in carry flag and clear */
+ "cmovncl %k5, %k4 ;" /* c[255] ? 38 : 19 */
+
+ /* Add either 19 or 38 to c */
+ "addq %4, %0 ;"
+ "adcq $0, %1 ;"
+ "adcq $0, %2 ;"
+ "adcq $0, %3 ;"
+
+ /* Test for bit 255 again; only triggered on overflow modulo 2^255-19 */
+ "movl $0, %k4 ;"
+ "cmovnsl %k5, %k4 ;" /* c[255] ? 0 : 19 */
+ "btrq $63, %3 ;" /* Clear bit 255 */
+
+ /* Subtract 19 if necessary */
+ "subq %4, %0 ;"
+ "sbbq $0, %1 ;"
+ "sbbq $0, %2 ;"
+ "sbbq $0, %3 ;"
+
+ : "+r"(c[0]), "+r"(c[1]), "+r"(c[2]), "+r"(c[3]), "+r"(tmp0),
+ "+r"(tmp1)
+ :
+ : "memory", "cc");
+}
+
+static __always_inline void cswap(u8 bit, u64 *const px, u64 *const py)
+{
+ u64 temp;
+ asm volatile(
+ "test %9, %9 ;"
+ "movq %0, %8 ;"
+ "cmovnzq %4, %0 ;"
+ "cmovnzq %8, %4 ;"
+ "movq %1, %8 ;"
+ "cmovnzq %5, %1 ;"
+ "cmovnzq %8, %5 ;"
+ "movq %2, %8 ;"
+ "cmovnzq %6, %2 ;"
+ "cmovnzq %8, %6 ;"
+ "movq %3, %8 ;"
+ "cmovnzq %7, %3 ;"
+ "cmovnzq %8, %7 ;"
+ : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3]),
+ "+r"(py[0]), "+r"(py[1]), "+r"(py[2]), "+r"(py[3]),
+ "=r"(temp)
+ : "r"(bit)
+ : "cc"
+ );
+}
+
+static __always_inline void cselect(u8 bit, u64 *const px, const u64 *const py)
+{
+ asm volatile(
+ "test %4, %4 ;"
+ "cmovnzq %5, %0 ;"
+ "cmovnzq %6, %1 ;"
+ "cmovnzq %7, %2 ;"
+ "cmovnzq %8, %3 ;"
+ : "+r"(px[0]), "+r"(px[1]), "+r"(px[2]), "+r"(px[3])
+ : "r"(bit), "rm"(py[0]), "rm"(py[1]), "rm"(py[2]), "rm"(py[3])
+ : "cc"
+ );
+}
+
+static void curve25519_adx(u8 shared[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE],
+ const u8 session_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[6 * NUM_WORDS_ELTFP25519];
+ u8 session[CURVE25519_KEY_SIZE];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ int i = 0, j = 0;
+ u64 prev = 0;
+ u64 *const X1 = (u64 *)m.session;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Px = m.coordinates + 0;
+ u64 *const Pz = m.coordinates + 4;
+ u64 *const Qx = m.coordinates + 8;
+ u64 *const Qz = m.coordinates + 12;
+ u64 *const X2 = Qx;
+ u64 *const Z2 = Qz;
+ u64 *const X3 = Px;
+ u64 *const Z3 = Pz;
+ u64 *const X2Z2 = Qx;
+ u64 *const X3Z3 = Px;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const D = m.workspace + 8;
+ u64 *const C = m.workspace + 12;
+ u64 *const DA = m.workspace + 16;
+ u64 *const CB = m.workspace + 20;
+ u64 *const AB = A;
+ u64 *const DC = D;
+ u64 *const DACB = DA;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+ memcpy(m.session, session_key, sizeof(m.session));
+
+ curve25519_clamp_secret(m.private);
+
+ /* As in the draft:
+ * When receiving such an array, implementations of curve25519
+ * MUST mask the most-significant bit in the final byte. This
+ * is done to preserve compatibility with point formats which
+ * reserve the sign bit for use in other protocols and to
+ * increase resistance to implementation fingerprinting
+ */
+ m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
+
+ copy_eltfp25519_1w(Px, X1);
+ setzero_eltfp25519_1w(Pz);
+ setzero_eltfp25519_1w(Qx);
+ setzero_eltfp25519_1w(Qz);
+
+ Pz[0] = 1;
+ Qx[0] = 1;
+
+ /* main-loop */
+ prev = 0;
+ j = 62;
+ for (i = 3; i >= 0; --i) {
+ while (j >= 0) {
+ u64 bit = (key[i] >> j) & 0x1;
+ u64 swap = bit ^ prev;
+ prev = bit;
+
+ add_eltfp25519_1w_adx(A, X2, Z2); /* A = (X2+Z2) */
+ sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
+ add_eltfp25519_1w_adx(C, X3, Z3); /* C = (X3+Z3) */
+ sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
+ mul_eltfp25519_2w_adx(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
+
+ cselect(swap, A, C);
+ cselect(swap, B, D);
+
+ sqr_eltfp25519_2w_adx(AB); /* [AA|BB] = [A^2|B^2] */
+ add_eltfp25519_1w_adx(X3, DA, CB); /* X3 = (DA+CB) */
+ sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
+ sqr_eltfp25519_2w_adx(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
+
+ copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
+ sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
+
+ mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
+ add_eltfp25519_1w_adx(B, B, X2); /* B = a24*E+B */
+ mul_eltfp25519_2w_adx(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
+ mul_eltfp25519_1w_adx(Z3, Z3, X1); /* Z3 = Z3*X1 */
+ --j;
+ }
+ j = 63;
+ }
+
+ inv_eltfp25519_1w_adx(A, Qz);
+ mul_eltfp25519_1w_adx((u64 *)shared, Qx, A);
+ fred_eltfp25519_1w((u64 *)shared);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_adx_base(u8 session_key[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[4 * NUM_WORDS_ELTFP25519];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ const int ite[4] = { 64, 64, 64, 63 };
+ const int q = 3;
+ u64 swap = 1;
+
+ int i = 0, j = 0, k = 0;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Ur1 = m.coordinates + 0;
+ u64 *const Zr1 = m.coordinates + 4;
+ u64 *const Ur2 = m.coordinates + 8;
+ u64 *const Zr2 = m.coordinates + 12;
+
+ u64 *const UZr1 = m.coordinates + 0;
+ u64 *const ZUr2 = m.coordinates + 8;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const C = m.workspace + 8;
+ u64 *const D = m.workspace + 12;
+
+ u64 *const AB = m.workspace + 0;
+ u64 *const CD = m.workspace + 8;
+
+ const u64 *const P = table_ladder_8k;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+
+ curve25519_clamp_secret(m.private);
+
+ setzero_eltfp25519_1w(Ur1);
+ setzero_eltfp25519_1w(Zr1);
+ setzero_eltfp25519_1w(Zr2);
+ Ur1[0] = 1;
+ Zr1[0] = 1;
+ Zr2[0] = 1;
+
+ /* G-S */
+ Ur2[3] = 0x1eaecdeee27cab34UL;
+ Ur2[2] = 0xadc7a0b9235d48e2UL;
+ Ur2[1] = 0xbbf095ae14b2edf8UL;
+ Ur2[0] = 0x7e94e1fec82faabdUL;
+
+ /* main-loop */
+ j = q;
+ for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
+ while (j < ite[i]) {
+ u64 bit = (key[i] >> j) & 0x1;
+ k = (64 * i + j - q);
+ swap = swap ^ bit;
+ cswap(swap, Ur1, Ur2);
+ cswap(swap, Zr1, Zr2);
+ swap = bit;
+ /* Addition */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ mul_eltfp25519_1w_adx(C, &P[4 * k], B); /* C = M0-B */
+ sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
+ add_eltfp25519_1w_adx(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
+ sqr_eltfp25519_2w_adx(AB); /* A = A^2 | B = B^2 */
+ mul_eltfp25519_2w_adx(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
+ ++j;
+ }
+ j = 0;
+ }
+
+ /* Doubling */
+ for (i = 0; i < q; ++i) {
+ add_eltfp25519_1w_adx(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ sqr_eltfp25519_2w_adx(AB); /* A = A**2 B = B**2 */
+ copy_eltfp25519_1w(C, B); /* C = B */
+ sub_eltfp25519_1w(B, A, B); /* B = A-B */
+ mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
+ add_eltfp25519_1w_adx(D, D, C); /* D = D+C */
+ mul_eltfp25519_2w_adx(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
+ }
+
+ /* Convert to affine coordinates */
+ inv_eltfp25519_1w_adx(A, Zr1);
+ mul_eltfp25519_1w_adx((u64 *)session_key, Ur1, A);
+ fred_eltfp25519_1w((u64 *)session_key);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_bmi2(u8 shared[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE],
+ const u8 session_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[6 * NUM_WORDS_ELTFP25519];
+ u8 session[CURVE25519_KEY_SIZE];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ int i = 0, j = 0;
+ u64 prev = 0;
+ u64 *const X1 = (u64 *)m.session;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Px = m.coordinates + 0;
+ u64 *const Pz = m.coordinates + 4;
+ u64 *const Qx = m.coordinates + 8;
+ u64 *const Qz = m.coordinates + 12;
+ u64 *const X2 = Qx;
+ u64 *const Z2 = Qz;
+ u64 *const X3 = Px;
+ u64 *const Z3 = Pz;
+ u64 *const X2Z2 = Qx;
+ u64 *const X3Z3 = Px;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const D = m.workspace + 8;
+ u64 *const C = m.workspace + 12;
+ u64 *const DA = m.workspace + 16;
+ u64 *const CB = m.workspace + 20;
+ u64 *const AB = A;
+ u64 *const DC = D;
+ u64 *const DACB = DA;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+ memcpy(m.session, session_key, sizeof(m.session));
+
+ curve25519_clamp_secret(m.private);
+
+ /* As in the draft:
+ * When receiving such an array, implementations of curve25519
+ * MUST mask the most-significant bit in the final byte. This
+ * is done to preserve compatibility with point formats which
+ * reserve the sign bit for use in other protocols and to
+ * increase resistance to implementation fingerprinting
+ */
+ m.session[CURVE25519_KEY_SIZE - 1] &= (1 << (255 % 8)) - 1;
+
+ copy_eltfp25519_1w(Px, X1);
+ setzero_eltfp25519_1w(Pz);
+ setzero_eltfp25519_1w(Qx);
+ setzero_eltfp25519_1w(Qz);
+
+ Pz[0] = 1;
+ Qx[0] = 1;
+
+ /* main-loop */
+ prev = 0;
+ j = 62;
+ for (i = 3; i >= 0; --i) {
+ while (j >= 0) {
+ u64 bit = (key[i] >> j) & 0x1;
+ u64 swap = bit ^ prev;
+ prev = bit;
+
+ add_eltfp25519_1w_bmi2(A, X2, Z2); /* A = (X2+Z2) */
+ sub_eltfp25519_1w(B, X2, Z2); /* B = (X2-Z2) */
+ add_eltfp25519_1w_bmi2(C, X3, Z3); /* C = (X3+Z3) */
+ sub_eltfp25519_1w(D, X3, Z3); /* D = (X3-Z3) */
+ mul_eltfp25519_2w_bmi2(DACB, AB, DC); /* [DA|CB] = [A|B]*[D|C] */
+
+ cselect(swap, A, C);
+ cselect(swap, B, D);
+
+ sqr_eltfp25519_2w_bmi2(AB); /* [AA|BB] = [A^2|B^2] */
+ add_eltfp25519_1w_bmi2(X3, DA, CB); /* X3 = (DA+CB) */
+ sub_eltfp25519_1w(Z3, DA, CB); /* Z3 = (DA-CB) */
+ sqr_eltfp25519_2w_bmi2(X3Z3); /* [X3|Z3] = [(DA+CB)|(DA+CB)]^2 */
+
+ copy_eltfp25519_1w(X2, B); /* X2 = B^2 */
+ sub_eltfp25519_1w(Z2, A, B); /* Z2 = E = AA-BB */
+
+ mul_a24_eltfp25519_1w(B, Z2); /* B = a24*E */
+ add_eltfp25519_1w_bmi2(B, B, X2); /* B = a24*E+B */
+ mul_eltfp25519_2w_bmi2(X2Z2, X2Z2, AB); /* [X2|Z2] = [B|E]*[A|a24*E+B] */
+ mul_eltfp25519_1w_bmi2(Z3, Z3, X1); /* Z3 = Z3*X1 */
+ --j;
+ }
+ j = 63;
+ }
+
+ inv_eltfp25519_1w_bmi2(A, Qz);
+ mul_eltfp25519_1w_bmi2((u64 *)shared, Qx, A);
+ fred_eltfp25519_1w((u64 *)shared);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+static void curve25519_bmi2_base(u8 session_key[CURVE25519_KEY_SIZE],
+ const u8 private_key[CURVE25519_KEY_SIZE])
+{
+ struct {
+ u64 buffer[4 * NUM_WORDS_ELTFP25519];
+ u64 coordinates[4 * NUM_WORDS_ELTFP25519];
+ u64 workspace[4 * NUM_WORDS_ELTFP25519];
+ u8 private[CURVE25519_KEY_SIZE];
+ } __aligned(32) m;
+
+ const int ite[4] = { 64, 64, 64, 63 };
+ const int q = 3;
+ u64 swap = 1;
+
+ int i = 0, j = 0, k = 0;
+ u64 *const key = (u64 *)m.private;
+ u64 *const Ur1 = m.coordinates + 0;
+ u64 *const Zr1 = m.coordinates + 4;
+ u64 *const Ur2 = m.coordinates + 8;
+ u64 *const Zr2 = m.coordinates + 12;
+
+ u64 *const UZr1 = m.coordinates + 0;
+ u64 *const ZUr2 = m.coordinates + 8;
+
+ u64 *const A = m.workspace + 0;
+ u64 *const B = m.workspace + 4;
+ u64 *const C = m.workspace + 8;
+ u64 *const D = m.workspace + 12;
+
+ u64 *const AB = m.workspace + 0;
+ u64 *const CD = m.workspace + 8;
+
+ const u64 *const P = table_ladder_8k;
+
+ memcpy(m.private, private_key, sizeof(m.private));
+
+ curve25519_clamp_secret(m.private);
+
+ setzero_eltfp25519_1w(Ur1);
+ setzero_eltfp25519_1w(Zr1);
+ setzero_eltfp25519_1w(Zr2);
+ Ur1[0] = 1;
+ Zr1[0] = 1;
+ Zr2[0] = 1;
+
+ /* G-S */
+ Ur2[3] = 0x1eaecdeee27cab34UL;
+ Ur2[2] = 0xadc7a0b9235d48e2UL;
+ Ur2[1] = 0xbbf095ae14b2edf8UL;
+ Ur2[0] = 0x7e94e1fec82faabdUL;
+
+ /* main-loop */
+ j = q;
+ for (i = 0; i < NUM_WORDS_ELTFP25519; ++i) {
+ while (j < ite[i]) {
+ u64 bit = (key[i] >> j) & 0x1;
+ k = (64 * i + j - q);
+ swap = swap ^ bit;
+ cswap(swap, Ur1, Ur2);
+ cswap(swap, Zr1, Zr2);
+ swap = bit;
+ /* Addition */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ mul_eltfp25519_1w_bmi2(C, &P[4 * k], B);/* C = M0-B */
+ sub_eltfp25519_1w(B, A, C); /* B = (Ur1+Zr1) - M*(Ur1-Zr1) */
+ add_eltfp25519_1w_bmi2(A, A, C); /* A = (Ur1+Zr1) + M*(Ur1-Zr1) */
+ sqr_eltfp25519_2w_bmi2(AB); /* A = A^2 | B = B^2 */
+ mul_eltfp25519_2w_bmi2(UZr1, ZUr2, AB); /* Ur1 = Zr2*A | Zr1 = Ur2*B */
+ ++j;
+ }
+ j = 0;
+ }
+
+ /* Doubling */
+ for (i = 0; i < q; ++i) {
+ add_eltfp25519_1w_bmi2(A, Ur1, Zr1); /* A = Ur1+Zr1 */
+ sub_eltfp25519_1w(B, Ur1, Zr1); /* B = Ur1-Zr1 */
+ sqr_eltfp25519_2w_bmi2(AB); /* A = A**2 B = B**2 */
+ copy_eltfp25519_1w(C, B); /* C = B */
+ sub_eltfp25519_1w(B, A, B); /* B = A-B */
+ mul_a24_eltfp25519_1w(D, B); /* D = my_a24*B */
+ add_eltfp25519_1w_bmi2(D, D, C); /* D = D+C */
+ mul_eltfp25519_2w_bmi2(UZr1, AB, CD); /* Ur1 = A*B Zr1 = Zr1*A */
+ }
+
+ /* Convert to affine coordinates */
+ inv_eltfp25519_1w_bmi2(A, Zr1);
+ mul_eltfp25519_1w_bmi2((u64 *)session_key, Ur1, A);
+ fred_eltfp25519_1w((u64 *)session_key);
+
+ memzero_explicit(&m, sizeof(m));
+}
+
+void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE],
+ const u8 basepoint[CURVE25519_KEY_SIZE])
+{
+ if (static_branch_likely(&curve25519_use_adx))
+ curve25519_adx(mypublic, secret, basepoint);
+ else if (static_branch_likely(&curve25519_use_bmi2))
+ curve25519_bmi2(mypublic, secret, basepoint);
+ else
+ curve25519_generic(mypublic, secret, basepoint);
+}
+EXPORT_SYMBOL(curve25519_arch);
+
+void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE],
+ const u8 secret[CURVE25519_KEY_SIZE])
+{
+ if (static_branch_likely(&curve25519_use_adx))
+ curve25519_adx_base(pub, secret);
+ else if (static_branch_likely(&curve25519_use_bmi2))
+ curve25519_bmi2_base(pub, secret);
+ else
+ curve25519_generic(pub, secret, curve25519_base_point);
+}
+EXPORT_SYMBOL(curve25519_base_arch);
+
+static int curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
+ unsigned int len)
+{
+ u8 *secret = kpp_tfm_ctx(tfm);
+
+ if (!len)
+ curve25519_generate_secret(secret);
+ else if (len == CURVE25519_KEY_SIZE &&
+ crypto_memneq(buf, curve25519_null_point, CURVE25519_KEY_SIZE))
+ memcpy(secret, buf, CURVE25519_KEY_SIZE);
+ else
+ return -EINVAL;
+ return 0;
+}
+
+static int curve25519_generate_public_key(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ const u8 *secret = kpp_tfm_ctx(tfm);
+ u8 buf[CURVE25519_KEY_SIZE];
+ int copied, nbytes;
+
+ if (req->src)
+ return -EINVAL;
+
+ curve25519_base_arch(buf, secret);
+
+ /* might want less than we've got */
+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+ nbytes),
+ buf, nbytes);
+ if (copied != nbytes)
+ return -EINVAL;
+ return 0;
+}
+
+static int curve25519_compute_shared_secret(struct kpp_request *req)
+{
+ struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
+ const u8 *secret = kpp_tfm_ctx(tfm);
+ u8 public_key[CURVE25519_KEY_SIZE];
+ u8 buf[CURVE25519_KEY_SIZE];
+ int copied, nbytes;
+
+ if (!req->src)
+ return -EINVAL;
+
+ copied = sg_copy_to_buffer(req->src,
+ sg_nents_for_len(req->src,
+ CURVE25519_KEY_SIZE),
+ public_key, CURVE25519_KEY_SIZE);
+ if (copied != CURVE25519_KEY_SIZE)
+ return -EINVAL;
+
+ curve25519_arch(buf, secret, public_key);
+
+ /* might want less than we've got */
+ nbytes = min_t(size_t, CURVE25519_KEY_SIZE, req->dst_len);
+ copied = sg_copy_from_buffer(req->dst, sg_nents_for_len(req->dst,
+ nbytes),
+ buf, nbytes);
+ if (copied != nbytes)
+ return -EINVAL;
+ return 0;
+}
+
+static unsigned int curve25519_max_size(struct crypto_kpp *tfm)
+{
+ return CURVE25519_KEY_SIZE;
+}
+
+static struct kpp_alg curve25519_alg = {
+ .base.cra_name = "curve25519",
+ .base.cra_driver_name = "curve25519-x86",
+ .base.cra_priority = 200,
+ .base.cra_module = THIS_MODULE,
+ .base.cra_ctxsize = CURVE25519_KEY_SIZE,
+
+ .set_secret = curve25519_set_secret,
+ .generate_public_key = curve25519_generate_public_key,
+ .compute_shared_secret = curve25519_compute_shared_secret,
+ .max_size = curve25519_max_size,
+};
+
+static int __init curve25519_mod_init(void)
+{
+ if (boot_cpu_has(X86_FEATURE_BMI2))
+ static_branch_enable(&curve25519_use_bmi2);
+ else if (boot_cpu_has(X86_FEATURE_ADX))
+ static_branch_enable(&curve25519_use_adx);
+ else
+ return 0;
+ return crypto_register_kpp(&curve25519_alg);
+}
+
+static void __exit curve25519_mod_exit(void)
+{
+ if (boot_cpu_has(X86_FEATURE_BMI2) ||
+ boot_cpu_has(X86_FEATURE_ADX))
+ crypto_unregister_kpp(&curve25519_alg);
+}
+
+module_init(curve25519_mod_init);
+module_exit(curve25519_mod_exit);
+
+MODULE_ALIAS_CRYPTO("curve25519");
+MODULE_ALIAS_CRYPTO("curve25519-x86");
+MODULE_LICENSE("GPL v2");
diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S
index 7fca43099a5f..fac0fdc3f25d 100644
--- a/arch/x86/crypto/des3_ede-asm_64.S
+++ b/arch/x86/crypto/des3_ede-asm_64.S
@@ -162,7 +162,7 @@
movl left##d, (io); \
movl right##d, 4(io);
-ENTRY(des3_ede_x86_64_crypt_blk)
+SYM_FUNC_START(des3_ede_x86_64_crypt_blk)
/* input:
* %rdi: round keys, CTX
* %rsi: dst
@@ -244,7 +244,7 @@ ENTRY(des3_ede_x86_64_crypt_blk)
popq %rbx;
ret;
-ENDPROC(des3_ede_x86_64_crypt_blk)
+SYM_FUNC_END(des3_ede_x86_64_crypt_blk)
/***********************************************************************
* 3-way 3DES
@@ -418,7 +418,7 @@ ENDPROC(des3_ede_x86_64_crypt_blk)
#define __movq(src, dst) \
movq src, dst;
-ENTRY(des3_ede_x86_64_crypt_blk_3way)
+SYM_FUNC_START(des3_ede_x86_64_crypt_blk_3way)
/* input:
* %rdi: ctx, round keys
* %rsi: dst (3 blocks)
@@ -529,7 +529,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way)
popq %rbx;
ret;
-ENDPROC(des3_ede_x86_64_crypt_blk_3way)
+SYM_FUNC_END(des3_ede_x86_64_crypt_blk_3way)
.section .rodata, "a", @progbits
.align 16
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S
index 5d53effe8abe..bb9735fbb865 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_asm.S
+++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S
@@ -44,7 +44,7 @@
* T2
* T3
*/
-__clmul_gf128mul_ble:
+SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
movaps DATA, T1
pshufd $0b01001110, DATA, T2
pshufd $0b01001110, SHASH, T3
@@ -87,10 +87,10 @@ __clmul_gf128mul_ble:
pxor T2, T1
pxor T1, DATA
ret
-ENDPROC(__clmul_gf128mul_ble)
+SYM_FUNC_END(__clmul_gf128mul_ble)
/* void clmul_ghash_mul(char *dst, const u128 *shash) */
-ENTRY(clmul_ghash_mul)
+SYM_FUNC_START(clmul_ghash_mul)
FRAME_BEGIN
movups (%rdi), DATA
movups (%rsi), SHASH
@@ -101,13 +101,13 @@ ENTRY(clmul_ghash_mul)
movups DATA, (%rdi)
FRAME_END
ret
-ENDPROC(clmul_ghash_mul)
+SYM_FUNC_END(clmul_ghash_mul)
/*
* void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
* const u128 *shash);
*/
-ENTRY(clmul_ghash_update)
+SYM_FUNC_START(clmul_ghash_update)
FRAME_BEGIN
cmp $16, %rdx
jb .Lupdate_just_ret # check length
@@ -130,4 +130,4 @@ ENTRY(clmul_ghash_update)
.Lupdate_just_ret:
FRAME_END
ret
-ENDPROC(clmul_ghash_update)
+SYM_FUNC_END(clmul_ghash_update)
diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S
index f7946ea1b704..b22c7b936272 100644
--- a/arch/x86/crypto/nh-avx2-x86_64.S
+++ b/arch/x86/crypto/nh-avx2-x86_64.S
@@ -69,7 +69,7 @@
*
* It's guaranteed that message_len % 16 == 0.
*/
-ENTRY(nh_avx2)
+SYM_FUNC_START(nh_avx2)
vmovdqu 0x00(KEY), K0
vmovdqu 0x10(KEY), K1
@@ -154,4 +154,4 @@ ENTRY(nh_avx2)
vpaddq T4, T0, T0
vmovdqu T0, (HASH)
ret
-ENDPROC(nh_avx2)
+SYM_FUNC_END(nh_avx2)
diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S
index 51f52d4ab4bb..d7ae22dd6683 100644
--- a/arch/x86/crypto/nh-sse2-x86_64.S
+++ b/arch/x86/crypto/nh-sse2-x86_64.S
@@ -71,7 +71,7 @@
*
* It's guaranteed that message_len % 16 == 0.
*/
-ENTRY(nh_sse2)
+SYM_FUNC_START(nh_sse2)
movdqu 0x00(KEY), K0
movdqu 0x10(KEY), K1
@@ -120,4 +120,4 @@ ENTRY(nh_sse2)
movdqu T0, 0x00(HASH)
movdqu T1, 0x10(HASH)
ret
-ENDPROC(nh_sse2)
+SYM_FUNC_END(nh_sse2)
diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S
index 8b341bc29d41..d6063feda9da 100644
--- a/arch/x86/crypto/poly1305-avx2-x86_64.S
+++ b/arch/x86/crypto/poly1305-avx2-x86_64.S
@@ -79,7 +79,7 @@ ORMASK: .octa 0x00000000010000000000000001000000
#define d3 %r12
#define d4 %r13
-ENTRY(poly1305_4block_avx2)
+SYM_FUNC_START(poly1305_4block_avx2)
# %rdi: Accumulator h[5]
# %rsi: 64 byte input block m
# %rdx: Poly1305 key r[5]
@@ -387,4 +387,4 @@ ENTRY(poly1305_4block_avx2)
pop %r12
pop %rbx
ret
-ENDPROC(poly1305_4block_avx2)
+SYM_FUNC_END(poly1305_4block_avx2)
diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S
index 5578f846e622..d8ea29b96640 100644
--- a/arch/x86/crypto/poly1305-sse2-x86_64.S
+++ b/arch/x86/crypto/poly1305-sse2-x86_64.S
@@ -46,7 +46,7 @@ ORMASK: .octa 0x00000000010000000000000001000000
#define d3 %r11
#define d4 %r12
-ENTRY(poly1305_block_sse2)
+SYM_FUNC_START(poly1305_block_sse2)
# %rdi: Accumulator h[5]
# %rsi: 16 byte input block m
# %rdx: Poly1305 key r[5]
@@ -276,7 +276,7 @@ ENTRY(poly1305_block_sse2)
pop %r12
pop %rbx
ret
-ENDPROC(poly1305_block_sse2)
+SYM_FUNC_END(poly1305_block_sse2)
#define u0 0x00(%r8)
@@ -301,7 +301,7 @@ ENDPROC(poly1305_block_sse2)
#undef d0
#define d0 %r13
-ENTRY(poly1305_2block_sse2)
+SYM_FUNC_START(poly1305_2block_sse2)
# %rdi: Accumulator h[5]
# %rsi: 16 byte input block m
# %rdx: Poly1305 key r[5]
@@ -587,4 +587,4 @@ ENTRY(poly1305_2block_sse2)
pop %r12
pop %rbx
ret
-ENDPROC(poly1305_2block_sse2)
+SYM_FUNC_END(poly1305_2block_sse2)
diff --git a/arch/x86/crypto/poly1305_glue.c b/arch/x86/crypto/poly1305_glue.c
index 4a1c05dce950..370cd88068ec 100644
--- a/arch/x86/crypto/poly1305_glue.c
+++ b/arch/x86/crypto/poly1305_glue.c
@@ -7,47 +7,23 @@
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
+#include <crypto/internal/poly1305.h>
#include <crypto/internal/simd.h>
-#include <crypto/poly1305.h>
#include <linux/crypto.h>
+#include <linux/jump_label.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/simd.h>
-struct poly1305_simd_desc_ctx {
- struct poly1305_desc_ctx base;
- /* derived key u set? */
- bool uset;
-#ifdef CONFIG_AS_AVX2
- /* derived keys r^3, r^4 set? */
- bool wset;
-#endif
- /* derived Poly1305 key r^2 */
- u32 u[5];
- /* ... silently appended r^3 and r^4 when using AVX2 */
-};
-
asmlinkage void poly1305_block_sse2(u32 *h, const u8 *src,
const u32 *r, unsigned int blocks);
asmlinkage void poly1305_2block_sse2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u);
-#ifdef CONFIG_AS_AVX2
asmlinkage void poly1305_4block_avx2(u32 *h, const u8 *src, const u32 *r,
unsigned int blocks, const u32 *u);
-static bool poly1305_use_avx2;
-#endif
-
-static int poly1305_simd_init(struct shash_desc *desc)
-{
- struct poly1305_simd_desc_ctx *sctx = shash_desc_ctx(desc);
- sctx->uset = false;
-#ifdef CONFIG_AS_AVX2
- sctx->wset = false;
-#endif
-
- return crypto_poly1305_init(desc);
-}
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_simd);
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2);
static void poly1305_simd_mult(u32 *a, const u32 *b)
{
@@ -60,72 +36,85 @@ static void poly1305_simd_mult(u32 *a, const u32 *b)
poly1305_block_sse2(a, m, b, 1);
}
+static unsigned int poly1305_scalar_blocks(struct poly1305_desc_ctx *dctx,
+ const u8 *src, unsigned int srclen)
+{
+ unsigned int datalen;
+
+ if (unlikely(!dctx->sset)) {
+ datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
+ src += srclen - datalen;
+ srclen = datalen;
+ }
+ if (srclen >= POLY1305_BLOCK_SIZE) {
+ poly1305_core_blocks(&dctx->h, dctx->r, src,
+ srclen / POLY1305_BLOCK_SIZE, 1);
+ srclen %= POLY1305_BLOCK_SIZE;
+ }
+ return srclen;
+}
+
static unsigned int poly1305_simd_blocks(struct poly1305_desc_ctx *dctx,
const u8 *src, unsigned int srclen)
{
- struct poly1305_simd_desc_ctx *sctx;
unsigned int blocks, datalen;
- BUILD_BUG_ON(offsetof(struct poly1305_simd_desc_ctx, base));
- sctx = container_of(dctx, struct poly1305_simd_desc_ctx, base);
-
if (unlikely(!dctx->sset)) {
datalen = crypto_poly1305_setdesckey(dctx, src, srclen);
src += srclen - datalen;
srclen = datalen;
}
-#ifdef CONFIG_AS_AVX2
- if (poly1305_use_avx2 && srclen >= POLY1305_BLOCK_SIZE * 4) {
- if (unlikely(!sctx->wset)) {
- if (!sctx->uset) {
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u, dctx->r.r);
- sctx->uset = true;
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ static_branch_likely(&poly1305_use_avx2) &&
+ srclen >= POLY1305_BLOCK_SIZE * 4) {
+ if (unlikely(dctx->rset < 4)) {
+ if (dctx->rset < 2) {
+ dctx->r[1] = dctx->r[0];
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
}
- memcpy(sctx->u + 5, sctx->u, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u + 5, dctx->r.r);
- memcpy(sctx->u + 10, sctx->u + 5, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u + 10, dctx->r.r);
- sctx->wset = true;
+ dctx->r[2] = dctx->r[1];
+ poly1305_simd_mult(dctx->r[2].r, dctx->r[0].r);
+ dctx->r[3] = dctx->r[2];
+ poly1305_simd_mult(dctx->r[3].r, dctx->r[0].r);
+ dctx->rset = 4;
}
blocks = srclen / (POLY1305_BLOCK_SIZE * 4);
- poly1305_4block_avx2(dctx->h.h, src, dctx->r.r, blocks,
- sctx->u);
+ poly1305_4block_avx2(dctx->h.h, src, dctx->r[0].r, blocks,
+ dctx->r[1].r);
src += POLY1305_BLOCK_SIZE * 4 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 4 * blocks;
}
-#endif
+
if (likely(srclen >= POLY1305_BLOCK_SIZE * 2)) {
- if (unlikely(!sctx->uset)) {
- memcpy(sctx->u, dctx->r.r, sizeof(sctx->u));
- poly1305_simd_mult(sctx->u, dctx->r.r);
- sctx->uset = true;
+ if (unlikely(dctx->rset < 2)) {
+ dctx->r[1] = dctx->r[0];
+ poly1305_simd_mult(dctx->r[1].r, dctx->r[0].r);
+ dctx->rset = 2;
}
blocks = srclen / (POLY1305_BLOCK_SIZE * 2);
- poly1305_2block_sse2(dctx->h.h, src, dctx->r.r, blocks,
- sctx->u);
+ poly1305_2block_sse2(dctx->h.h, src, dctx->r[0].r,
+ blocks, dctx->r[1].r);
src += POLY1305_BLOCK_SIZE * 2 * blocks;
srclen -= POLY1305_BLOCK_SIZE * 2 * blocks;
}
if (srclen >= POLY1305_BLOCK_SIZE) {
- poly1305_block_sse2(dctx->h.h, src, dctx->r.r, 1);
+ poly1305_block_sse2(dctx->h.h, src, dctx->r[0].r, 1);
srclen -= POLY1305_BLOCK_SIZE;
}
return srclen;
}
-static int poly1305_simd_update(struct shash_desc *desc,
- const u8 *src, unsigned int srclen)
+void poly1305_init_arch(struct poly1305_desc_ctx *desc, const u8 *key)
{
- struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
- unsigned int bytes;
-
- /* kernel_fpu_begin/end is costly, use fallback for small updates */
- if (srclen <= 288 || !crypto_simd_usable())
- return crypto_poly1305_update(desc, src, srclen);
+ poly1305_init_generic(desc, key);
+}
+EXPORT_SYMBOL(poly1305_init_arch);
- kernel_fpu_begin();
+void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
+ unsigned int srclen)
+{
+ unsigned int bytes;
if (unlikely(dctx->buflen)) {
bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen);
@@ -135,34 +124,84 @@ static int poly1305_simd_update(struct shash_desc *desc,
dctx->buflen += bytes;
if (dctx->buflen == POLY1305_BLOCK_SIZE) {
- poly1305_simd_blocks(dctx, dctx->buf,
- POLY1305_BLOCK_SIZE);
+ if (static_branch_likely(&poly1305_use_simd) &&
+ likely(crypto_simd_usable())) {
+ kernel_fpu_begin();
+ poly1305_simd_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE);
+ kernel_fpu_end();
+ } else {
+ poly1305_scalar_blocks(dctx, dctx->buf,
+ POLY1305_BLOCK_SIZE);
+ }
dctx->buflen = 0;
}
}
if (likely(srclen >= POLY1305_BLOCK_SIZE)) {
- bytes = poly1305_simd_blocks(dctx, src, srclen);
+ if (static_branch_likely(&poly1305_use_simd) &&
+ likely(crypto_simd_usable())) {
+ kernel_fpu_begin();
+ bytes = poly1305_simd_blocks(dctx, src, srclen);
+ kernel_fpu_end();
+ } else {
+ bytes = poly1305_scalar_blocks(dctx, src, srclen);
+ }
src += srclen - bytes;
srclen = bytes;
}
- kernel_fpu_end();
-
if (unlikely(srclen)) {
dctx->buflen = srclen;
memcpy(dctx->buf, src, srclen);
}
+}
+EXPORT_SYMBOL(poly1305_update_arch);
+
+void poly1305_final_arch(struct poly1305_desc_ctx *desc, u8 *digest)
+{
+ poly1305_final_generic(desc, digest);
+}
+EXPORT_SYMBOL(poly1305_final_arch);
+
+static int crypto_poly1305_init(struct shash_desc *desc)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ poly1305_core_init(&dctx->h);
+ dctx->buflen = 0;
+ dctx->rset = 0;
+ dctx->sset = false;
+
+ return 0;
+}
+
+static int crypto_poly1305_final(struct shash_desc *desc, u8 *dst)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+
+ if (unlikely(!dctx->sset))
+ return -ENOKEY;
+
+ poly1305_final_generic(dctx, dst);
+ return 0;
+}
+
+static int poly1305_simd_update(struct shash_desc *desc,
+ const u8 *src, unsigned int srclen)
+{
+ struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);
+ poly1305_update_arch(dctx, src, srclen);
return 0;
}
static struct shash_alg alg = {
.digestsize = POLY1305_DIGEST_SIZE,
- .init = poly1305_simd_init,
+ .init = crypto_poly1305_init,
.update = poly1305_simd_update,
.final = crypto_poly1305_final,
- .descsize = sizeof(struct poly1305_simd_desc_ctx),
+ .descsize = sizeof(struct poly1305_desc_ctx),
.base = {
.cra_name = "poly1305",
.cra_driver_name = "poly1305-simd",
@@ -175,16 +214,16 @@ static struct shash_alg alg = {
static int __init poly1305_simd_mod_init(void)
{
if (!boot_cpu_has(X86_FEATURE_XMM2))
- return -ENODEV;
-
-#ifdef CONFIG_AS_AVX2
- poly1305_use_avx2 = boot_cpu_has(X86_FEATURE_AVX) &&
- boot_cpu_has(X86_FEATURE_AVX2) &&
- cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL);
- alg.descsize = sizeof(struct poly1305_simd_desc_ctx);
- if (poly1305_use_avx2)
- alg.descsize += 10 * sizeof(u32);
-#endif
+ return 0;
+
+ static_branch_enable(&poly1305_use_simd);
+
+ if (IS_ENABLED(CONFIG_AS_AVX2) &&
+ boot_cpu_has(X86_FEATURE_AVX) &&
+ boot_cpu_has(X86_FEATURE_AVX2) &&
+ cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
+ static_branch_enable(&poly1305_use_avx2);
+
return crypto_register_shash(&alg);
}
diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index ddc51dbba3af..ba9e4c1e7f5c 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -555,7 +555,7 @@
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
.align 8
-__serpent_enc_blk8_avx:
+SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
@@ -606,10 +606,10 @@ __serpent_enc_blk8_avx:
write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
ret;
-ENDPROC(__serpent_enc_blk8_avx)
+SYM_FUNC_END(__serpent_enc_blk8_avx)
.align 8
-__serpent_dec_blk8_avx:
+SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: encrypted blocks
@@ -660,9 +660,9 @@ __serpent_dec_blk8_avx:
write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
ret;
-ENDPROC(__serpent_dec_blk8_avx)
+SYM_FUNC_END(__serpent_dec_blk8_avx)
-ENTRY(serpent_ecb_enc_8way_avx)
+SYM_FUNC_START(serpent_ecb_enc_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -678,9 +678,9 @@ ENTRY(serpent_ecb_enc_8way_avx)
FRAME_END
ret;
-ENDPROC(serpent_ecb_enc_8way_avx)
+SYM_FUNC_END(serpent_ecb_enc_8way_avx)
-ENTRY(serpent_ecb_dec_8way_avx)
+SYM_FUNC_START(serpent_ecb_dec_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -696,9 +696,9 @@ ENTRY(serpent_ecb_dec_8way_avx)
FRAME_END
ret;
-ENDPROC(serpent_ecb_dec_8way_avx)
+SYM_FUNC_END(serpent_ecb_dec_8way_avx)
-ENTRY(serpent_cbc_dec_8way_avx)
+SYM_FUNC_START(serpent_cbc_dec_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -714,9 +714,9 @@ ENTRY(serpent_cbc_dec_8way_avx)
FRAME_END
ret;
-ENDPROC(serpent_cbc_dec_8way_avx)
+SYM_FUNC_END(serpent_cbc_dec_8way_avx)
-ENTRY(serpent_ctr_8way_avx)
+SYM_FUNC_START(serpent_ctr_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -734,9 +734,9 @@ ENTRY(serpent_ctr_8way_avx)
FRAME_END
ret;
-ENDPROC(serpent_ctr_8way_avx)
+SYM_FUNC_END(serpent_ctr_8way_avx)
-ENTRY(serpent_xts_enc_8way_avx)
+SYM_FUNC_START(serpent_xts_enc_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -756,9 +756,9 @@ ENTRY(serpent_xts_enc_8way_avx)
FRAME_END
ret;
-ENDPROC(serpent_xts_enc_8way_avx)
+SYM_FUNC_END(serpent_xts_enc_8way_avx)
-ENTRY(serpent_xts_dec_8way_avx)
+SYM_FUNC_START(serpent_xts_dec_8way_avx)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -778,4 +778,4 @@ ENTRY(serpent_xts_dec_8way_avx)
FRAME_END
ret;
-ENDPROC(serpent_xts_dec_8way_avx)
+SYM_FUNC_END(serpent_xts_dec_8way_avx)
diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S
index 37bc1d48106c..c9648aeae705 100644
--- a/arch/x86/crypto/serpent-avx2-asm_64.S
+++ b/arch/x86/crypto/serpent-avx2-asm_64.S
@@ -561,7 +561,7 @@
transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
.align 8
-__serpent_enc_blk16:
+SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext
@@ -612,10 +612,10 @@ __serpent_enc_blk16:
write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
ret;
-ENDPROC(__serpent_enc_blk16)
+SYM_FUNC_END(__serpent_enc_blk16)
.align 8
-__serpent_dec_blk16:
+SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
@@ -666,9 +666,9 @@ __serpent_dec_blk16:
write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
ret;
-ENDPROC(__serpent_dec_blk16)
+SYM_FUNC_END(__serpent_dec_blk16)
-ENTRY(serpent_ecb_enc_16way)
+SYM_FUNC_START(serpent_ecb_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -688,9 +688,9 @@ ENTRY(serpent_ecb_enc_16way)
FRAME_END
ret;
-ENDPROC(serpent_ecb_enc_16way)
+SYM_FUNC_END(serpent_ecb_enc_16way)
-ENTRY(serpent_ecb_dec_16way)
+SYM_FUNC_START(serpent_ecb_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -710,9 +710,9 @@ ENTRY(serpent_ecb_dec_16way)
FRAME_END
ret;
-ENDPROC(serpent_ecb_dec_16way)
+SYM_FUNC_END(serpent_ecb_dec_16way)
-ENTRY(serpent_cbc_dec_16way)
+SYM_FUNC_START(serpent_cbc_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -733,9 +733,9 @@ ENTRY(serpent_cbc_dec_16way)
FRAME_END
ret;
-ENDPROC(serpent_cbc_dec_16way)
+SYM_FUNC_END(serpent_cbc_dec_16way)
-ENTRY(serpent_ctr_16way)
+SYM_FUNC_START(serpent_ctr_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -758,9 +758,9 @@ ENTRY(serpent_ctr_16way)
FRAME_END
ret;
-ENDPROC(serpent_ctr_16way)
+SYM_FUNC_END(serpent_ctr_16way)
-ENTRY(serpent_xts_enc_16way)
+SYM_FUNC_START(serpent_xts_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -784,9 +784,9 @@ ENTRY(serpent_xts_enc_16way)
FRAME_END
ret;
-ENDPROC(serpent_xts_enc_16way)
+SYM_FUNC_END(serpent_xts_enc_16way)
-ENTRY(serpent_xts_dec_16way)
+SYM_FUNC_START(serpent_xts_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
@@ -810,4 +810,4 @@ ENTRY(serpent_xts_dec_16way)
FRAME_END
ret;
-ENDPROC(serpent_xts_dec_16way)
+SYM_FUNC_END(serpent_xts_dec_16way)
diff --git a/arch/x86/crypto/serpent-sse2-i586-asm_32.S b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
index e5c4a4690ca9..6379b99cb722 100644
--- a/arch/x86/crypto/serpent-sse2-i586-asm_32.S
+++ b/arch/x86/crypto/serpent-sse2-i586-asm_32.S
@@ -497,7 +497,7 @@
pxor t0, x3; \
movdqu x3, (3*4*4)(out);
-ENTRY(__serpent_enc_blk_4way)
+SYM_FUNC_START(__serpent_enc_blk_4way)
/* input:
* arg_ctx(%esp): ctx, CTX
* arg_dst(%esp): dst
@@ -559,9 +559,9 @@ ENTRY(__serpent_enc_blk_4way)
xor_blocks(%eax, RA, RB, RC, RD, RT0, RT1, RE);
ret;
-ENDPROC(__serpent_enc_blk_4way)
+SYM_FUNC_END(__serpent_enc_blk_4way)
-ENTRY(serpent_dec_blk_4way)
+SYM_FUNC_START(serpent_dec_blk_4way)
/* input:
* arg_ctx(%esp): ctx, CTX
* arg_dst(%esp): dst
@@ -613,4 +613,4 @@ ENTRY(serpent_dec_blk_4way)
write_blocks(%eax, RC, RD, RB, RE, RT0, RT1, RA);
ret;
-ENDPROC(serpent_dec_blk_4way)
+SYM_FUNC_END(serpent_dec_blk_4way)
diff --git a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
index 5e0b3a3e97af..efb6dc17dc90 100644
--- a/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-sse2-x86_64-asm_64.S
@@ -619,7 +619,7 @@
pxor t0, x3; \
movdqu x3, (3*4*4)(out);
-ENTRY(__serpent_enc_blk_8way)
+SYM_FUNC_START(__serpent_enc_blk_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -682,9 +682,9 @@ ENTRY(__serpent_enc_blk_8way)
xor_blocks(%rax, RA2, RB2, RC2, RD2, RK0, RK1, RK2);
ret;
-ENDPROC(__serpent_enc_blk_8way)
+SYM_FUNC_END(__serpent_enc_blk_8way)
-ENTRY(serpent_dec_blk_8way)
+SYM_FUNC_START(serpent_dec_blk_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -736,4 +736,4 @@ ENTRY(serpent_dec_blk_8way)
write_blocks(%rax, RC2, RD2, RB2, RE2, RK0, RK1, RK2);
ret;
-ENDPROC(serpent_dec_blk_8way)
+SYM_FUNC_END(serpent_dec_blk_8way)
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
index 9f712a7dfd79..6decc85ef7b7 100644
--- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S
+++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S
@@ -634,7 +634,7 @@ _loop3:
* param: function's name
*/
.macro SHA1_VECTOR_ASM name
- ENTRY(\name)
+ SYM_FUNC_START(\name)
push %rbx
push %r12
@@ -676,7 +676,7 @@ _loop3:
ret
- ENDPROC(\name)
+ SYM_FUNC_END(\name)
.endm
.section .rodata
diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
index ebbdba72ae07..11efe3a45a1f 100644
--- a/arch/x86/crypto/sha1_ni_asm.S
+++ b/arch/x86/crypto/sha1_ni_asm.S
@@ -95,7 +95,7 @@
*/
.text
.align 32
-ENTRY(sha1_ni_transform)
+SYM_FUNC_START(sha1_ni_transform)
mov %rsp, RSPSAVE
sub $FRAME_SIZE, %rsp
and $~0xF, %rsp
@@ -291,7 +291,7 @@ ENTRY(sha1_ni_transform)
mov RSPSAVE, %rsp
ret
-ENDPROC(sha1_ni_transform)
+SYM_FUNC_END(sha1_ni_transform)
.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16
.align 16
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 99c5b8c4dc38..5d03c1173690 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -67,7 +67,7 @@
* param: function's name
*/
.macro SHA1_VECTOR_ASM name
- ENTRY(\name)
+ SYM_FUNC_START(\name)
push %rbx
push %r12
@@ -101,7 +101,7 @@
pop %rbx
ret
- ENDPROC(\name)
+ SYM_FUNC_END(\name)
.endm
/*
diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index 001bbcf93c79..22e14c8dd2e4 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -347,7 +347,7 @@ a = TMP_
## arg 3 : Num blocks
########################################################################
.text
-ENTRY(sha256_transform_avx)
+SYM_FUNC_START(sha256_transform_avx)
.align 32
pushq %rbx
pushq %r12
@@ -460,7 +460,7 @@ done_hash:
popq %r12
popq %rbx
ret
-ENDPROC(sha256_transform_avx)
+SYM_FUNC_END(sha256_transform_avx)
.section .rodata.cst256.K256, "aM", @progbits, 256
.align 64
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 1420db15dcdd..519b551ad576 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -526,7 +526,7 @@ STACK_SIZE = _RSP + _RSP_SIZE
## arg 3 : Num blocks
########################################################################
.text
-ENTRY(sha256_transform_rorx)
+SYM_FUNC_START(sha256_transform_rorx)
.align 32
pushq %rbx
pushq %r12
@@ -713,7 +713,7 @@ done_hash:
popq %r12
popq %rbx
ret
-ENDPROC(sha256_transform_rorx)
+SYM_FUNC_END(sha256_transform_rorx)
.section .rodata.cst512.K256, "aM", @progbits, 512
.align 64
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index c6c05ed2c16a..69cc2f91dc4c 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -353,7 +353,7 @@ a = TMP_
## arg 3 : Num blocks
########################################################################
.text
-ENTRY(sha256_transform_ssse3)
+SYM_FUNC_START(sha256_transform_ssse3)
.align 32
pushq %rbx
pushq %r12
@@ -471,7 +471,7 @@ done_hash:
popq %rbx
ret
-ENDPROC(sha256_transform_ssse3)
+SYM_FUNC_END(sha256_transform_ssse3)
.section .rodata.cst256.K256, "aM", @progbits, 256
.align 64
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index fb58f58ecfbc..7abade04a3a3 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -97,7 +97,7 @@
.text
.align 32
-ENTRY(sha256_ni_transform)
+SYM_FUNC_START(sha256_ni_transform)
shl $6, NUM_BLKS /* convert to bytes */
jz .Ldone_hash
@@ -327,7 +327,7 @@ ENTRY(sha256_ni_transform)
.Ldone_hash:
ret
-ENDPROC(sha256_ni_transform)
+SYM_FUNC_END(sha256_ni_transform)
.section .rodata.cst256.K256, "aM", @progbits, 256
.align 64
diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index 39235fefe6f7..3704ddd7e5d5 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -277,7 +277,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
# message blocks.
# L is the message length in SHA512 blocks
########################################################################
-ENTRY(sha512_transform_avx)
+SYM_FUNC_START(sha512_transform_avx)
cmp $0, msglen
je nowork
@@ -365,7 +365,7 @@ updateblock:
nowork:
ret
-ENDPROC(sha512_transform_avx)
+SYM_FUNC_END(sha512_transform_avx)
########################################################################
### Binary Data
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index b16d56005162..80d830e7ee09 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -569,7 +569,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
# message blocks.
# L is the message length in SHA512 blocks
########################################################################
-ENTRY(sha512_transform_rorx)
+SYM_FUNC_START(sha512_transform_rorx)
# Allocate Stack Space
mov %rsp, %rax
sub $frame_size, %rsp
@@ -682,7 +682,7 @@ done_hash:
# Restore Stack Pointer
mov frame_RSPSAVE(%rsp), %rsp
ret
-ENDPROC(sha512_transform_rorx)
+SYM_FUNC_END(sha512_transform_rorx)
########################################################################
### Binary Data
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index 66bbd9058a90..838f984e95d9 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -275,7 +275,7 @@ frame_size = frame_GPRSAVE + GPRSAVE_SIZE
# message blocks.
# L is the message length in SHA512 blocks.
########################################################################
-ENTRY(sha512_transform_ssse3)
+SYM_FUNC_START(sha512_transform_ssse3)
cmp $0, msglen
je nowork
@@ -364,7 +364,7 @@ updateblock:
nowork:
ret
-ENDPROC(sha512_transform_ssse3)
+SYM_FUNC_END(sha512_transform_ssse3)
########################################################################
### Binary Data
diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 698b8f2a56e2..a5151393bb2f 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -234,7 +234,7 @@
vpxor x3, wkey, x3;
.align 8
-__twofish_enc_blk8:
+SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
/* input:
* %rdi: ctx, CTX
* RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: blocks
@@ -273,10 +273,10 @@ __twofish_enc_blk8:
outunpack_blocks(RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
ret;
-ENDPROC(__twofish_enc_blk8)
+SYM_FUNC_END(__twofish_enc_blk8)
.align 8
-__twofish_dec_blk8:
+SYM_FUNC_START_LOCAL(__twofish_dec_blk8)
/* input:
* %rdi: ctx, CTX
* RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2: encrypted blocks
@@ -313,9 +313,9 @@ __twofish_dec_blk8:
outunpack_blocks(RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
ret;
-ENDPROC(__twofish_dec_blk8)
+SYM_FUNC_END(__twofish_dec_blk8)
-ENTRY(twofish_ecb_enc_8way)
+SYM_FUNC_START(twofish_ecb_enc_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -333,9 +333,9 @@ ENTRY(twofish_ecb_enc_8way)
FRAME_END
ret;
-ENDPROC(twofish_ecb_enc_8way)
+SYM_FUNC_END(twofish_ecb_enc_8way)
-ENTRY(twofish_ecb_dec_8way)
+SYM_FUNC_START(twofish_ecb_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -353,9 +353,9 @@ ENTRY(twofish_ecb_dec_8way)
FRAME_END
ret;
-ENDPROC(twofish_ecb_dec_8way)
+SYM_FUNC_END(twofish_ecb_dec_8way)
-ENTRY(twofish_cbc_dec_8way)
+SYM_FUNC_START(twofish_cbc_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -378,9 +378,9 @@ ENTRY(twofish_cbc_dec_8way)
FRAME_END
ret;
-ENDPROC(twofish_cbc_dec_8way)
+SYM_FUNC_END(twofish_cbc_dec_8way)
-ENTRY(twofish_ctr_8way)
+SYM_FUNC_START(twofish_ctr_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -405,9 +405,9 @@ ENTRY(twofish_ctr_8way)
FRAME_END
ret;
-ENDPROC(twofish_ctr_8way)
+SYM_FUNC_END(twofish_ctr_8way)
-ENTRY(twofish_xts_enc_8way)
+SYM_FUNC_START(twofish_xts_enc_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -429,9 +429,9 @@ ENTRY(twofish_xts_enc_8way)
FRAME_END
ret;
-ENDPROC(twofish_xts_enc_8way)
+SYM_FUNC_END(twofish_xts_enc_8way)
-ENTRY(twofish_xts_dec_8way)
+SYM_FUNC_START(twofish_xts_dec_8way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -453,4 +453,4 @@ ENTRY(twofish_xts_dec_8way)
FRAME_END
ret;
-ENDPROC(twofish_xts_dec_8way)
+SYM_FUNC_END(twofish_xts_dec_8way)
diff --git a/arch/x86/crypto/twofish-i586-asm_32.S b/arch/x86/crypto/twofish-i586-asm_32.S
index 290cc4e9a6fe..a6f09e4f2e46 100644
--- a/arch/x86/crypto/twofish-i586-asm_32.S
+++ b/arch/x86/crypto/twofish-i586-asm_32.S
@@ -207,7 +207,7 @@
xor %esi, d ## D;\
ror $1, d ## D;
-ENTRY(twofish_enc_blk)
+SYM_FUNC_START(twofish_enc_blk)
push %ebp /* save registers according to calling convention*/
push %ebx
push %esi
@@ -261,9 +261,9 @@ ENTRY(twofish_enc_blk)
pop %ebp
mov $1, %eax
ret
-ENDPROC(twofish_enc_blk)
+SYM_FUNC_END(twofish_enc_blk)
-ENTRY(twofish_dec_blk)
+SYM_FUNC_START(twofish_dec_blk)
push %ebp /* save registers according to calling convention*/
push %ebx
push %esi
@@ -318,4 +318,4 @@ ENTRY(twofish_dec_blk)
pop %ebp
mov $1, %eax
ret
-ENDPROC(twofish_dec_blk)
+SYM_FUNC_END(twofish_dec_blk)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
index e495e07c7f1b..fc23552afe37 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64-3way.S
@@ -220,7 +220,7 @@
rorq $32, RAB2; \
outunpack3(mov, RIO, 2, RAB, 2);
-ENTRY(__twofish_enc_blk_3way)
+SYM_FUNC_START(__twofish_enc_blk_3way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -267,9 +267,9 @@ ENTRY(__twofish_enc_blk_3way)
popq %r12;
popq %r13;
ret;
-ENDPROC(__twofish_enc_blk_3way)
+SYM_FUNC_END(__twofish_enc_blk_3way)
-ENTRY(twofish_dec_blk_3way)
+SYM_FUNC_START(twofish_dec_blk_3way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
@@ -302,4 +302,4 @@ ENTRY(twofish_dec_blk_3way)
popq %r12;
popq %r13;
ret;
-ENDPROC(twofish_dec_blk_3way)
+SYM_FUNC_END(twofish_dec_blk_3way)
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index ecef2cb9f43f..d2e56232494a 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -202,7 +202,7 @@
xor %r8d, d ## D;\
ror $1, d ## D;
-ENTRY(twofish_enc_blk)
+SYM_FUNC_START(twofish_enc_blk)
pushq R1
/* %rdi contains the ctx address */
@@ -253,9 +253,9 @@ ENTRY(twofish_enc_blk)
popq R1
movl $1,%eax
ret
-ENDPROC(twofish_enc_blk)
+SYM_FUNC_END(twofish_enc_blk)
-ENTRY(twofish_dec_blk)
+SYM_FUNC_START(twofish_dec_blk)
pushq R1
/* %rdi contains the ctx address */
@@ -305,4 +305,4 @@ ENTRY(twofish_dec_blk)
popq R1
movl $1,%eax
ret
-ENDPROC(twofish_dec_blk)
+SYM_FUNC_END(twofish_dec_blk)
diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 515c0ceeb4a3..0789e13ece90 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -354,7 +354,7 @@ For 32-bit we have the following conventions - kernel is built with
.macro CALL_enter_from_user_mode
#ifdef CONFIG_CONTEXT_TRACKING
#ifdef CONFIG_JUMP_LABEL
- STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_enabled, def=0
+ STATIC_JUMP_IF_FALSE .Lafter_call_\@, context_tracking_key, def=0
#endif
call enter_from_user_mode
.Lafter_call_\@:
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 3f8e22615812..9747876980b5 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -33,6 +33,7 @@
#include <asm/cpufeature.h>
#include <asm/fpu/api.h>
#include <asm/nospec-branch.h>
+#include <asm/io_bitmap.h>
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -196,6 +197,9 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
/* Reload ti->flags; we may have rescheduled above. */
cached_flags = READ_ONCE(ti->flags);
+ if (unlikely(cached_flags & _TIF_IO_BITMAP))
+ tss_update_io_bitmap();
+
fpregs_assert_state_consistent();
if (unlikely(cached_flags & _TIF_NEED_FPU_LOAD))
switch_fpu_return();
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index f83ca5aa8b77..5832b11f01bb 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -172,7 +172,7 @@
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
.if \no_user_check == 0
/* coming from usermode? */
- testl $SEGMENT_RPL_MASK, PT_CS(%esp)
+ testl $USER_SEGMENT_RPL_MASK, PT_CS(%esp)
jz .Lend_\@
.endif
/* On user-cr3? */
@@ -205,64 +205,76 @@
#define CS_FROM_ENTRY_STACK (1 << 31)
#define CS_FROM_USER_CR3 (1 << 30)
#define CS_FROM_KERNEL (1 << 29)
+#define CS_FROM_ESPFIX (1 << 28)
.macro FIXUP_FRAME
/*
* The high bits of the CS dword (__csh) are used for CS_FROM_*.
* Clear them in case hardware didn't do this for us.
*/
- andl $0x0000ffff, 3*4(%esp)
+ andl $0x0000ffff, 4*4(%esp)
#ifdef CONFIG_VM86
- testl $X86_EFLAGS_VM, 4*4(%esp)
+ testl $X86_EFLAGS_VM, 5*4(%esp)
jnz .Lfrom_usermode_no_fixup_\@
#endif
- testl $SEGMENT_RPL_MASK, 3*4(%esp)
+ testl $USER_SEGMENT_RPL_MASK, 4*4(%esp)
jnz .Lfrom_usermode_no_fixup_\@
- orl $CS_FROM_KERNEL, 3*4(%esp)
+ orl $CS_FROM_KERNEL, 4*4(%esp)
/*
* When we're here from kernel mode; the (exception) stack looks like:
*
- * 5*4(%esp) - <previous context>
- * 4*4(%esp) - flags
- * 3*4(%esp) - cs
- * 2*4(%esp) - ip
- * 1*4(%esp) - orig_eax
- * 0*4(%esp) - gs / function
+ * 6*4(%esp) - <previous context>
+ * 5*4(%esp) - flags
+ * 4*4(%esp) - cs
+ * 3*4(%esp) - ip
+ * 2*4(%esp) - orig_eax
+ * 1*4(%esp) - gs / function
+ * 0*4(%esp) - fs
*
* Lets build a 5 entry IRET frame after that, such that struct pt_regs
* is complete and in particular regs->sp is correct. This gives us
- * the original 5 enties as gap:
+ * the original 6 enties as gap:
*
- * 12*4(%esp) - <previous context>
- * 11*4(%esp) - gap / flags
- * 10*4(%esp) - gap / cs
- * 9*4(%esp) - gap / ip
- * 8*4(%esp) - gap / orig_eax
- * 7*4(%esp) - gap / gs / function
- * 6*4(%esp) - ss
- * 5*4(%esp) - sp
- * 4*4(%esp) - flags
- * 3*4(%esp) - cs
- * 2*4(%esp) - ip
- * 1*4(%esp) - orig_eax
- * 0*4(%esp) - gs / function
+ * 14*4(%esp) - <previous context>
+ * 13*4(%esp) - gap / flags
+ * 12*4(%esp) - gap / cs
+ * 11*4(%esp) - gap / ip
+ * 10*4(%esp) - gap / orig_eax
+ * 9*4(%esp) - gap / gs / function
+ * 8*4(%esp) - gap / fs
+ * 7*4(%esp) - ss
+ * 6*4(%esp) - sp
+ * 5*4(%esp) - flags
+ * 4*4(%esp) - cs
+ * 3*4(%esp) - ip
+ * 2*4(%esp) - orig_eax
+ * 1*4(%esp) - gs / function
+ * 0*4(%esp) - fs
*/
pushl %ss # ss
pushl %esp # sp (points at ss)
- addl $6*4, (%esp) # point sp back at the previous context
- pushl 6*4(%esp) # flags
- pushl 6*4(%esp) # cs
- pushl 6*4(%esp) # ip
- pushl 6*4(%esp) # orig_eax
- pushl 6*4(%esp) # gs / function
+ addl $7*4, (%esp) # point sp back at the previous context
+ pushl 7*4(%esp) # flags
+ pushl 7*4(%esp) # cs
+ pushl 7*4(%esp) # ip
+ pushl 7*4(%esp) # orig_eax
+ pushl 7*4(%esp) # gs / function
+ pushl 7*4(%esp) # fs
.Lfrom_usermode_no_fixup_\@:
.endm
.macro IRET_FRAME
+ /*
+ * We're called with %ds, %es, %fs, and %gs from the interrupted
+ * frame, so we shouldn't use them. Also, we may be in ESPFIX
+ * mode and therefore have a nonzero SS base and an offset ESP,
+ * so any attempt to access the stack needs to use SS. (except for
+ * accesses through %esp, which automatically use SS.)
+ */
testl $CS_FROM_KERNEL, 1*4(%esp)
jz .Lfinished_frame_\@
@@ -276,31 +288,40 @@
movl 5*4(%esp), %eax # (modified) regs->sp
movl 4*4(%esp), %ecx # flags
- movl %ecx, -4(%eax)
+ movl %ecx, %ss:-1*4(%eax)
movl 3*4(%esp), %ecx # cs
andl $0x0000ffff, %ecx
- movl %ecx, -8(%eax)
+ movl %ecx, %ss:-2*4(%eax)
movl 2*4(%esp), %ecx # ip
- movl %ecx, -12(%eax)
+ movl %ecx, %ss:-3*4(%eax)
movl 1*4(%esp), %ecx # eax
- movl %ecx, -16(%eax)
+ movl %ecx, %ss:-4*4(%eax)
popl %ecx
- lea -16(%eax), %esp
+ lea -4*4(%eax), %esp
popl %eax
.Lfinished_frame_\@:
.endm
-.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
+.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
cld
.if \skip_gs == 0
PUSH_GS
.endif
- FIXUP_FRAME
pushl %fs
+
+ pushl %eax
+ movl $(__KERNEL_PERCPU), %eax
+ movl %eax, %fs
+.if \unwind_espfix > 0
+ UNWIND_ESPFIX_STACK
+.endif
+ popl %eax
+
+ FIXUP_FRAME
pushl %es
pushl %ds
pushl \pt_regs_ax
@@ -313,8 +334,6 @@
movl $(__USER_DS), %edx
movl %edx, %ds
movl %edx, %es
- movl $(__KERNEL_PERCPU), %edx
- movl %edx, %fs
.if \skip_gs == 0
SET_KERNEL_GS %edx
.endif
@@ -324,8 +343,8 @@
.endif
.endm
-.macro SAVE_ALL_NMI cr3_reg:req
- SAVE_ALL
+.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0
+ SAVE_ALL unwind_espfix=\unwind_espfix
BUG_IF_WRONG_CR3
@@ -357,6 +376,7 @@
2: popl %es
3: popl %fs
POP_GS \pop
+ IRET_FRAME
.pushsection .fixup, "ax"
4: movl $0, (%esp)
jmp 1b
@@ -395,7 +415,8 @@
.macro CHECK_AND_APPLY_ESPFIX
#ifdef CONFIG_X86_ESPFIX32
-#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
+#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET
ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX
@@ -709,7 +730,7 @@
* %eax: prev task
* %edx: next task
*/
-ENTRY(__switch_to_asm)
+SYM_CODE_START(__switch_to_asm)
/*
* Save callee-saved registers
* This must match the order in struct inactive_task_frame
@@ -718,6 +739,11 @@ ENTRY(__switch_to_asm)
pushl %ebx
pushl %edi
pushl %esi
+ /*
+ * Flags are saved to prevent AC leakage. This could go
+ * away if objtool would have 32bit support to verify
+ * the STAC/CLAC correctness.
+ */
pushfl
/* switch stack */
@@ -740,15 +766,16 @@ ENTRY(__switch_to_asm)
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
#endif
- /* restore callee-saved registers */
+ /* Restore flags or the incoming task to restore AC state. */
popfl
+ /* restore callee-saved registers */
popl %esi
popl %edi
popl %ebx
popl %ebp
jmp __switch_to
-END(__switch_to_asm)
+SYM_CODE_END(__switch_to_asm)
/*
* The unwinder expects the last frame on the stack to always be at the same
@@ -757,7 +784,7 @@ END(__switch_to_asm)
* asmlinkage function so its argument has to be pushed on the stack. This
* wrapper creates a proper "end of stack" frame header before the call.
*/
-ENTRY(schedule_tail_wrapper)
+SYM_FUNC_START(schedule_tail_wrapper)
FRAME_BEGIN
pushl %eax
@@ -766,7 +793,7 @@ ENTRY(schedule_tail_wrapper)
FRAME_END
ret
-ENDPROC(schedule_tail_wrapper)
+SYM_FUNC_END(schedule_tail_wrapper)
/*
* A newly forked process directly context switches into this address.
*
@@ -774,7 +801,7 @@ ENDPROC(schedule_tail_wrapper)
* ebx: kernel thread func (NULL for user thread)
* edi: kernel thread arg
*/
-ENTRY(ret_from_fork)
+SYM_CODE_START(ret_from_fork)
call schedule_tail_wrapper
testl %ebx, %ebx
@@ -797,7 +824,7 @@ ENTRY(ret_from_fork)
*/
movl $0, PT_EAX(%esp)
jmp 2b
-END(ret_from_fork)
+SYM_CODE_END(ret_from_fork)
/*
* Return to user mode is not as complex as all this looks,
@@ -807,8 +834,7 @@ END(ret_from_fork)
*/
# userspace resumption stub bypassing syscall exit tracing
- ALIGN
-ret_from_exception:
+SYM_CODE_START_LOCAL(ret_from_exception)
preempt_stop(CLBR_ANY)
ret_from_intr:
#ifdef CONFIG_VM86
@@ -825,15 +851,14 @@ ret_from_intr:
cmpl $USER_RPL, %eax
jb restore_all_kernel # not returning to v8086 or userspace
-ENTRY(resume_userspace)
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
movl %esp, %eax
call prepare_exit_to_usermode
jmp restore_all
-END(ret_from_exception)
+SYM_CODE_END(ret_from_exception)
-GLOBAL(__begin_SYSENTER_singlestep_region)
+SYM_ENTRY(__begin_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
/*
* All code from here through __end_SYSENTER_singlestep_region is subject
* to being single-stepped if a user program sets TF and executes SYSENTER.
@@ -848,9 +873,10 @@ GLOBAL(__begin_SYSENTER_singlestep_region)
* Xen doesn't set %esp to be precisely what the normal SYSENTER
* entry point expects, so fix it up before using the normal path.
*/
-ENTRY(xen_sysenter_target)
+SYM_CODE_START(xen_sysenter_target)
addl $5*4, %esp /* remove xen-provided frame */
jmp .Lsysenter_past_esp
+SYM_CODE_END(xen_sysenter_target)
#endif
/*
@@ -885,7 +911,7 @@ ENTRY(xen_sysenter_target)
* ebp user stack
* 0(%ebp) arg6
*/
-ENTRY(entry_SYSENTER_32)
+SYM_FUNC_START(entry_SYSENTER_32)
/*
* On entry-stack with all userspace-regs live - save and
* restore eflags and %eax to use it as scratch-reg for the cr3
@@ -1012,8 +1038,8 @@ ENTRY(entry_SYSENTER_32)
pushl $X86_EFLAGS_FIXED
popfl
jmp .Lsysenter_flags_fixed
-GLOBAL(__end_SYSENTER_singlestep_region)
-ENDPROC(entry_SYSENTER_32)
+SYM_ENTRY(__end_SYSENTER_singlestep_region, SYM_L_GLOBAL, SYM_A_NONE)
+SYM_FUNC_END(entry_SYSENTER_32)
/*
* 32-bit legacy system call entry.
@@ -1043,7 +1069,7 @@ ENDPROC(entry_SYSENTER_32)
* edi arg5
* ebp arg6
*/
-ENTRY(entry_INT80_32)
+SYM_FUNC_START(entry_INT80_32)
ASM_CLAC
pushl %eax /* pt_regs->orig_ax */
@@ -1075,7 +1101,6 @@ restore_all:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
.Lirq_return:
- IRET_FRAME
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
* when returning from IPI handler and when returning from
@@ -1100,7 +1125,7 @@ restore_all_kernel:
jmp .Lirq_return
.section .fixup, "ax"
-ENTRY(iret_exc )
+SYM_CODE_START(iret_exc)
pushl $0 # no error code
pushl $do_iret_error
@@ -1117,9 +1142,10 @@ ENTRY(iret_exc )
#endif
jmp common_exception
+SYM_CODE_END(iret_exc)
.previous
_ASM_EXTABLE(.Lirq_return, iret_exc)
-ENDPROC(entry_INT80_32)
+SYM_FUNC_END(entry_INT80_32)
.macro FIXUP_ESPFIX_STACK
/*
@@ -1128,30 +1154,43 @@ ENDPROC(entry_INT80_32)
* We can't call C functions using the ESPFIX stack. This code reads
* the high word of the segment base from the GDT and swiches to the
* normal stack and adjusts ESP with the matching offset.
+ *
+ * We might be on user CR3 here, so percpu data is not mapped and we can't
+ * access the GDT through the percpu segment. Instead, use SGDT to find
+ * the cpu_entry_area alias of the GDT.
*/
#ifdef CONFIG_X86_ESPFIX32
/* fixup the stack */
- mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
- mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
+ pushl %ecx
+ subl $2*4, %esp
+ sgdt (%esp)
+ movl 2(%esp), %ecx /* GDT address */
+ /*
+ * Careful: ECX is a linear pointer, so we need to force base
+ * zero. %cs is the only known-linear segment we have right now.
+ */
+ mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */
+ mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */
shl $16, %eax
+ addl $2*4, %esp
+ popl %ecx
addl %esp, %eax /* the adjusted stack pointer */
pushl $__KERNEL_DS
pushl %eax
lss (%esp), %esp /* switch to the normal stack segment */
#endif
.endm
+
.macro UNWIND_ESPFIX_STACK
+ /* It's safe to clobber %eax, all other regs need to be preserved */
#ifdef CONFIG_X86_ESPFIX32
movl %ss, %eax
/* see if on espfix stack */
cmpw $__ESPFIX_SS, %ax
- jne 27f
- movl $__KERNEL_DS, %eax
- movl %eax, %ds
- movl %eax, %es
+ jne .Lno_fixup_\@
/* switch to normal stack */
FIXUP_ESPFIX_STACK
-27:
+.Lno_fixup_\@:
#endif
.endm
@@ -1160,7 +1199,7 @@ ENDPROC(entry_INT80_32)
* We pack 1 stub into every 8-byte block.
*/
.align 8
-ENTRY(irq_entries_start)
+SYM_CODE_START(irq_entries_start)
vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
pushl $(~vector+0x80) /* Note: always in signed byte range */
@@ -1168,11 +1207,11 @@ ENTRY(irq_entries_start)
jmp common_interrupt
.align 8
.endr
-END(irq_entries_start)
+SYM_CODE_END(irq_entries_start)
#ifdef CONFIG_X86_LOCAL_APIC
.align 8
-ENTRY(spurious_entries_start)
+SYM_CODE_START(spurious_entries_start)
vector=FIRST_SYSTEM_VECTOR
.rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
pushl $(~vector+0x80) /* Note: always in signed byte range */
@@ -1180,9 +1219,9 @@ ENTRY(spurious_entries_start)
jmp common_spurious
.align 8
.endr
-END(spurious_entries_start)
+SYM_CODE_END(spurious_entries_start)
-common_spurious:
+SYM_CODE_START_LOCAL(common_spurious)
ASM_CLAC
addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
SAVE_ALL switch_stacks=1
@@ -1191,7 +1230,7 @@ common_spurious:
movl %esp, %eax
call smp_spurious_interrupt
jmp ret_from_intr
-ENDPROC(common_spurious)
+SYM_CODE_END(common_spurious)
#endif
/*
@@ -1199,7 +1238,7 @@ ENDPROC(common_spurious)
* so IRQ-flags tracing has to follow that:
*/
.p2align CONFIG_X86_L1_CACHE_SHIFT
-common_interrupt:
+SYM_CODE_START_LOCAL(common_interrupt)
ASM_CLAC
addl $-0x80, (%esp) /* Adjust vector into the [-256, -1] range */
@@ -1209,10 +1248,10 @@ common_interrupt:
movl %esp, %eax
call do_IRQ
jmp ret_from_intr
-ENDPROC(common_interrupt)
+SYM_CODE_END(common_interrupt)
#define BUILD_INTERRUPT3(name, nr, fn) \
-ENTRY(name) \
+SYM_FUNC_START(name) \
ASM_CLAC; \
pushl $~(nr); \
SAVE_ALL switch_stacks=1; \
@@ -1221,7 +1260,7 @@ ENTRY(name) \
movl %esp, %eax; \
call fn; \
jmp ret_from_intr; \
-ENDPROC(name)
+SYM_FUNC_END(name)
#define BUILD_INTERRUPT(name, nr) \
BUILD_INTERRUPT3(name, nr, smp_##name); \
@@ -1229,14 +1268,14 @@ ENDPROC(name)
/* The include is where all of the SMP etc. interrupts come from */
#include <asm/entry_arch.h>
-ENTRY(coprocessor_error)
+SYM_CODE_START(coprocessor_error)
ASM_CLAC
pushl $0
pushl $do_coprocessor_error
jmp common_exception
-END(coprocessor_error)
+SYM_CODE_END(coprocessor_error)
-ENTRY(simd_coprocessor_error)
+SYM_CODE_START(simd_coprocessor_error)
ASM_CLAC
pushl $0
#ifdef CONFIG_X86_INVD_BUG
@@ -1248,104 +1287,99 @@ ENTRY(simd_coprocessor_error)
pushl $do_simd_coprocessor_error
#endif
jmp common_exception
-END(simd_coprocessor_error)
+SYM_CODE_END(simd_coprocessor_error)
-ENTRY(device_not_available)
+SYM_CODE_START(device_not_available)
ASM_CLAC
pushl $-1 # mark this as an int
pushl $do_device_not_available
jmp common_exception
-END(device_not_available)
+SYM_CODE_END(device_not_available)
#ifdef CONFIG_PARAVIRT
-ENTRY(native_iret)
+SYM_CODE_START(native_iret)
iret
_ASM_EXTABLE(native_iret, iret_exc)
-END(native_iret)
+SYM_CODE_END(native_iret)
#endif
-ENTRY(overflow)
+SYM_CODE_START(overflow)
ASM_CLAC
pushl $0
pushl $do_overflow
jmp common_exception
-END(overflow)
+SYM_CODE_END(overflow)
-ENTRY(bounds)
+SYM_CODE_START(bounds)
ASM_CLAC
pushl $0
pushl $do_bounds
jmp common_exception
-END(bounds)
+SYM_CODE_END(bounds)
-ENTRY(invalid_op)
+SYM_CODE_START(invalid_op)
ASM_CLAC
pushl $0
pushl $do_invalid_op
jmp common_exception
-END(invalid_op)
+SYM_CODE_END(invalid_op)
-ENTRY(coprocessor_segment_overrun)
+SYM_CODE_START(coprocessor_segment_overrun)
ASM_CLAC
pushl $0
pushl $do_coprocessor_segment_overrun
jmp common_exception
-END(coprocessor_segment_overrun)
+SYM_CODE_END(coprocessor_segment_overrun)
-ENTRY(invalid_TSS)
+SYM_CODE_START(invalid_TSS)
ASM_CLAC
pushl $do_invalid_TSS
jmp common_exception
-END(invalid_TSS)
+SYM_CODE_END(invalid_TSS)
-ENTRY(segment_not_present)
+SYM_CODE_START(segment_not_present)
ASM_CLAC
pushl $do_segment_not_present
jmp common_exception
-END(segment_not_present)
+SYM_CODE_END(segment_not_present)
-ENTRY(stack_segment)
+SYM_CODE_START(stack_segment)
ASM_CLAC
pushl $do_stack_segment
jmp common_exception
-END(stack_segment)
+SYM_CODE_END(stack_segment)
-ENTRY(alignment_check)
+SYM_CODE_START(alignment_check)
ASM_CLAC
pushl $do_alignment_check
jmp common_exception
-END(alignment_check)
+SYM_CODE_END(alignment_check)
-ENTRY(divide_error)
+SYM_CODE_START(divide_error)
ASM_CLAC
pushl $0 # no error code
pushl $do_divide_error
jmp common_exception
-END(divide_error)
+SYM_CODE_END(divide_error)
#ifdef CONFIG_X86_MCE
-ENTRY(machine_check)
+SYM_CODE_START(machine_check)
ASM_CLAC
pushl $0
pushl machine_check_vector
jmp common_exception
-END(machine_check)
+SYM_CODE_END(machine_check)
#endif
-ENTRY(spurious_interrupt_bug)
+SYM_CODE_START(spurious_interrupt_bug)
ASM_CLAC
pushl $0
pushl $do_spurious_interrupt_bug
jmp common_exception
-END(spurious_interrupt_bug)
+SYM_CODE_END(spurious_interrupt_bug)
#ifdef CONFIG_XEN_PV
-ENTRY(xen_hypervisor_callback)
- pushl $-1 /* orig_ax = -1 => not a system call */
- SAVE_ALL
- ENCODE_FRAME_POINTER
- TRACE_IRQS_OFF
-
+SYM_FUNC_START(xen_hypervisor_callback)
/*
* Check to see if we got the event in the critical
* region in xen_iret_direct, after we've reenabled
@@ -1353,22 +1387,23 @@ ENTRY(xen_hypervisor_callback)
* iret instruction's behaviour where it delivers a
* pending interrupt when enabling interrupts:
*/
- movl PT_EIP(%esp), %eax
- cmpl $xen_iret_start_crit, %eax
+ cmpl $xen_iret_start_crit, (%esp)
jb 1f
- cmpl $xen_iret_end_crit, %eax
+ cmpl $xen_iret_end_crit, (%esp)
jae 1f
-
- jmp xen_iret_crit_fixup
-
-ENTRY(xen_do_upcall)
-1: mov %esp, %eax
+ call xen_iret_crit_fixup
+1:
+ pushl $-1 /* orig_ax = -1 => not a system call */
+ SAVE_ALL
+ ENCODE_FRAME_POINTER
+ TRACE_IRQS_OFF
+ mov %esp, %eax
call xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPTION
call xen_maybe_preempt_hcall
#endif
jmp ret_from_intr
-ENDPROC(xen_hypervisor_callback)
+SYM_FUNC_END(xen_hypervisor_callback)
/*
* Hypervisor uses this for application faults while it executes.
@@ -1382,7 +1417,7 @@ ENDPROC(xen_hypervisor_callback)
* to pop the stack frame we end up in an infinite loop of failsafe callbacks.
* We distinguish between categories by maintaining a status value in EAX.
*/
-ENTRY(xen_failsafe_callback)
+SYM_FUNC_START(xen_failsafe_callback)
pushl %eax
movl $1, %eax
1: mov 4(%esp), %ds
@@ -1419,7 +1454,7 @@ ENTRY(xen_failsafe_callback)
_ASM_EXTABLE(2b, 7b)
_ASM_EXTABLE(3b, 8b)
_ASM_EXTABLE(4b, 9b)
-ENDPROC(xen_failsafe_callback)
+SYM_FUNC_END(xen_failsafe_callback)
#endif /* CONFIG_XEN_PV */
#ifdef CONFIG_XEN_PVHVM
@@ -1441,18 +1476,17 @@ BUILD_INTERRUPT3(hv_stimer0_callback_vector, HYPERV_STIMER0_VECTOR,
#endif /* CONFIG_HYPERV */
-ENTRY(page_fault)
+SYM_CODE_START(page_fault)
ASM_CLAC
pushl $do_page_fault
jmp common_exception_read_cr2
-END(page_fault)
+SYM_CODE_END(page_fault)
-common_exception_read_cr2:
+SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2)
/* the function address is in %gs's slot on the stack */
- SAVE_ALL switch_stacks=1 skip_gs=1
+ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
ENCODE_FRAME_POINTER
- UNWIND_ESPFIX_STACK
/* fixup %gs */
GS_TO_REG %ecx
@@ -1470,13 +1504,12 @@ common_exception_read_cr2:
movl %esp, %eax # pt_regs pointer
CALL_NOSPEC %edi
jmp ret_from_exception
-END(common_exception_read_cr2)
+SYM_CODE_END(common_exception_read_cr2)
-common_exception:
+SYM_CODE_START_LOCAL_NOALIGN(common_exception)
/* the function address is in %gs's slot on the stack */
- SAVE_ALL switch_stacks=1 skip_gs=1
+ SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
ENCODE_FRAME_POINTER
- UNWIND_ESPFIX_STACK
/* fixup %gs */
GS_TO_REG %ecx
@@ -1492,9 +1525,9 @@ common_exception:
movl %esp, %eax # pt_regs pointer
CALL_NOSPEC %edi
jmp ret_from_exception
-END(common_exception)
+SYM_CODE_END(common_exception)
-ENTRY(debug)
+SYM_CODE_START(debug)
/*
* Entry from sysenter is now handled in common_exception
*/
@@ -1502,7 +1535,7 @@ ENTRY(debug)
pushl $-1 # mark this as an int
pushl $do_debug
jmp common_exception
-END(debug)
+SYM_CODE_END(debug)
/*
* NMI is doubly nasty. It can happen on the first instruction of
@@ -1511,10 +1544,14 @@ END(debug)
* switched stacks. We handle both conditions by simply checking whether we
* interrupted kernel code running on the SYSENTER stack.
*/
-ENTRY(nmi)
+SYM_CODE_START(nmi)
ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
+ /*
+ * ESPFIX_SS is only ever set on the return to user path
+ * after we've switched to the entry stack.
+ */
pushl %eax
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
@@ -1550,6 +1587,11 @@ ENTRY(nmi)
movl %ebx, %esp
.Lnmi_return:
+#ifdef CONFIG_X86_ESPFIX32
+ testl $CS_FROM_ESPFIX, PT_CS(%esp)
+ jnz .Lnmi_from_espfix
+#endif
+
CHECK_AND_APPLY_ESPFIX
RESTORE_ALL_NMI cr3_reg=%edi pop=4
jmp .Lirq_return
@@ -1557,28 +1599,47 @@ ENTRY(nmi)
#ifdef CONFIG_X86_ESPFIX32
.Lnmi_espfix_stack:
/*
- * create the pointer to lss back
+ * Create the pointer to LSS back
*/
pushl %ss
pushl %esp
addl $4, (%esp)
- /* copy the iret frame of 12 bytes */
- .rept 3
- pushl 16(%esp)
- .endr
- pushl %eax
- SAVE_ALL_NMI cr3_reg=%edi
+
+ /* Copy the (short) IRET frame */
+ pushl 4*4(%esp) # flags
+ pushl 4*4(%esp) # cs
+ pushl 4*4(%esp) # ip
+
+ pushl %eax # orig_ax
+
+ SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1
ENCODE_FRAME_POINTER
- FIXUP_ESPFIX_STACK # %eax == %esp
+
+ /* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */
+ xorl $(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp)
+
xorl %edx, %edx # zero error code
- call do_nmi
+ movl %esp, %eax # pt_regs pointer
+ jmp .Lnmi_from_sysenter_stack
+
+.Lnmi_from_espfix:
RESTORE_ALL_NMI cr3_reg=%edi
- lss 12+4(%esp), %esp # back to espfix stack
+ /*
+ * Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to
+ * fix up the gap and long frame:
+ *
+ * 3 - original frame (exception)
+ * 2 - ESPFIX block (above)
+ * 6 - gap (FIXUP_FRAME)
+ * 5 - long frame (FIXUP_FRAME)
+ * 1 - orig_ax
+ */
+ lss (1+5+6)*4(%esp), %esp # back to espfix stack
jmp .Lirq_return
#endif
-END(nmi)
+SYM_CODE_END(nmi)
-ENTRY(int3)
+SYM_CODE_START(int3)
ASM_CLAC
pushl $-1 # mark this as an int
@@ -1589,22 +1650,22 @@ ENTRY(int3)
movl %esp, %eax # pt_regs pointer
call do_int3
jmp ret_from_exception
-END(int3)
+SYM_CODE_END(int3)
-ENTRY(general_protection)
+SYM_CODE_START(general_protection)
pushl $do_general_protection
jmp common_exception
-END(general_protection)
+SYM_CODE_END(general_protection)
#ifdef CONFIG_KVM_GUEST
-ENTRY(async_page_fault)
+SYM_CODE_START(async_page_fault)
ASM_CLAC
pushl $do_async_page_fault
jmp common_exception_read_cr2
-END(async_page_fault)
+SYM_CODE_END(async_page_fault)
#endif
-ENTRY(rewind_stack_do_exit)
+SYM_CODE_START(rewind_stack_do_exit)
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
@@ -1613,4 +1674,4 @@ ENTRY(rewind_stack_do_exit)
call do_exit
1: jmp 1b
-END(rewind_stack_do_exit)
+SYM_CODE_END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b7c3ea4cb19d..76942cbd95a1 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -15,7 +15,7 @@
* at the top of the kernel process stack.
*
* Some macro usage:
- * - ENTRY/END: Define functions in the symbol table.
+ * - SYM_FUNC_START/END:Define functions in the symbol table.
* - TRACE_IRQ_*: Trace hardirq state for lock debugging.
* - idtentry: Define exception entry points.
*/
@@ -46,11 +46,11 @@
.section .entry.text, "ax"
#ifdef CONFIG_PARAVIRT
-ENTRY(native_usergs_sysret64)
+SYM_CODE_START(native_usergs_sysret64)
UNWIND_HINT_EMPTY
swapgs
sysretq
-END(native_usergs_sysret64)
+SYM_CODE_END(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
.macro TRACE_IRQS_FLAGS flags:req
@@ -142,7 +142,7 @@ END(native_usergs_sysret64)
* with them due to bugs in both AMD and Intel CPUs.
*/
-ENTRY(entry_SYSCALL_64)
+SYM_CODE_START(entry_SYSCALL_64)
UNWIND_HINT_EMPTY
/*
* Interrupts are off on entry.
@@ -162,7 +162,7 @@ ENTRY(entry_SYSCALL_64)
pushq %r11 /* pt_regs->flags */
pushq $__USER_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
-GLOBAL(entry_SYSCALL_64_after_hwframe)
+SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
pushq %rax /* pt_regs->orig_ax */
PUSH_AND_CLEAR_REGS rax=$-ENOSYS
@@ -273,13 +273,13 @@ syscall_return_via_sysret:
popq %rdi
popq %rsp
USERGS_SYSRET64
-END(entry_SYSCALL_64)
+SYM_CODE_END(entry_SYSCALL_64)
/*
* %rdi: prev task
* %rsi: next task
*/
-ENTRY(__switch_to_asm)
+SYM_CODE_START(__switch_to_asm)
UNWIND_HINT_FUNC
/*
* Save callee-saved registers
@@ -321,7 +321,7 @@ ENTRY(__switch_to_asm)
popq %rbp
jmp __switch_to
-END(__switch_to_asm)
+SYM_CODE_END(__switch_to_asm)
/*
* A newly forked process directly context switches into this address.
@@ -330,7 +330,7 @@ END(__switch_to_asm)
* rbx: kernel thread func (NULL for user thread)
* r12: kernel thread arg
*/
-ENTRY(ret_from_fork)
+SYM_CODE_START(ret_from_fork)
UNWIND_HINT_EMPTY
movq %rax, %rdi
call schedule_tail /* rdi: 'prev' task parameter */
@@ -357,14 +357,14 @@ ENTRY(ret_from_fork)
*/
movq $0, RAX(%rsp)
jmp 2b
-END(ret_from_fork)
+SYM_CODE_END(ret_from_fork)
/*
* Build the entry stubs with some assembler magic.
* We pack 1 stub into every 8-byte block.
*/
.align 8
-ENTRY(irq_entries_start)
+SYM_CODE_START(irq_entries_start)
vector=FIRST_EXTERNAL_VECTOR
.rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
UNWIND_HINT_IRET_REGS
@@ -373,10 +373,10 @@ ENTRY(irq_entries_start)
.align 8
vector=vector+1
.endr
-END(irq_entries_start)
+SYM_CODE_END(irq_entries_start)
.align 8
-ENTRY(spurious_entries_start)
+SYM_CODE_START(spurious_entries_start)
vector=FIRST_SYSTEM_VECTOR
.rept (NR_VECTORS - FIRST_SYSTEM_VECTOR)
UNWIND_HINT_IRET_REGS
@@ -385,7 +385,7 @@ ENTRY(spurious_entries_start)
.align 8
vector=vector+1
.endr
-END(spurious_entries_start)
+SYM_CODE_END(spurious_entries_start)
.macro DEBUG_ENTRY_ASSERT_IRQS_OFF
#ifdef CONFIG_DEBUG_ENTRY
@@ -511,7 +511,7 @@ END(spurious_entries_start)
* | return address |
* +----------------------------------------------------+
*/
-ENTRY(interrupt_entry)
+SYM_CODE_START(interrupt_entry)
UNWIND_HINT_FUNC
ASM_CLAC
cld
@@ -579,7 +579,7 @@ ENTRY(interrupt_entry)
TRACE_IRQS_OFF
ret
-END(interrupt_entry)
+SYM_CODE_END(interrupt_entry)
_ASM_NOKPROBE(interrupt_entry)
@@ -589,18 +589,18 @@ _ASM_NOKPROBE(interrupt_entry)
* The interrupt stubs push (~vector+0x80) onto the stack and
* then jump to common_spurious/interrupt.
*/
-common_spurious:
+SYM_CODE_START_LOCAL(common_spurious)
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
call interrupt_entry
UNWIND_HINT_REGS indirect=1
call smp_spurious_interrupt /* rdi points to pt_regs */
jmp ret_from_intr
-END(common_spurious)
+SYM_CODE_END(common_spurious)
_ASM_NOKPROBE(common_spurious)
/* common_interrupt is a hotpath. Align it */
.p2align CONFIG_X86_L1_CACHE_SHIFT
-common_interrupt:
+SYM_CODE_START_LOCAL(common_interrupt)
addq $-0x80, (%rsp) /* Adjust vector to [-256, -1] range */
call interrupt_entry
UNWIND_HINT_REGS indirect=1
@@ -616,12 +616,12 @@ ret_from_intr:
jz retint_kernel
/* Interrupt came from user space */
-GLOBAL(retint_user)
+.Lretint_user:
mov %rsp,%rdi
call prepare_exit_to_usermode
TRACE_IRQS_IRETQ
-GLOBAL(swapgs_restore_regs_and_return_to_usermode)
+SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testb $3, CS(%rsp)
@@ -679,7 +679,7 @@ retint_kernel:
*/
TRACE_IRQS_IRETQ
-GLOBAL(restore_regs_and_return_to_kernel)
+SYM_INNER_LABEL(restore_regs_and_return_to_kernel, SYM_L_GLOBAL)
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates kernel mode. */
testb $3, CS(%rsp)
@@ -695,7 +695,7 @@ GLOBAL(restore_regs_and_return_to_kernel)
*/
INTERRUPT_RETURN
-ENTRY(native_iret)
+SYM_INNER_LABEL_ALIGN(native_iret, SYM_L_GLOBAL)
UNWIND_HINT_IRET_REGS
/*
* Are we returning to a stack segment from the LDT? Note: in
@@ -706,8 +706,7 @@ ENTRY(native_iret)
jnz native_irq_return_ldt
#endif
-.global native_irq_return_iret
-native_irq_return_iret:
+SYM_INNER_LABEL(native_irq_return_iret, SYM_L_GLOBAL)
/*
* This may fault. Non-paranoid faults on return to userspace are
* handled by fixup_bad_iret. These include #SS, #GP, and #NP.
@@ -789,14 +788,14 @@ native_irq_return_ldt:
*/
jmp native_irq_return_iret
#endif
-END(common_interrupt)
+SYM_CODE_END(common_interrupt)
_ASM_NOKPROBE(common_interrupt)
/*
* APIC interrupts.
*/
.macro apicinterrupt3 num sym do_sym
-ENTRY(\sym)
+SYM_CODE_START(\sym)
UNWIND_HINT_IRET_REGS
pushq $~(\num)
.Lcommon_\sym:
@@ -804,7 +803,7 @@ ENTRY(\sym)
UNWIND_HINT_REGS indirect=1
call \do_sym /* rdi points to pt_regs */
jmp ret_from_intr
-END(\sym)
+SYM_CODE_END(\sym)
_ASM_NOKPROBE(\sym)
.endm
@@ -969,7 +968,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
*/
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0 create_gap=0 read_cr2=0
-ENTRY(\sym)
+SYM_CODE_START(\sym)
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
/* Sanity check */
@@ -1019,7 +1018,7 @@ ENTRY(\sym)
.endif
_ASM_NOKPROBE(\sym)
-END(\sym)
+SYM_CODE_END(\sym)
.endm
idtentry divide_error do_divide_error has_error_code=0
@@ -1041,7 +1040,7 @@ idtentry simd_coprocessor_error do_simd_coprocessor_error has_error_code=0
* Reload gs selector with exception handling
* edi: new selector
*/
-ENTRY(native_load_gs_index)
+SYM_FUNC_START(native_load_gs_index)
FRAME_BEGIN
pushfq
DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
@@ -1055,13 +1054,13 @@ ENTRY(native_load_gs_index)
popfq
FRAME_END
ret
-ENDPROC(native_load_gs_index)
+SYM_FUNC_END(native_load_gs_index)
EXPORT_SYMBOL(native_load_gs_index)
_ASM_EXTABLE(.Lgs_change, .Lbad_gs)
.section .fixup, "ax"
/* running with kernelgs */
-.Lbad_gs:
+SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs)
SWAPGS /* switch back to user gs */
.macro ZAP_GS
/* This can't be a string because the preprocessor needs to see it. */
@@ -1072,10 +1071,11 @@ EXPORT_SYMBOL(native_load_gs_index)
xorl %eax, %eax
movl %eax, %gs
jmp 2b
+SYM_CODE_END(.Lbad_gs)
.previous
/* Call softirq on interrupt stack. Interrupts are off. */
-ENTRY(do_softirq_own_stack)
+SYM_FUNC_START(do_softirq_own_stack)
pushq %rbp
mov %rsp, %rbp
ENTER_IRQ_STACK regs=0 old_rsp=%r11
@@ -1083,7 +1083,7 @@ ENTRY(do_softirq_own_stack)
LEAVE_IRQ_STACK regs=0
leaveq
ret
-ENDPROC(do_softirq_own_stack)
+SYM_FUNC_END(do_softirq_own_stack)
#ifdef CONFIG_XEN_PV
idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
@@ -1101,7 +1101,8 @@ idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
* existing activation in its critical region -- if so, we pop the current
* activation and restart the handler using the previous one.
*/
-ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
+/* do_hypervisor_callback(struct *pt_regs) */
+SYM_CODE_START_LOCAL(xen_do_hypervisor_callback)
/*
* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
@@ -1119,7 +1120,7 @@ ENTRY(xen_do_hypervisor_callback) /* do_hypervisor_callback(struct *pt_regs) */
call xen_maybe_preempt_hcall
#endif
jmp error_exit
-END(xen_do_hypervisor_callback)
+SYM_CODE_END(xen_do_hypervisor_callback)
/*
* Hypervisor uses this for application faults while it executes.
@@ -1134,7 +1135,7 @@ END(xen_do_hypervisor_callback)
* We distinguish between categories by comparing each saved segment register
* with its current contents: any discrepancy means we in category 1.
*/
-ENTRY(xen_failsafe_callback)
+SYM_CODE_START(xen_failsafe_callback)
UNWIND_HINT_EMPTY
movl %ds, %ecx
cmpw %cx, 0x10(%rsp)
@@ -1164,7 +1165,7 @@ ENTRY(xen_failsafe_callback)
PUSH_AND_CLEAR_REGS
ENCODE_FRAME_POINTER
jmp error_exit
-END(xen_failsafe_callback)
+SYM_CODE_END(xen_failsafe_callback)
#endif /* CONFIG_XEN_PV */
#ifdef CONFIG_XEN_PVHVM
@@ -1214,7 +1215,7 @@ idtentry machine_check do_mce has_error_code=0 paranoid=1
* Use slow, but surefire "are we in kernel?" check.
* Return: ebx=0: need swapgs on exit, ebx=1: otherwise
*/
-ENTRY(paranoid_entry)
+SYM_CODE_START_LOCAL(paranoid_entry)
UNWIND_HINT_FUNC
cld
PUSH_AND_CLEAR_REGS save_ret=1
@@ -1248,7 +1249,7 @@ ENTRY(paranoid_entry)
FENCE_SWAPGS_KERNEL_ENTRY
ret
-END(paranoid_entry)
+SYM_CODE_END(paranoid_entry)
/*
* "Paranoid" exit path from exception stack. This is invoked
@@ -1262,7 +1263,7 @@ END(paranoid_entry)
*
* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it)
*/
-ENTRY(paranoid_exit)
+SYM_CODE_START_LOCAL(paranoid_exit)
UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF_DEBUG
@@ -1272,19 +1273,18 @@ ENTRY(paranoid_exit)
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
SWAPGS_UNSAFE_STACK
- jmp .Lparanoid_exit_restore
+ jmp restore_regs_and_return_to_kernel
.Lparanoid_exit_no_swapgs:
TRACE_IRQS_IRETQ_DEBUG
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%rbx save_reg=%r14
-.Lparanoid_exit_restore:
jmp restore_regs_and_return_to_kernel
-END(paranoid_exit)
+SYM_CODE_END(paranoid_exit)
/*
* Save all registers in pt_regs, and switch GS if needed.
*/
-ENTRY(error_entry)
+SYM_CODE_START_LOCAL(error_entry)
UNWIND_HINT_FUNC
cld
PUSH_AND_CLEAR_REGS save_ret=1
@@ -1364,16 +1364,16 @@ ENTRY(error_entry)
call fixup_bad_iret
mov %rax, %rsp
jmp .Lerror_entry_from_usermode_after_swapgs
-END(error_entry)
+SYM_CODE_END(error_entry)
-ENTRY(error_exit)
+SYM_CODE_START_LOCAL(error_exit)
UNWIND_HINT_REGS
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
testb $3, CS(%rsp)
jz retint_kernel
- jmp retint_user
-END(error_exit)
+ jmp .Lretint_user
+SYM_CODE_END(error_exit)
/*
* Runs on exception stack. Xen PV does not go through this path at all,
@@ -1383,7 +1383,7 @@ END(error_exit)
* %r14: Used to save/restore the CR3 of the interrupted context
* when PAGE_TABLE_ISOLATION is in use. Do not clobber.
*/
-ENTRY(nmi)
+SYM_CODE_START(nmi)
UNWIND_HINT_IRET_REGS
/*
@@ -1718,21 +1718,21 @@ nmi_restore:
* about espfix64 on the way back to kernel mode.
*/
iretq
-END(nmi)
+SYM_CODE_END(nmi)
#ifndef CONFIG_IA32_EMULATION
/*
* This handles SYSCALL from 32-bit code. There is no way to program
* MSRs to fully disable 32-bit SYSCALL.
*/
-ENTRY(ignore_sysret)
+SYM_CODE_START(ignore_sysret)
UNWIND_HINT_EMPTY
mov $-ENOSYS, %eax
sysret
-END(ignore_sysret)
+SYM_CODE_END(ignore_sysret)
#endif
-ENTRY(rewind_stack_do_exit)
+SYM_CODE_START(rewind_stack_do_exit)
UNWIND_HINT_FUNC
/* Prevent any naive code from trying to unwind to our caller. */
xorl %ebp, %ebp
@@ -1742,4 +1742,4 @@ ENTRY(rewind_stack_do_exit)
UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
call do_exit
-END(rewind_stack_do_exit)
+SYM_CODE_END(rewind_stack_do_exit)
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 39913770a44d..f1d3ccae5dd5 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -46,7 +46,7 @@
* ebp user stack
* 0(%ebp) arg6
*/
-ENTRY(entry_SYSENTER_compat)
+SYM_FUNC_START(entry_SYSENTER_compat)
/* Interrupts are off on entry. */
SWAPGS
@@ -146,8 +146,8 @@ ENTRY(entry_SYSENTER_compat)
pushq $X86_EFLAGS_FIXED
popfq
jmp .Lsysenter_flags_fixed
-GLOBAL(__end_entry_SYSENTER_compat)
-ENDPROC(entry_SYSENTER_compat)
+SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL)
+SYM_FUNC_END(entry_SYSENTER_compat)
/*
* 32-bit SYSCALL entry.
@@ -196,7 +196,7 @@ ENDPROC(entry_SYSENTER_compat)
* esp user stack
* 0(%esp) arg6
*/
-ENTRY(entry_SYSCALL_compat)
+SYM_CODE_START(entry_SYSCALL_compat)
/* Interrupts are off on entry. */
swapgs
@@ -215,7 +215,7 @@ ENTRY(entry_SYSCALL_compat)
pushq %r11 /* pt_regs->flags */
pushq $__USER32_CS /* pt_regs->cs */
pushq %rcx /* pt_regs->ip */
-GLOBAL(entry_SYSCALL_compat_after_hwframe)
+SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL)
movl %eax, %eax /* discard orig_ax high bits */
pushq %rax /* pt_regs->orig_ax */
pushq %rdi /* pt_regs->di */
@@ -311,7 +311,7 @@ sysret32_from_system_call:
xorl %r10d, %r10d
swapgs
sysretl
-END(entry_SYSCALL_compat)
+SYM_CODE_END(entry_SYSCALL_compat)
/*
* 32-bit legacy system call entry.
@@ -339,7 +339,7 @@ END(entry_SYSCALL_compat)
* edi arg5
* ebp arg6
*/
-ENTRY(entry_INT80_compat)
+SYM_CODE_START(entry_INT80_compat)
/*
* Interrupts are off on entry.
*/
@@ -416,4 +416,4 @@ ENTRY(entry_INT80_compat)
/* Go back to user mode. */
TRACE_IRQS_ON
jmp swapgs_restore_regs_and_return_to_usermode
-END(entry_INT80_compat)
+SYM_CODE_END(entry_INT80_compat)
diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c
index aa3336a7cb15..7d17b3addbbb 100644
--- a/arch/x86/entry/syscall_32.c
+++ b/arch/x86/entry/syscall_32.c
@@ -10,13 +10,11 @@
#ifdef CONFIG_IA32_EMULATION
/* On X86_64, we use struct pt_regs * to pass parameters to syscalls */
#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
-
-/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
-extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
-
+#define __sys_ni_syscall __ia32_sys_ni_syscall
#else /* CONFIG_IA32_EMULATION */
#define __SYSCALL_I386(nr, sym, qual) extern asmlinkage long sym(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
extern asmlinkage long sys_ni_syscall(unsigned long, unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
+#define __sys_ni_syscall sys_ni_syscall
#endif /* CONFIG_IA32_EMULATION */
#include <asm/syscalls_32.h>
@@ -29,6 +27,6 @@ __visible const sys_call_ptr_t ia32_sys_call_table[__NR_syscall_compat_max+1] =
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
- [0 ... __NR_syscall_compat_max] = &sys_ni_syscall,
+ [0 ... __NR_syscall_compat_max] = &__sys_ni_syscall,
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c
index b1bf31713374..adf619a856e8 100644
--- a/arch/x86/entry/syscall_64.c
+++ b/arch/x86/entry/syscall_64.c
@@ -4,11 +4,17 @@
#include <linux/linkage.h>
#include <linux/sys.h>
#include <linux/cache.h>
+#include <linux/syscalls.h>
#include <asm/asm-offsets.h>
#include <asm/syscall.h>
-/* this is a lie, but it does not hurt as sys_ni_syscall just returns -EINVAL */
-extern asmlinkage long sys_ni_syscall(const struct pt_regs *);
+extern asmlinkage long sys_ni_syscall(void);
+
+SYSCALL_DEFINE0(ni_syscall)
+{
+ return sys_ni_syscall();
+}
+
#define __SYSCALL_64(nr, sym, qual) extern asmlinkage long sym(const struct pt_regs *);
#define __SYSCALL_X32(nr, sym, qual) __SYSCALL_64(nr, sym, qual)
#include <asm/syscalls_64.h>
@@ -23,7 +29,7 @@ asmlinkage const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
- [0 ... __NR_syscall_max] = &sys_ni_syscall,
+ [0 ... __NR_syscall_max] = &__x64_sys_ni_syscall,
#include <asm/syscalls_64.h>
};
@@ -40,7 +46,7 @@ asmlinkage const sys_call_ptr_t x32_sys_call_table[__NR_syscall_x32_max+1] = {
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
- [0 ... __NR_syscall_x32_max] = &sys_ni_syscall,
+ [0 ... __NR_syscall_x32_max] = &__x64_sys_ni_syscall,
#include <asm/syscalls_64.h>
};
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 3fe02546aed3..15908eb9b17e 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -124,13 +124,13 @@
110 i386 iopl sys_iopl __ia32_sys_iopl
111 i386 vhangup sys_vhangup __ia32_sys_vhangup
112 i386 idle
-113 i386 vm86old sys_vm86old sys_ni_syscall
+113 i386 vm86old sys_vm86old __ia32_sys_ni_syscall
114 i386 wait4 sys_wait4 __ia32_compat_sys_wait4
115 i386 swapoff sys_swapoff __ia32_sys_swapoff
116 i386 sysinfo sys_sysinfo __ia32_compat_sys_sysinfo
117 i386 ipc sys_ipc __ia32_compat_sys_ipc
118 i386 fsync sys_fsync __ia32_sys_fsync
-119 i386 sigreturn sys_sigreturn sys32_sigreturn
+119 i386 sigreturn sys_sigreturn __ia32_compat_sys_sigreturn
120 i386 clone sys_clone __ia32_compat_sys_x86_clone
121 i386 setdomainname sys_setdomainname __ia32_sys_setdomainname
122 i386 uname sys_newuname __ia32_sys_newuname
@@ -177,14 +177,14 @@
163 i386 mremap sys_mremap __ia32_sys_mremap
164 i386 setresuid sys_setresuid16 __ia32_sys_setresuid16
165 i386 getresuid sys_getresuid16 __ia32_sys_getresuid16
-166 i386 vm86 sys_vm86 sys_ni_syscall
+166 i386 vm86 sys_vm86 __ia32_sys_ni_syscall
167 i386 query_module
168 i386 poll sys_poll __ia32_sys_poll
169 i386 nfsservctl
170 i386 setresgid sys_setresgid16 __ia32_sys_setresgid16
171 i386 getresgid sys_getresgid16 __ia32_sys_getresgid16
172 i386 prctl sys_prctl __ia32_sys_prctl
-173 i386 rt_sigreturn sys_rt_sigreturn sys32_rt_sigreturn
+173 i386 rt_sigreturn sys_rt_sigreturn __ia32_compat_sys_rt_sigreturn
174 i386 rt_sigaction sys_rt_sigaction __ia32_compat_sys_rt_sigaction
175 i386 rt_sigprocmask sys_rt_sigprocmask __ia32_compat_sys_rt_sigprocmask
176 i386 rt_sigpending sys_rt_sigpending __ia32_compat_sys_rt_sigpending
diff --git a/arch/x86/entry/thunk_32.S b/arch/x86/entry/thunk_32.S
index 2713490611a3..e010d4ae11f1 100644
--- a/arch/x86/entry/thunk_32.S
+++ b/arch/x86/entry/thunk_32.S
@@ -10,8 +10,7 @@
/* put return address in eax (arg1) */
.macro THUNK name, func, put_ret_addr_in_eax=0
- .globl \name
-\name:
+SYM_CODE_START_NOALIGN(\name)
pushl %eax
pushl %ecx
pushl %edx
@@ -27,6 +26,7 @@
popl %eax
ret
_ASM_NOKPROBE(\name)
+SYM_CODE_END(\name)
.endm
#ifdef CONFIG_TRACE_IRQFLAGS
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index ea5c4167086c..c5c3b6e86e62 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -12,7 +12,7 @@
/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
.macro THUNK name, func, put_ret_addr_in_rdi=0
- ENTRY(\name)
+SYM_FUNC_START_NOALIGN(\name)
pushq %rbp
movq %rsp, %rbp
@@ -33,7 +33,7 @@
call \func
jmp .L_restore
- ENDPROC(\name)
+SYM_FUNC_END(\name)
_ASM_NOKPROBE(\name)
.endm
@@ -56,7 +56,7 @@
#if defined(CONFIG_TRACE_IRQFLAGS) \
|| defined(CONFIG_DEBUG_LOCK_ALLOC) \
|| defined(CONFIG_PREEMPTION)
-.L_restore:
+SYM_CODE_START_LOCAL_NOALIGN(.L_restore)
popq %r11
popq %r10
popq %r9
@@ -69,4 +69,5 @@
popq %rbp
ret
_ASM_NOKPROBE(.L_restore)
+SYM_CODE_END(.L_restore)
#endif
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 0f2154106d01..2b75e80f6b41 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -87,11 +87,9 @@ $(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
#
-CFLAGS_REMOVE_vdso-note.o = -pg
CFLAGS_REMOVE_vclock_gettime.o = -pg
CFLAGS_REMOVE_vdso32/vclock_gettime.o = -pg
CFLAGS_REMOVE_vgetcpu.o = -pg
-CFLAGS_REMOVE_vvar.o = -pg
#
# X32 processes use x32 vDSO to access 64bit kernel data.
diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S
index 263d7433dea8..de1fff7188aa 100644
--- a/arch/x86/entry/vdso/vdso32/system_call.S
+++ b/arch/x86/entry/vdso/vdso32/system_call.S
@@ -62,7 +62,7 @@ __kernel_vsyscall:
/* Enter using int $0x80 */
int $0x80
-GLOBAL(int80_landing_pad)
+SYM_INNER_LABEL(int80_landing_pad, SYM_L_GLOBAL)
/*
* Restore EDX and ECX in case they were clobbered. EBP is not
diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c
index 64c3e70b0556..a7752cd78b89 100644
--- a/arch/x86/events/amd/core.c
+++ b/arch/x86/events/amd/core.c
@@ -652,15 +652,7 @@ static void amd_pmu_disable_event(struct perf_event *event)
*/
static int amd_pmu_handle_irq(struct pt_regs *regs)
{
- struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
- int active, handled;
-
- /*
- * Obtain the active count before calling x86_pmu_handle_irq() since
- * it is possible that x86_pmu_handle_irq() may make a counter
- * inactive (through x86_pmu_stop).
- */
- active = __bitmap_weight(cpuc->active_mask, X86_PMC_IDX_MAX);
+ int handled;
/* Process any counter overflows */
handled = x86_pmu_handle_irq(regs);
@@ -670,8 +662,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs)
* NMIs will be claimed if arriving within that window.
*/
if (handled) {
- this_cpu_write(perf_nmi_tstamp,
- jiffies + perf_nmi_window);
+ this_cpu_write(perf_nmi_tstamp, jiffies + perf_nmi_window);
return handled;
}
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 7b21455d7504..6e3f0c18908e 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2243,6 +2243,13 @@ static void x86_pmu_sched_task(struct perf_event_context *ctx, bool sched_in)
x86_pmu.sched_task(ctx, sched_in);
}
+static void x86_pmu_swap_task_ctx(struct perf_event_context *prev,
+ struct perf_event_context *next)
+{
+ if (x86_pmu.swap_task_ctx)
+ x86_pmu.swap_task_ctx(prev, next);
+}
+
void perf_check_microcode(void)
{
if (x86_pmu.check_microcode)
@@ -2297,6 +2304,7 @@ static struct pmu pmu = {
.event_idx = x86_pmu_event_idx,
.sched_task = x86_pmu_sched_task,
.task_ctx_size = sizeof(struct x86_perf_task_context),
+ .swap_task_ctx = x86_pmu_swap_task_ctx,
.check_period = x86_pmu_check_period,
.aux_output_match = x86_pmu_aux_output_match,
diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c
index 5ee3fed881d3..38de4a7f6752 100644
--- a/arch/x86/events/intel/bts.c
+++ b/arch/x86/events/intel/bts.c
@@ -549,9 +549,11 @@ static int bts_event_init(struct perf_event *event)
* Note that the default paranoia setting permits unprivileged
* users to profile the kernel.
*/
- if (event->attr.exclude_kernel && perf_paranoid_kernel() &&
- !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ if (event->attr.exclude_kernel) {
+ ret = perf_allow_kernel(&event->attr);
+ if (ret)
+ return ret;
+ }
if (x86_add_exclusive(x86_lbr_exclusive_bts))
return -EBUSY;
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 937363b803c1..3be51aa06e67 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -3315,8 +3315,9 @@ static int intel_pmu_hw_config(struct perf_event *event)
if (x86_pmu.version < 3)
return -EINVAL;
- if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ ret = perf_allow_cpu(&event->attr);
+ if (ret)
+ return ret;
event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY;
@@ -3830,6 +3831,12 @@ static void intel_pmu_sched_task(struct perf_event_context *ctx,
intel_pmu_lbr_sched_task(ctx, sched_in);
}
+static void intel_pmu_swap_task_ctx(struct perf_event_context *prev,
+ struct perf_event_context *next)
+{
+ intel_pmu_lbr_swap_task_ctx(prev, next);
+}
+
static int intel_pmu_check_period(struct perf_event *event, u64 value)
{
return intel_pmu_has_bts_period(event, value) ? -EINVAL : 0;
@@ -3965,6 +3972,7 @@ static __initconst const struct x86_pmu intel_pmu = {
.guest_get_msrs = intel_guest_get_msrs,
.sched_task = intel_pmu_sched_task,
+ .swap_task_ctx = intel_pmu_swap_task_ctx,
.check_period = intel_pmu_check_period,
diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index ea54634eabf3..534c76606049 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -417,6 +417,29 @@ static void __intel_pmu_lbr_save(struct x86_perf_task_context *task_ctx)
cpuc->last_log_id = ++task_ctx->log_id;
}
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
+ struct perf_event_context *next)
+{
+ struct x86_perf_task_context *prev_ctx_data, *next_ctx_data;
+
+ swap(prev->task_ctx_data, next->task_ctx_data);
+
+ /*
+ * Architecture specific synchronization makes sense in
+ * case both prev->task_ctx_data and next->task_ctx_data
+ * pointers are allocated.
+ */
+
+ prev_ctx_data = next->task_ctx_data;
+ next_ctx_data = prev->task_ctx_data;
+
+ if (!prev_ctx_data || !next_ctx_data)
+ return;
+
+ swap(prev_ctx_data->lbr_callstack_users,
+ next_ctx_data->lbr_callstack_users);
+}
+
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
{
struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
diff --git a/arch/x86/events/intel/p4.c b/arch/x86/events/intel/p4.c
index dee579efb2b2..a4cc66005ce8 100644
--- a/arch/x86/events/intel/p4.c
+++ b/arch/x86/events/intel/p4.c
@@ -776,8 +776,9 @@ static int p4_validate_raw_event(struct perf_event *event)
* the user needs special permissions to be able to use it
*/
if (p4_ht_active() && p4_event_bind_map[v].shared) {
- if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
- return -EACCES;
+ v = perf_allow_cpu(&event->attr);
+ if (v)
+ return v;
}
/* ESCR EventMask bits may be invalid */
diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 05e43d0f430b..1db7a51d9792 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -397,6 +397,20 @@ static bool pt_event_valid(struct perf_event *event)
* These all are cpu affine and operate on a local PT
*/
+static void pt_config_start(struct perf_event *event)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ u64 ctl = event->hw.config;
+
+ ctl |= RTIT_CTL_TRACEEN;
+ if (READ_ONCE(pt->vmx_on))
+ perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
+ else
+ wrmsrl(MSR_IA32_RTIT_CTL, ctl);
+
+ WRITE_ONCE(event->hw.config, ctl);
+}
+
/* Address ranges and their corresponding msr configuration registers */
static const struct pt_address_range {
unsigned long msr_a;
@@ -469,6 +483,7 @@ static u64 pt_config_filters(struct perf_event *event)
static void pt_config(struct perf_event *event)
{
struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct pt_buffer *buf = perf_get_aux(&pt->handle);
u64 reg;
/* First round: clear STATUS, in particular the PSB byte counter. */
@@ -478,7 +493,9 @@ static void pt_config(struct perf_event *event)
}
reg = pt_config_filters(event);
- reg |= RTIT_CTL_TOPA | RTIT_CTL_TRACEEN;
+ reg |= RTIT_CTL_TRACEEN;
+ if (!buf->single)
+ reg |= RTIT_CTL_TOPA;
/*
* Previously, we had BRANCH_EN on by default, but now that PT has
@@ -501,10 +518,7 @@ static void pt_config(struct perf_event *event)
reg |= (event->attr.config & PT_CONFIG_MASK);
event->hw.config = reg;
- if (READ_ONCE(pt->vmx_on))
- perf_aux_output_flag(&pt->handle, PERF_AUX_FLAG_PARTIAL);
- else
- wrmsrl(MSR_IA32_RTIT_CTL, reg);
+ pt_config_start(event);
}
static void pt_config_stop(struct perf_event *event)
@@ -533,18 +547,6 @@ static void pt_config_stop(struct perf_event *event)
wmb();
}
-static void pt_config_buffer(void *buf, unsigned int topa_idx,
- unsigned int output_off)
-{
- u64 reg;
-
- wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, virt_to_phys(buf));
-
- reg = 0x7f | ((u64)topa_idx << 7) | ((u64)output_off << 32);
-
- wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
-}
-
/**
* struct topa - ToPA metadata
* @list: linkage to struct pt_buffer's list of tables
@@ -602,6 +604,33 @@ static inline phys_addr_t topa_pfn(struct topa *topa)
#define TOPA_ENTRY_SIZE(t, i) (sizes(TOPA_ENTRY((t), (i))->size))
#define TOPA_ENTRY_PAGES(t, i) (1 << TOPA_ENTRY((t), (i))->size)
+static void pt_config_buffer(struct pt_buffer *buf)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ u64 reg, mask;
+ void *base;
+
+ if (buf->single) {
+ base = buf->data_pages[0];
+ mask = (buf->nr_pages * PAGE_SIZE - 1) >> 7;
+ } else {
+ base = topa_to_page(buf->cur)->table;
+ mask = (u64)buf->cur_idx;
+ }
+
+ reg = virt_to_phys(base);
+ if (pt->output_base != reg) {
+ pt->output_base = reg;
+ wrmsrl(MSR_IA32_RTIT_OUTPUT_BASE, reg);
+ }
+
+ reg = 0x7f | (mask << 7) | ((u64)buf->output_off << 32);
+ if (pt->output_mask != reg) {
+ pt->output_mask = reg;
+ wrmsrl(MSR_IA32_RTIT_OUTPUT_MASK, reg);
+ }
+}
+
/**
* topa_alloc() - allocate page-sized ToPA table
* @cpu: CPU on which to allocate.
@@ -802,6 +831,11 @@ static void pt_update_head(struct pt *pt)
struct pt_buffer *buf = perf_get_aux(&pt->handle);
u64 topa_idx, base, old;
+ if (buf->single) {
+ local_set(&buf->data_size, buf->output_off);
+ return;
+ }
+
/* offset of the first region in this table from the beginning of buf */
base = buf->cur->offset + buf->output_off;
@@ -903,18 +937,21 @@ static void pt_handle_status(struct pt *pt)
*/
static void pt_read_offset(struct pt_buffer *buf)
{
- u64 offset, base_topa;
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
struct topa_page *tp;
- rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, base_topa);
- tp = phys_to_virt(base_topa);
- buf->cur = &tp->topa;
+ if (!buf->single) {
+ rdmsrl(MSR_IA32_RTIT_OUTPUT_BASE, pt->output_base);
+ tp = phys_to_virt(pt->output_base);
+ buf->cur = &tp->topa;
+ }
- rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, offset);
+ rdmsrl(MSR_IA32_RTIT_OUTPUT_MASK, pt->output_mask);
/* offset within current output region */
- buf->output_off = offset >> 32;
+ buf->output_off = pt->output_mask >> 32;
/* index of current output region within this table */
- buf->cur_idx = (offset & 0xffffff80) >> 7;
+ if (!buf->single)
+ buf->cur_idx = (pt->output_mask & 0xffffff80) >> 7;
}
static struct topa_entry *
@@ -1030,6 +1067,9 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
unsigned long head = local64_read(&buf->head);
unsigned long idx, npages, wakeup;
+ if (buf->single)
+ return 0;
+
/* can't stop in the middle of an output region */
if (buf->output_off + handle->size + 1 < pt_buffer_region_size(buf)) {
perf_aux_output_flag(handle, PERF_AUX_FLAG_TRUNCATED);
@@ -1111,13 +1151,17 @@ static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
if (buf->snapshot)
head &= (buf->nr_pages << PAGE_SHIFT) - 1;
- pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
- te = pt_topa_entry_for_page(buf, pg);
+ if (!buf->single) {
+ pg = (head >> PAGE_SHIFT) & (buf->nr_pages - 1);
+ te = pt_topa_entry_for_page(buf, pg);
- cur_tp = topa_entry_to_page(te);
- buf->cur = &cur_tp->topa;
- buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
- buf->output_off = head & (pt_buffer_region_size(buf) - 1);
+ cur_tp = topa_entry_to_page(te);
+ buf->cur = &cur_tp->topa;
+ buf->cur_idx = te - TOPA_ENTRY(buf->cur, 0);
+ buf->output_off = head & (pt_buffer_region_size(buf) - 1);
+ } else {
+ buf->output_off = head;
+ }
local64_set(&buf->head, head);
local_set(&buf->data_size, 0);
@@ -1131,6 +1175,9 @@ static void pt_buffer_fini_topa(struct pt_buffer *buf)
{
struct topa *topa, *iter;
+ if (buf->single)
+ return;
+
list_for_each_entry_safe(topa, iter, &buf->tables, list) {
/*
* right now, this is in free_aux() path only, so
@@ -1176,6 +1223,36 @@ static int pt_buffer_init_topa(struct pt_buffer *buf, int cpu,
return 0;
}
+static int pt_buffer_try_single(struct pt_buffer *buf, int nr_pages)
+{
+ struct page *p = virt_to_page(buf->data_pages[0]);
+ int ret = -ENOTSUPP, order = 0;
+
+ /*
+ * We can use single range output mode
+ * + in snapshot mode, where we don't need interrupts;
+ * + if the hardware supports it;
+ * + if the entire buffer is one contiguous allocation.
+ */
+ if (!buf->snapshot)
+ goto out;
+
+ if (!intel_pt_validate_hw_cap(PT_CAP_single_range_output))
+ goto out;
+
+ if (PagePrivate(p))
+ order = page_private(p);
+
+ if (1 << order != nr_pages)
+ goto out;
+
+ buf->single = true;
+ buf->nr_pages = nr_pages;
+ ret = 0;
+out:
+ return ret;
+}
+
/**
* pt_buffer_setup_aux() - set up topa tables for a PT buffer
* @cpu: Cpu on which to allocate, -1 means current.
@@ -1198,6 +1275,13 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
if (!nr_pages)
return NULL;
+ /*
+ * Only support AUX sampling in snapshot mode, where we don't
+ * generate NMIs.
+ */
+ if (event->attr.aux_sample_size && !snapshot)
+ return NULL;
+
if (cpu == -1)
cpu = raw_smp_processor_id();
node = cpu_to_node(cpu);
@@ -1213,6 +1297,10 @@ pt_buffer_setup_aux(struct perf_event *event, void **pages,
INIT_LIST_HEAD(&buf->tables);
+ ret = pt_buffer_try_single(buf, nr_pages);
+ if (!ret)
+ return buf;
+
ret = pt_buffer_init_topa(buf, cpu, nr_pages, GFP_KERNEL);
if (ret) {
kfree(buf);
@@ -1379,9 +1467,8 @@ void intel_pt_interrupt(void)
return;
}
- pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
- buf->output_off);
- pt_config(event);
+ pt_config_buffer(buf);
+ pt_config_start(event);
}
}
@@ -1444,8 +1531,7 @@ static void pt_event_start(struct perf_event *event, int mode)
WRITE_ONCE(pt->handle_nmi, 1);
hwc->state = 0;
- pt_config_buffer(topa_to_page(buf->cur)->table, buf->cur_idx,
- buf->output_off);
+ pt_config_buffer(buf);
pt_config(event);
return;
@@ -1496,6 +1582,52 @@ static void pt_event_stop(struct perf_event *event, int mode)
}
}
+static long pt_event_snapshot_aux(struct perf_event *event,
+ struct perf_output_handle *handle,
+ unsigned long size)
+{
+ struct pt *pt = this_cpu_ptr(&pt_ctx);
+ struct pt_buffer *buf = perf_get_aux(&pt->handle);
+ unsigned long from = 0, to;
+ long ret;
+
+ if (WARN_ON_ONCE(!buf))
+ return 0;
+
+ /*
+ * Sampling is only allowed on snapshot events;
+ * see pt_buffer_setup_aux().
+ */
+ if (WARN_ON_ONCE(!buf->snapshot))
+ return 0;
+
+ /*
+ * Here, handle_nmi tells us if the tracing is on
+ */
+ if (READ_ONCE(pt->handle_nmi))
+ pt_config_stop(event);
+
+ pt_read_offset(buf);
+ pt_update_head(pt);
+
+ to = local_read(&buf->data_size);
+ if (to < size)
+ from = buf->nr_pages << PAGE_SHIFT;
+ from += to - size;
+
+ ret = perf_output_copy_aux(&pt->handle, handle, from, to);
+
+ /*
+ * If the tracing was on when we turned up, restart it.
+ * Compiler barrier not needed as we couldn't have been
+ * preempted by anything that touches pt->handle_nmi.
+ */
+ if (pt->handle_nmi)
+ pt_config_start(event);
+
+ return ret;
+}
+
static void pt_event_del(struct perf_event *event, int mode)
{
pt_event_stop(event, PERF_EF_UPDATE);
@@ -1615,6 +1747,7 @@ static __init int pt_init(void)
pt_pmu.pmu.del = pt_event_del;
pt_pmu.pmu.start = pt_event_start;
pt_pmu.pmu.stop = pt_event_stop;
+ pt_pmu.pmu.snapshot_aux = pt_event_snapshot_aux;
pt_pmu.pmu.read = pt_event_read;
pt_pmu.pmu.setup_aux = pt_buffer_setup_aux;
pt_pmu.pmu.free_aux = pt_buffer_free_aux;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 1d2bb7572374..96906a62aacd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -64,6 +64,7 @@ struct pt_pmu {
* @lost: if data was lost/truncated
* @head: logical write offset inside the buffer
* @snapshot: if this is for a snapshot/overwrite counter
+ * @single: use Single Range Output instead of ToPA
* @stop_pos: STOP topa entry index
* @intr_pos: INT topa entry index
* @stop_te: STOP topa entry pointer
@@ -80,6 +81,7 @@ struct pt_buffer {
local_t data_size;
local64_t head;
bool snapshot;
+ bool single;
long stop_pos, intr_pos;
struct topa_entry *stop_te, *intr_te;
void **data_pages;
@@ -111,16 +113,20 @@ struct pt_filters {
/**
* struct pt - per-cpu pt context
- * @handle: perf output handle
+ * @handle: perf output handle
* @filters: last configured filters
- * @handle_nmi: do handle PT PMI on this cpu, there's an active event
- * @vmx_on: 1 if VMX is ON on this cpu
+ * @handle_nmi: do handle PT PMI on this cpu, there's an active event
+ * @vmx_on: 1 if VMX is ON on this cpu
+ * @output_base: cached RTIT_OUTPUT_BASE MSR value
+ * @output_mask: cached RTIT_OUTPUT_MASK MSR value
*/
struct pt {
struct perf_output_handle handle;
struct pt_filters filters;
int handle_nmi;
int vmx_on;
+ u64 output_base;
+ u64 output_mask;
};
#endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index ecacfbf4ebc1..930611db8f9a 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -683,6 +683,14 @@ struct x86_pmu {
atomic_t lbr_exclusive[x86_lbr_exclusive_max];
/*
+ * perf task context (i.e. struct perf_event_context::task_ctx_data)
+ * switch helper to bridge calls from perf/core to perf/x86.
+ * See struct pmu::swap_task_ctx() usage for examples;
+ */
+ void (*swap_task_ctx)(struct perf_event_context *prev,
+ struct perf_event_context *next);
+
+ /*
* AMD bits
*/
unsigned int amd_nb_constraints : 1;
@@ -1016,6 +1024,9 @@ void intel_pmu_store_pebs_lbrs(struct pebs_lbr *lbr);
void intel_ds_init(void);
+void intel_pmu_lbr_swap_task_ctx(struct perf_event_context *prev,
+ struct perf_event_context *next);
+
void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
u64 lbr_from_signext_quirk_wr(u64 val);
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index e01078e93dd3..40e0e322161d 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -194,10 +194,20 @@ do_ex_hypercall:
static bool __send_ipi_one(int cpu, int vector)
{
- struct cpumask mask = CPU_MASK_NONE;
+ int vp = hv_cpu_number_to_vp_number(cpu);
- cpumask_set_cpu(cpu, &mask);
- return __send_ipi_mask(&mask, vector);
+ trace_hyperv_send_ipi_one(cpu, vector);
+
+ if (!hv_hypercall_pg || (vp == VP_INVAL))
+ return false;
+
+ if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
+ return false;
+
+ if (vp >= 64)
+ return __send_ipi_mask_ex(cpumask_of(cpu), vector);
+
+ return !hv_do_fast_hypercall16(HVCALL_SEND_IPI, vector, BIT_ULL(vp));
}
static void hv_send_ipi(int cpu, int vector)
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 2db3972c0e0f..50ff030d9224 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -311,6 +311,12 @@ void __init hyperv_init(void)
hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg);
wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
+ /*
+ * Ignore any errors in setting up stimer clockevents
+ * as we can run with the LAPIC timer as a fallback.
+ */
+ (void)hv_stimer_alloc();
+
hv_apic_init();
x86_init.pci.arch_init = hv_pci_init;
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 1cee10091b9f..30416d7f19d4 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -21,6 +21,7 @@
#include <linux/personality.h>
#include <linux/compat.h>
#include <linux/binfmts.h>
+#include <linux/syscalls.h>
#include <asm/ucontext.h>
#include <linux/uaccess.h>
#include <asm/fpu/internal.h>
@@ -118,7 +119,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
return err;
}
-asmlinkage long sys32_sigreturn(void)
+COMPAT_SYSCALL_DEFINE0(sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
@@ -144,7 +145,7 @@ badframe:
return 0;
}
-asmlinkage long sys32_rt_sigreturn(void)
+COMPAT_SYSCALL_DEFINE0(rt_sigreturn)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_ia32 __user *frame;
diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h
index 3ff577c0b102..cd339b88d5d4 100644
--- a/arch/x86/include/asm/asm.h
+++ b/arch/x86/include/asm/asm.h
@@ -7,9 +7,11 @@
# define __ASM_FORM_RAW(x) x
# define __ASM_FORM_COMMA(x) x,
#else
-# define __ASM_FORM(x) " " #x " "
-# define __ASM_FORM_RAW(x) #x
-# define __ASM_FORM_COMMA(x) " " #x ","
+#include <linux/stringify.h>
+
+# define __ASM_FORM(x) " " __stringify(x) " "
+# define __ASM_FORM_RAW(x) __stringify(x)
+# define __ASM_FORM_COMMA(x) " " __stringify(x) ","
#endif
#ifndef __x86_64__
@@ -139,9 +141,6 @@
# define _ASM_EXTABLE_EX(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
-# define _ASM_EXTABLE_REFCOUNT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
# define _ASM_NOKPROBE(entry) \
.pushsection "_kprobe_blacklist","aw" ; \
_ASM_ALIGN ; \
@@ -170,9 +169,6 @@
# define _ASM_EXTABLE_EX(from, to) \
_ASM_EXTABLE_HANDLE(from, to, ex_handler_ext)
-# define _ASM_EXTABLE_REFCOUNT(from, to) \
- _ASM_EXTABLE_HANDLE(from, to, ex_handler_refcount)
-
/* For C file, we already have NOKPROBE_SYMBOL macro */
#endif
diff --git a/arch/x86/include/asm/calgary.h b/arch/x86/include/asm/calgary.h
deleted file mode 100644
index facd374a1bf7..000000000000
--- a/arch/x86/include/asm/calgary.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Derived from include/asm-powerpc/iommu.h
- *
- * Copyright IBM Corporation, 2006-2007
- *
- * Author: Jon Mason <jdmason@us.ibm.com>
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- */
-
-#ifndef _ASM_X86_CALGARY_H
-#define _ASM_X86_CALGARY_H
-
-#include <linux/spinlock.h>
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/timer.h>
-#include <asm/types.h>
-
-struct iommu_table {
- const struct cal_chipset_ops *chip_ops; /* chipset specific funcs */
- unsigned long it_base; /* mapped address of tce table */
- unsigned long it_hint; /* Hint for next alloc */
- unsigned long *it_map; /* A simple allocation bitmap for now */
- void __iomem *bbar; /* Bridge BAR */
- u64 tar_val; /* Table Address Register */
- struct timer_list watchdog_timer;
- spinlock_t it_lock; /* Protects it_map */
- unsigned int it_size; /* Size of iommu table in entries */
- unsigned char it_busno; /* Bus number this table belongs to */
-};
-
-struct cal_chipset_ops {
- void (*handle_quirks)(struct iommu_table *tbl, struct pci_dev *dev);
- void (*tce_cache_blast)(struct iommu_table *tbl);
- void (*dump_error_regs)(struct iommu_table *tbl);
-};
-
-#define TCE_TABLE_SIZE_UNSPECIFIED ~0
-#define TCE_TABLE_SIZE_64K 0
-#define TCE_TABLE_SIZE_128K 1
-#define TCE_TABLE_SIZE_256K 2
-#define TCE_TABLE_SIZE_512K 3
-#define TCE_TABLE_SIZE_1M 4
-#define TCE_TABLE_SIZE_2M 5
-#define TCE_TABLE_SIZE_4M 6
-#define TCE_TABLE_SIZE_8M 7
-
-extern int use_calgary;
-
-#ifdef CONFIG_CALGARY_IOMMU
-extern int detect_calgary(void);
-#else
-static inline int detect_calgary(void) { return -ENODEV; }
-#endif
-
-#endif /* _ASM_X86_CALGARY_H */
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 8348f7d69fd5..ea866c7bf31d 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -78,8 +78,12 @@ struct cpu_entry_area {
/*
* The GDT is just below entry_stack and thus serves (on x86_64) as
- * a a read-only guard page.
+ * a read-only guard page. On 32-bit the GDT must be writeable, so
+ * it needs an extra guard page.
*/
+#ifdef CONFIG_X86_32
+ char guard_entry_stack[PAGE_SIZE];
+#endif
struct entry_stack_page entry_stack_page;
/*
@@ -94,7 +98,6 @@ struct cpu_entry_area {
*/
struct cea_exception_stacks estacks;
#endif
-#ifdef CONFIG_CPU_SUP_INTEL
/*
* Per CPU debug store for Intel performance monitoring. Wastes a
* full page at the moment.
@@ -105,11 +108,13 @@ struct cpu_entry_area {
* Reserve enough fixmap PTEs.
*/
struct debug_store_buffers cpu_debug_buffers;
-#endif
};
-#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
+
+/* Total size includes the readonly IDT mapping page as well: */
+#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
@@ -117,13 +122,14 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
+/* Single page reserved for the readonly IDT mapping: */
#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
#define CPU_ENTRY_AREA_MAP_SIZE \
- (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
+ (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index c4fbe379cc0b..e9b62498fe75 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -292,6 +292,7 @@
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
+#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */
#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */
#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
diff --git a/arch/x86/include/asm/crash.h b/arch/x86/include/asm/crash.h
index 0acf5ee45a21..f58de66091e5 100644
--- a/arch/x86/include/asm/crash.h
+++ b/arch/x86/include/asm/crash.h
@@ -2,10 +2,17 @@
#ifndef _ASM_X86_CRASH_H
#define _ASM_X86_CRASH_H
+struct kimage;
+
int crash_load_segments(struct kimage *image);
-int crash_copy_backup_region(struct kimage *image);
int crash_setup_memmap_entries(struct kimage *image,
struct boot_params *params);
void crash_smp_send_stop(void);
+#ifdef CONFIG_KEXEC_CORE
+void __init crash_reserve_low_1M(void);
+#else
+static inline void __init crash_reserve_low_1M(void) { }
+#endif
+
#endif /* _ASM_X86_CRASH_H */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index a5ea841cc6d2..8e1d0bb46361 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -22,7 +22,7 @@
# define DISABLE_SMAP (1<<(X86_FEATURE_SMAP & 31))
#endif
-#ifdef CONFIG_X86_INTEL_UMIP
+#ifdef CONFIG_X86_UMIP
# define DISABLE_UMIP 0
#else
# define DISABLE_UMIP (1<<(X86_FEATURE_UMIP & 31))
diff --git a/arch/x86/include/asm/e820/types.h b/arch/x86/include/asm/e820/types.h
index c3aa4b5e49e2..314f75d886d0 100644
--- a/arch/x86/include/asm/e820/types.h
+++ b/arch/x86/include/asm/e820/types.h
@@ -29,6 +29,14 @@ enum e820_type {
E820_TYPE_PRAM = 12,
/*
+ * Special-purpose memory is indicated to the system via the
+ * EFI_MEMORY_SP attribute. Define an e820 translation of this
+ * memory type for the purpose of reserving this range and
+ * marking it with the IORES_DESC_SOFT_RESERVED designation.
+ */
+ E820_TYPE_SOFT_RESERVED = 0xefffffff,
+
+ /*
* Reserved RAM used by the kernel itself if
* CONFIG_INTEL_TXT=y is enabled, memory of this type
* will be included in the S3 integrity calculation
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 43a82e59c59d..d028e9acdf1c 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -140,7 +140,6 @@ extern void efi_delete_dummy_variable(void);
extern void efi_switch_mm(struct mm_struct *mm);
extern void efi_recover_from_page_fault(unsigned long phys_addr);
extern void efi_free_boot_services(void);
-extern void efi_reserve_boot_services(void);
struct efi_setup_data {
u64 fw_vendor;
@@ -244,6 +243,8 @@ static inline bool efi_is_64bit(void)
extern bool efi_reboot_required(void);
extern bool efi_is_table_address(unsigned long phys_addr);
+extern void efi_find_mirror(void);
+extern void efi_reserve_boot_services(void);
#else
static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {}
static inline bool efi_reboot_required(void)
@@ -254,6 +255,20 @@ static inline bool efi_is_table_address(unsigned long phys_addr)
{
return false;
}
+static inline void efi_find_mirror(void)
+{
+}
+static inline void efi_reserve_boot_services(void)
+{
+}
#endif /* CONFIG_EFI */
+#ifdef CONFIG_EFI_FAKE_MEMMAP
+extern void __init efi_fake_memmap_early(void);
+#else
+static inline void efi_fake_memmap_early(void)
+{
+}
+#endif
+
#endif /* _ASM_X86_EFI_H */
diff --git a/arch/x86/include/asm/emulate_prefix.h b/arch/x86/include/asm/emulate_prefix.h
new file mode 100644
index 000000000000..70f5b98a5286
--- /dev/null
+++ b/arch/x86/include/asm/emulate_prefix.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_EMULATE_PREFIX_H
+#define _ASM_X86_EMULATE_PREFIX_H
+
+/*
+ * Virt escape sequences to trigger instruction emulation;
+ * ideally these would decode to 'whole' instruction and not destroy
+ * the instruction stream; sadly this is not true for the 'kvm' one :/
+ */
+
+#define __XEN_EMULATE_PREFIX 0x0f,0x0b,0x78,0x65,0x6e /* ud2 ; .ascii "xen" */
+#define __KVM_EMULATE_PREFIX 0x0f,0x0b,0x6b,0x76,0x6d /* ud2 ; .ascii "kvm" */
+
+#endif
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index 0c47aa82e2e2..28183ee3cc42 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -156,7 +156,7 @@ extern pte_t *kmap_pte;
extern pte_t *pkmap_page_table;
void __native_set_fixmap(enum fixed_addresses idx, pte_t pte);
-void native_set_fixmap(enum fixed_addresses idx,
+void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
phys_addr_t phys, pgprot_t flags);
#ifndef CONFIG_PARAVIRT_XXL
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index c38a66661576..c2a7458f912c 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -28,6 +28,19 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
return addr;
}
+/*
+ * When a ftrace registered caller is tracing a function that is
+ * also set by a register_ftrace_direct() call, it needs to be
+ * differentiated in the ftrace_caller trampoline. To do this, we
+ * place the direct caller in the ORIG_AX part of pt_regs. This
+ * tells the ftrace_caller that there's a direct caller.
+ */
+static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr)
+{
+ /* Emulate a call */
+ regs->orig_ax = addr;
+}
+
#ifdef CONFIG_DYNAMIC_FTRACE
struct dyn_arch_ftrace {
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 7741e211f7f5..5f10f7f2098d 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -86,6 +86,8 @@
#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11)
/* AccessReenlightenmentControls privilege */
#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13)
+/* AccessTscInvariantControls privilege */
+#define HV_X64_ACCESS_TSC_INVARIANT BIT(15)
/*
* Feature identification: indicates which flags were specified at partition
@@ -278,6 +280,9 @@
#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+/* TSC invariant control */
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
+
/*
* Declare the MSR used to setup pages used to communicate with the hypervisor.
*/
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 154f27be8bfc..5c1ae3eff9d4 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -45,6 +45,7 @@ struct insn {
struct insn_field immediate2; /* for 64bit imm or seg16 */
};
+ int emulate_prefix_size;
insn_attr_t attr;
unsigned char opnd_bytes;
unsigned char addr_bytes;
@@ -128,6 +129,11 @@ static inline int insn_is_evex(struct insn *insn)
return (insn->vex_prefix.nbytes == 4);
}
+static inline int insn_has_emulate_prefix(struct insn *insn)
+{
+ return !!insn->emulate_prefix_size;
+}
+
/* Ensure this instruction is decoded completely */
static inline int insn_complete(struct insn *insn)
{
diff --git a/arch/x86/include/asm/io_bitmap.h b/arch/x86/include/asm/io_bitmap.h
new file mode 100644
index 000000000000..02c6ef8f7667
--- /dev/null
+++ b/arch/x86/include/asm/io_bitmap.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_IOBITMAP_H
+#define _ASM_X86_IOBITMAP_H
+
+#include <linux/refcount.h>
+#include <asm/processor.h>
+
+struct io_bitmap {
+ u64 sequence;
+ refcount_t refcnt;
+ /* The maximum number of bytes to copy so all zero bits are covered */
+ unsigned int max;
+ unsigned long bitmap[IO_BITMAP_LONGS];
+};
+
+struct task_struct;
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+void io_bitmap_share(struct task_struct *tsk);
+void io_bitmap_exit(void);
+
+void tss_update_io_bitmap(void);
+#else
+static inline void io_bitmap_share(struct task_struct *tsk) { }
+static inline void io_bitmap_exit(void) { }
+static inline void tss_update_io_bitmap(void) { }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 5e7d6b46de97..6802c59e8252 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -66,10 +66,6 @@ struct kimage;
# define KEXEC_ARCH KEXEC_ARCH_X86_64
#endif
-/* Memory to backup during crash kdump */
-#define KEXEC_BACKUP_SRC_START (0UL)
-#define KEXEC_BACKUP_SRC_END (640 * 1024UL - 1) /* 640K */
-
/*
* This function is responsible for capturing register states if coming
* via panic otherwise just fix up the ss and sp if coming via kernel
@@ -154,12 +150,6 @@ struct kimage_arch {
pud_t *pud;
pmd_t *pmd;
pte_t *pte;
- /* Details of backup region */
- unsigned long backup_src_start;
- unsigned long backup_src_sz;
-
- /* Physical address of backup segment */
- unsigned long backup_load_addr;
/* Core ELF header buffer */
void *elf_headers;
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 14caa9d9fb7f..365111789cc6 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -13,10 +13,6 @@
#ifdef __ASSEMBLY__
-#define GLOBAL(name) \
- .globl name; \
- name:
-
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
#define __ALIGN .p2align 4, 0x90
#define __ALIGN_STR __stringify(__ALIGN)
diff --git a/arch/x86/include/asm/module.h b/arch/x86/include/asm/module.h
index 7948a17febb4..c215d2762488 100644
--- a/arch/x86/include/asm/module.h
+++ b/arch/x86/include/asm/module.h
@@ -15,6 +15,8 @@ struct mod_arch_specific {
#ifdef CONFIG_X86_64
/* X86_64 does not define MODULE_PROC_FAMILY */
+#elif defined CONFIG_M486SX
+#define MODULE_PROC_FAMILY "486SX "
#elif defined CONFIG_M486
#define MODULE_PROC_FAMILY "486 "
#elif defined CONFIG_M586
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 69089d46f128..86e7317eb31f 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -294,10 +294,6 @@ static inline void write_idt_entry(gate_desc *dt, int entry, const gate_desc *g)
{
PVOP_VCALL3(cpu.write_idt_entry, dt, entry, g);
}
-static inline void set_iopl_mask(unsigned mask)
-{
- PVOP_VCALL1(cpu.set_iopl_mask, mask);
-}
static inline void paravirt_activate_mm(struct mm_struct *prev,
struct mm_struct *next)
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 70b654f3ffe5..84812964d3dd 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -140,8 +140,6 @@ struct pv_cpu_ops {
void (*load_sp0)(unsigned long sp0);
- void (*set_iopl_mask)(unsigned mask);
-
void (*wbinvd)(void);
/* cpuid emulation, mostly so that caps bits can be disabled */
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index e662f987dfa2..90d0731fdcb6 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -12,8 +12,6 @@
#include <asm/pat.h>
#include <asm/x86_init.h>
-#ifdef __KERNEL__
-
struct pci_sysdata {
int domain; /* PCI domain */
int node; /* NUMA node */
@@ -118,11 +116,6 @@ void native_restore_msi_irqs(struct pci_dev *dev);
#define native_setup_msi_irqs NULL
#define native_teardown_msi_irq NULL
#endif
-#endif /* __KERNEL__ */
-
-#ifdef CONFIG_X86_64
-#include <asm/pci_64.h>
-#endif
/* generic pci stuff */
#include <asm-generic/pci.h>
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h
deleted file mode 100644
index f5411de0ae11..000000000000
--- a/arch/x86/include/asm/pci_64.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_PCI_64_H
-#define _ASM_X86_PCI_64_H
-
-#ifdef __KERNEL__
-
-#ifdef CONFIG_CALGARY_IOMMU
-static inline void *pci_iommu(struct pci_bus *bus)
-{
- struct pci_sysdata *sd = bus->sysdata;
- return sd->iommu;
-}
-
-static inline void set_pci_iommu(struct pci_bus *bus, void *val)
-{
- struct pci_sysdata *sd = bus->sysdata;
- sd->iommu = val;
-}
-#endif /* CONFIG_CALGARY_IOMMU */
-
-extern int (*pci_config_read)(int seg, int bus, int dev, int fn,
- int reg, int len, u32 *value);
-extern int (*pci_config_write)(int seg, int bus, int dev, int fn,
- int reg, int len, u32 value);
-
-#endif /* __KERNEL__ */
-
-#endif /* _ASM_X86_PCI_64_H */
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index e3633795fb22..5afb5e0fe903 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -36,39 +36,41 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
#define pmd_read_atomic pmd_read_atomic
/*
- * pte_offset_map_lock on 32bit PAE kernels was reading the pmd_t with
- * a "*pmdp" dereference done by gcc. Problem is, in certain places
- * where pte_offset_map_lock is called, concurrent page faults are
+ * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
+ * a "*pmdp" dereference done by GCC. Problem is, in certain places
+ * where pte_offset_map_lock() is called, concurrent page faults are
* allowed, if the mmap_sem is hold for reading. An example is mincore
* vs page faults vs MADV_DONTNEED. On the page fault side
- * pmd_populate rightfully does a set_64bit, but if we're reading the
+ * pmd_populate() rightfully does a set_64bit(), but if we're reading the
* pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
- * because gcc will not read the 64bit of the pmd atomically. To fix
- * this all places running pmd_offset_map_lock() while holding the
+ * because GCC will not read the 64-bit value of the pmd atomically.
+ *
+ * To fix this all places running pte_offset_map_lock() while holding the
* mmap_sem in read mode, shall read the pmdp pointer using this
- * function to know if the pmd is null nor not, and in turn to know if
- * they can run pmd_offset_map_lock or pmd_trans_huge or other pmd
+ * function to know if the pmd is null or not, and in turn to know if
+ * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
* operations.
*
- * Without THP if the mmap_sem is hold for reading, the pmd can only
- * transition from null to not null while pmd_read_atomic runs. So
+ * Without THP if the mmap_sem is held for reading, the pmd can only
+ * transition from null to not null while pmd_read_atomic() runs. So
* we can always return atomic pmd values with this function.
*
- * With THP if the mmap_sem is hold for reading, the pmd can become
+ * With THP if the mmap_sem is held for reading, the pmd can become
* trans_huge or none or point to a pte (and in turn become "stable")
- * at any time under pmd_read_atomic. We could read it really
- * atomically here with a atomic64_read for the THP enabled case (and
+ * at any time under pmd_read_atomic(). We could read it truly
+ * atomically here with an atomic64_read() for the THP enabled case (and
* it would be a whole lot simpler), but to avoid using cmpxchg8b we
* only return an atomic pmdval if the low part of the pmdval is later
- * found stable (i.e. pointing to a pte). And we're returning a none
- * pmdval if the low part of the pmd is none. In some cases the high
- * and low part of the pmdval returned may not be consistent if THP is
- * enabled (the low part may point to previously mapped hugepage,
- * while the high part may point to a more recently mapped hugepage),
- * but pmd_none_or_trans_huge_or_clear_bad() only needs the low part
- * of the pmd to be read atomically to decide if the pmd is unstable
- * or not, with the only exception of when the low part of the pmd is
- * zero in which case we return a none pmd.
+ * found to be stable (i.e. pointing to a pte). We are also returning a
+ * 'none' (zero) pmdval if the low part of the pmd is zero.
+ *
+ * In some cases the high and low part of the pmdval returned may not be
+ * consistent if THP is enabled (the low part may point to previously
+ * mapped hugepage, while the high part may point to a more recently
+ * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
+ * needs the low part of the pmd to be read atomically to decide if the
+ * pmd is unstable or not, with the only exception when the low part
+ * of the pmd is zero, in which case we return a 'none' pmd.
*/
static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
{
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h
index b0bc0fff5f1f..19f5807260c3 100644
--- a/arch/x86/include/asm/pgtable_32_types.h
+++ b/arch/x86/include/asm/pgtable_32_types.h
@@ -44,11 +44,11 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
* Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
* to avoid include recursion hell
*/
-#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
+#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 41)
-#define CPU_ENTRY_AREA_BASE \
- ((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \
- & PMD_MASK)
+/* The +1 is for the readonly IDT page: */
+#define CPU_ENTRY_AREA_BASE \
+ ((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
#define LDT_BASE_ADDR \
((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 54f5d54280f6..e51afbb0cbfb 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -7,6 +7,7 @@
/* Forward declaration, a strange C thing */
struct task_struct;
struct mm_struct;
+struct io_bitmap;
struct vm86;
#include <asm/math_emu.h>
@@ -93,7 +94,15 @@ struct cpuinfo_x86 {
__u32 extended_cpuid_level;
/* Maximum supported CPUID level, -1=no CPUID: */
int cpuid_level;
- __u32 x86_capability[NCAPINTS + NBUGINTS];
+ /*
+ * Align to size of unsigned long because the x86_capability array
+ * is passed to bitops which require the alignment. Use unnamed
+ * union to enforce the array is aligned to size of unsigned long.
+ */
+ union {
+ __u32 x86_capability[NCAPINTS + NBUGINTS];
+ unsigned long x86_capability_alignment;
+ };
char x86_vendor_id[16];
char x86_model_id[64];
/* in KB - valid for CPUS which support this call: */
@@ -328,10 +337,32 @@ struct x86_hw_tss {
* IO-bitmap sizes:
*/
#define IO_BITMAP_BITS 65536
-#define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
-#define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
-#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
-#define INVALID_IO_BITMAP_OFFSET 0x8000
+#define IO_BITMAP_BYTES (IO_BITMAP_BITS / BITS_PER_BYTE)
+#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long))
+
+#define IO_BITMAP_OFFSET_VALID_MAP \
+ (offsetof(struct tss_struct, io_bitmap.bitmap) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#define IO_BITMAP_OFFSET_VALID_ALL \
+ (offsetof(struct tss_struct, io_bitmap.mapall) - \
+ offsetof(struct tss_struct, x86_tss))
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+/*
+ * sizeof(unsigned long) coming from an extra "long" at the end of the
+ * iobitmap. The limit is inclusive, i.e. the last valid byte.
+ */
+# define __KERNEL_TSS_LIMIT \
+ (IO_BITMAP_OFFSET_VALID_ALL + IO_BITMAP_BYTES + \
+ sizeof(unsigned long) - 1)
+#else
+# define __KERNEL_TSS_LIMIT \
+ (offsetof(struct tss_struct, x86_tss) + sizeof(struct x86_hw_tss) - 1)
+#endif
+
+/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */
+#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
struct entry_stack {
unsigned long words[64];
@@ -341,13 +372,21 @@ struct entry_stack_page {
struct entry_stack stack;
} __aligned(PAGE_SIZE);
-struct tss_struct {
+/*
+ * All IO bitmap related data stored in the TSS:
+ */
+struct x86_io_bitmap {
+ /* The sequence number of the last active bitmap. */
+ u64 prev_sequence;
+
/*
- * The fixed hardware portion. This must not cross a page boundary
- * at risk of violating the SDM's advice and potentially triggering
- * errata.
+ * Store the dirty size of the last io bitmap offender. The next
+ * one will have to do the cleanup as the switch out to a non io
+ * bitmap user will just set x86_tss.io_bitmap_base to a value
+ * outside of the TSS limit. So for sane tasks there is no need to
+ * actually touch the io_bitmap at all.
*/
- struct x86_hw_tss x86_tss;
+ unsigned int prev_max;
/*
* The extra 1 is there because the CPU will access an
@@ -355,21 +394,28 @@ struct tss_struct {
* bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
- unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
+ unsigned long bitmap[IO_BITMAP_LONGS + 1];
+
+ /*
+ * Special I/O bitmap to emulate IOPL(3). All bytes zero,
+ * except the additional byte at the end.
+ */
+ unsigned long mapall[IO_BITMAP_LONGS + 1];
+};
+
+struct tss_struct {
+ /*
+ * The fixed hardware portion. This must not cross a page boundary
+ * at risk of violating the SDM's advice and potentially triggering
+ * errata.
+ */
+ struct x86_hw_tss x86_tss;
+
+ struct x86_io_bitmap io_bitmap;
} __aligned(PAGE_SIZE);
DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
-/*
- * sizeof(unsigned long) coming from an extra "long" at the end
- * of the iobitmap.
- *
- * -1? seg base+limit should be pointing to the address of the
- * last valid byte
- */
-#define __KERNEL_TSS_LIMIT \
- (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
-
/* Per CPU interrupt stacks */
struct irq_stack {
char stack[IRQ_STACK_SIZE];
@@ -480,10 +526,14 @@ struct thread_struct {
struct vm86 *vm86;
#endif
/* IO permissions: */
- unsigned long *io_bitmap_ptr;
- unsigned long iopl;
- /* Max allowed port in the bitmap, in bytes: */
- unsigned io_bitmap_max;
+ struct io_bitmap *io_bitmap;
+
+ /*
+ * IOPL. Priviledge level dependent I/O permission which is
+ * emulated via the I/O bitmap to prevent user space from disabling
+ * interrupts.
+ */
+ unsigned long iopl_emul;
mm_segment_t addr_limit;
@@ -515,25 +565,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
*/
#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/
-/*
- * Set IOPL bits in EFLAGS from given mask
- */
-static inline void native_set_iopl_mask(unsigned mask)
-{
-#ifdef CONFIG_X86_32
- unsigned int reg;
-
- asm volatile ("pushfl;"
- "popl %0;"
- "andl %1, %0;"
- "orl %2, %0;"
- "pushl %0;"
- "popfl"
- : "=&r" (reg)
- : "i" (~X86_EFLAGS_IOPL), "r" (mask));
-#endif
-}
-
static inline void
native_load_sp0(unsigned long sp0)
{
@@ -573,7 +604,6 @@ static inline void load_sp0(unsigned long sp0)
native_load_sp0(sp0);
}
-#define set_iopl_mask native_set_iopl_mask
#endif /* CONFIG_PARAVIRT_XXL */
/* Free all resources held by a thread. */
@@ -841,7 +871,6 @@ static inline void spin_lock_prefetch(const void *x)
#define INIT_THREAD { \
.sp0 = TOP_OF_INIT_STACK, \
.sysenter_cs = __KERNEL_CS, \
- .io_bitmap_ptr = NULL, \
.addr_limit = KERNEL_DS, \
}
@@ -958,7 +987,7 @@ static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
extern unsigned long arch_align_stack(unsigned long sp);
void free_init_pages(const char *what, unsigned long begin, unsigned long end);
-extern void free_kernel_image_pages(void *begin, void *end);
+extern void free_kernel_image_pages(const char *what, void *begin, void *end);
void default_idle(void);
#ifdef CONFIG_XEN
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 332eb3525867..5057a8ed100b 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -361,5 +361,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
extern int do_set_thread_area(struct task_struct *p, int idx,
struct user_desc __user *info, int can_allocate);
+#ifdef CONFIG_X86_64
+# define do_set_thread_area_64(p, s, t) do_arch_prctl_64(p, s, t)
+#else
+# define do_set_thread_area_64(p, s, t) (0)
+#endif
+
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_PTRACE_H */
diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h
index 92c34e517da1..5528e9325049 100644
--- a/arch/x86/include/asm/purgatory.h
+++ b/arch/x86/include/asm/purgatory.h
@@ -6,16 +6,6 @@
#include <linux/purgatory.h>
extern void purgatory(void);
-/*
- * These forward declarations serve two purposes:
- *
- * 1) Make sparse happy when checking arch/purgatory
- * 2) Document that these are required to be global so the symbol
- * lookup in kexec works
- */
-extern unsigned long purgatory_backup_dest;
-extern unsigned long purgatory_backup_src;
-extern unsigned long purgatory_backup_sz;
#endif /* __ASSEMBLY__ */
#endif /* _ASM_PURGATORY_H */
diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h
deleted file mode 100644
index 232f856e0db0..000000000000
--- a/arch/x86/include/asm/refcount.h
+++ /dev/null
@@ -1,126 +0,0 @@
-#ifndef __ASM_X86_REFCOUNT_H
-#define __ASM_X86_REFCOUNT_H
-/*
- * x86-specific implementation of refcount_t. Based on PAX_REFCOUNT from
- * PaX/grsecurity.
- */
-#include <linux/refcount.h>
-#include <asm/bug.h>
-
-/*
- * This is the first portion of the refcount error handling, which lives in
- * .text.unlikely, and is jumped to from the CPU flag check (in the
- * following macros). This saves the refcount value location into CX for
- * the exception handler to use (in mm/extable.c), and then triggers the
- * central refcount exception. The fixup address for the exception points
- * back to the regular execution flow in .text.
- */
-#define _REFCOUNT_EXCEPTION \
- ".pushsection .text..refcount\n" \
- "111:\tlea %[var], %%" _ASM_CX "\n" \
- "112:\t" ASM_UD2 "\n" \
- ASM_UNREACHABLE \
- ".popsection\n" \
- "113:\n" \
- _ASM_EXTABLE_REFCOUNT(112b, 113b)
-
-/* Trigger refcount exception if refcount result is negative. */
-#define REFCOUNT_CHECK_LT_ZERO \
- "js 111f\n\t" \
- _REFCOUNT_EXCEPTION
-
-/* Trigger refcount exception if refcount result is zero or negative. */
-#define REFCOUNT_CHECK_LE_ZERO \
- "jz 111f\n\t" \
- REFCOUNT_CHECK_LT_ZERO
-
-/* Trigger refcount exception unconditionally. */
-#define REFCOUNT_ERROR \
- "jmp 111f\n\t" \
- _REFCOUNT_EXCEPTION
-
-static __always_inline void refcount_add(unsigned int i, refcount_t *r)
-{
- asm volatile(LOCK_PREFIX "addl %1,%0\n\t"
- REFCOUNT_CHECK_LT_ZERO
- : [var] "+m" (r->refs.counter)
- : "ir" (i)
- : "cc", "cx");
-}
-
-static __always_inline void refcount_inc(refcount_t *r)
-{
- asm volatile(LOCK_PREFIX "incl %0\n\t"
- REFCOUNT_CHECK_LT_ZERO
- : [var] "+m" (r->refs.counter)
- : : "cc", "cx");
-}
-
-static __always_inline void refcount_dec(refcount_t *r)
-{
- asm volatile(LOCK_PREFIX "decl %0\n\t"
- REFCOUNT_CHECK_LE_ZERO
- : [var] "+m" (r->refs.counter)
- : : "cc", "cx");
-}
-
-static __always_inline __must_check
-bool refcount_sub_and_test(unsigned int i, refcount_t *r)
-{
- bool ret = GEN_BINARY_SUFFIXED_RMWcc(LOCK_PREFIX "subl",
- REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, e, "er", i, "cx");
-
- if (ret) {
- smp_acquire__after_ctrl_dep();
- return true;
- }
-
- return false;
-}
-
-static __always_inline __must_check bool refcount_dec_and_test(refcount_t *r)
-{
- bool ret = GEN_UNARY_SUFFIXED_RMWcc(LOCK_PREFIX "decl",
- REFCOUNT_CHECK_LT_ZERO,
- r->refs.counter, e, "cx");
-
- if (ret) {
- smp_acquire__after_ctrl_dep();
- return true;
- }
-
- return false;
-}
-
-static __always_inline __must_check
-bool refcount_add_not_zero(unsigned int i, refcount_t *r)
-{
- int c, result;
-
- c = atomic_read(&(r->refs));
- do {
- if (unlikely(c == 0))
- return false;
-
- result = c + i;
-
- /* Did we try to increment from/to an undesirable state? */
- if (unlikely(c < 0 || c == INT_MAX || result < c)) {
- asm volatile(REFCOUNT_ERROR
- : : [var] "m" (r->refs.counter)
- : "cc", "cx");
- break;
- }
-
- } while (!atomic_try_cmpxchg(&(r->refs), &c, result));
-
- return c != 0;
-}
-
-static __always_inline __must_check bool refcount_inc_not_zero(refcount_t *r)
-{
- return refcount_add_not_zero(1, r);
-}
-
-#endif
diff --git a/arch/x86/include/asm/rio.h b/arch/x86/include/asm/rio.h
deleted file mode 100644
index 0a21986d2238..000000000000
--- a/arch/x86/include/asm/rio.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Derived from include/asm-x86/mach-summit/mach_mpparse.h
- * and include/asm-x86/mach-default/bios_ebda.h
- *
- * Author: Laurent Vivier <Laurent.Vivier@bull.net>
- */
-
-#ifndef _ASM_X86_RIO_H
-#define _ASM_X86_RIO_H
-
-#define RIO_TABLE_VERSION 3
-
-struct rio_table_hdr {
- u8 version; /* Version number of this data structure */
- u8 num_scal_dev; /* # of Scalability devices */
- u8 num_rio_dev; /* # of RIO I/O devices */
-} __attribute__((packed));
-
-struct scal_detail {
- u8 node_id; /* Scalability Node ID */
- u32 CBAR; /* Address of 1MB register space */
- u8 port0node; /* Node ID port connected to: 0xFF=None */
- u8 port0port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 port1node; /* Node ID port connected to: 0xFF = None */
- u8 port1port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 port2node; /* Node ID port connected to: 0xFF = None */
- u8 port2port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 chassis_num; /* 1 based Chassis number (1 = boot node) */
-} __attribute__((packed));
-
-struct rio_detail {
- u8 node_id; /* RIO Node ID */
- u32 BBAR; /* Address of 1MB register space */
- u8 type; /* Type of device */
- u8 owner_id; /* Node ID of Hurricane that owns this */
- /* node */
- u8 port0node; /* Node ID port connected to: 0xFF=None */
- u8 port0port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 port1node; /* Node ID port connected to: 0xFF=None */
- u8 port1port; /* Port num port connected to: 0,1,2, or */
- /* 0xFF=None */
- u8 first_slot; /* Lowest slot number below this Calgary */
- u8 status; /* Bit 0 = 1 : the XAPIC is used */
- /* = 0 : the XAPIC is not used, ie: */
- /* ints fwded to another XAPIC */
- /* Bits1:7 Reserved */
- u8 WP_index; /* instance index - lower ones have */
- /* lower slot numbers/PCI bus numbers */
- u8 chassis_num; /* 1 based Chassis number */
-} __attribute__((packed));
-
-enum {
- HURR_SCALABILTY = 0, /* Hurricane Scalability info */
- HURR_RIOIB = 2, /* Hurricane RIOIB info */
- COMPAT_CALGARY = 4, /* Compatibility Calgary */
- ALT_CALGARY = 5, /* Second Planar Calgary */
-};
-
-#endif /* _ASM_X86_RIO_H */
diff --git a/arch/x86/include/asm/sections.h b/arch/x86/include/asm/sections.h
index 71b32f2570ab..036c360910c5 100644
--- a/arch/x86/include/asm/sections.h
+++ b/arch/x86/include/asm/sections.h
@@ -6,7 +6,6 @@
#include <asm/extable.h>
extern char __brk_base[], __brk_limit[];
-extern struct exception_table_entry __stop___ex_table[];
extern char __end_rodata_aligned[];
#if defined(CONFIG_X86_64)
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index ac3892920419..6669164abadc 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -31,6 +31,18 @@
*/
#define SEGMENT_RPL_MASK 0x3
+/*
+ * When running on Xen PV, the actual privilege level of the kernel is 1,
+ * not 0. Testing the Requested Privilege Level in a segment selector to
+ * determine whether the context is user mode or kernel mode with
+ * SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level
+ * matches the 0x3 mask.
+ *
+ * Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV
+ * kernels because privilege level 2 is never used.
+ */
+#define USER_SEGMENT_RPL_MASK 0x2
+
/* User mode is privilege level 3: */
#define USER_RPL 0x3
diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h
index 18a4b6890fa8..0e059b73437b 100644
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -103,7 +103,17 @@ static inline void update_task_stack(struct task_struct *task)
if (static_cpu_has(X86_FEATURE_XENPV))
load_sp0(task_top_of_stack(task));
#endif
+}
+static inline void kthread_frame_init(struct inactive_task_frame *frame,
+ unsigned long fun, unsigned long arg)
+{
+ frame->bx = fun;
+#ifdef CONFIG_X86_32
+ frame->di = arg;
+#else
+ frame->r12 = arg;
+#endif
}
#endif /* _ASM_X86_SWITCH_TO_H */
diff --git a/arch/x86/include/asm/syscall_wrapper.h b/arch/x86/include/asm/syscall_wrapper.h
index e046a405743d..e2389ce9bf58 100644
--- a/arch/x86/include/asm/syscall_wrapper.h
+++ b/arch/x86/include/asm/syscall_wrapper.h
@@ -6,6 +6,8 @@
#ifndef _ASM_X86_SYSCALL_WRAPPER_H
#define _ASM_X86_SYSCALL_WRAPPER_H
+struct pt_regs;
+
/* Mapping of registers to parameters for syscalls on x86-64 and x32 */
#define SC_X86_64_REGS_TO_ARGS(x, ...) \
__MAP(x,__SC_ARGS \
@@ -28,13 +30,21 @@
* kernel/sys_ni.c and SYS_NI in kernel/time/posix-stubs.c to cover this
* case as well.
*/
+#define __IA32_COMPAT_SYS_STUB0(x, name) \
+ asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs);\
+ ALLOW_ERROR_INJECTION(__ia32_compat_sys_##name, ERRNO); \
+ asmlinkage long __ia32_compat_sys_##name(const struct pt_regs *regs)\
+ { \
+ return __se_compat_sys_##name(); \
+ }
+
#define __IA32_COMPAT_SYS_STUBx(x, name, ...) \
asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs);\
ALLOW_ERROR_INJECTION(__ia32_compat_sys##name, ERRNO); \
asmlinkage long __ia32_compat_sys##name(const struct pt_regs *regs)\
{ \
return __se_compat_sys##name(SC_IA32_REGS_TO_ARGS(x,__VA_ARGS__));\
- } \
+ }
#define __IA32_SYS_STUBx(x, name, ...) \
asmlinkage long __ia32_sys##name(const struct pt_regs *regs); \
@@ -48,16 +58,23 @@
* To keep the naming coherent, re-define SYSCALL_DEFINE0 to create an alias
* named __ia32_sys_*()
*/
-#define SYSCALL_DEFINE0(sname) \
- SYSCALL_METADATA(_##sname, 0); \
- asmlinkage long __x64_sys_##sname(void); \
- ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
- SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \
- asmlinkage long __x64_sys_##sname(void)
-#define COND_SYSCALL(name) \
- cond_syscall(__x64_sys_##name); \
- cond_syscall(__ia32_sys_##name)
+#define SYSCALL_DEFINE0(sname) \
+ SYSCALL_METADATA(_##sname, 0); \
+ asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\
+ ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
+ SYSCALL_ALIAS(__ia32_sys_##sname, __x64_sys_##sname); \
+ asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused)
+
+#define COND_SYSCALL(name) \
+ asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \
+ { \
+ return sys_ni_syscall(); \
+ } \
+ asmlinkage __weak long __ia32_sys_##name(const struct pt_regs *__unused)\
+ { \
+ return sys_ni_syscall(); \
+ }
#define SYS_NI(name) \
SYSCALL_ALIAS(__x64_sys_##name, sys_ni_posix_timers); \
@@ -75,15 +92,24 @@
* of the x86-64-style parameter ordering of x32 syscalls. The syscalls common
* with x86_64 obviously do not need such care.
*/
+#define __X32_COMPAT_SYS_STUB0(x, name, ...) \
+ asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs);\
+ ALLOW_ERROR_INJECTION(__x32_compat_sys_##name, ERRNO); \
+ asmlinkage long __x32_compat_sys_##name(const struct pt_regs *regs)\
+ { \
+ return __se_compat_sys_##name();\
+ }
+
#define __X32_COMPAT_SYS_STUBx(x, name, ...) \
asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs);\
ALLOW_ERROR_INJECTION(__x32_compat_sys##name, ERRNO); \
asmlinkage long __x32_compat_sys##name(const struct pt_regs *regs)\
{ \
return __se_compat_sys##name(SC_X86_64_REGS_TO_ARGS(x,__VA_ARGS__));\
- } \
+ }
#else /* CONFIG_X86_X32 */
+#define __X32_COMPAT_SYS_STUB0(x, name)
#define __X32_COMPAT_SYS_STUBx(x, name, ...)
#endif /* CONFIG_X86_X32 */
@@ -94,6 +120,17 @@
* mapping of registers to parameters, we need to generate stubs for each
* of them.
*/
+#define COMPAT_SYSCALL_DEFINE0(name) \
+ static long __se_compat_sys_##name(void); \
+ static inline long __do_compat_sys_##name(void); \
+ __IA32_COMPAT_SYS_STUB0(x, name) \
+ __X32_COMPAT_SYS_STUB0(x, name) \
+ static long __se_compat_sys_##name(void) \
+ { \
+ return __do_compat_sys_##name(); \
+ } \
+ static inline long __do_compat_sys_##name(void)
+
#define COMPAT_SYSCALL_DEFINEx(x, name, ...) \
static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \
static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
@@ -181,15 +218,19 @@
* macros to work correctly.
*/
#ifndef SYSCALL_DEFINE0
-#define SYSCALL_DEFINE0(sname) \
- SYSCALL_METADATA(_##sname, 0); \
- asmlinkage long __x64_sys_##sname(void); \
- ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
- asmlinkage long __x64_sys_##sname(void)
+#define SYSCALL_DEFINE0(sname) \
+ SYSCALL_METADATA(_##sname, 0); \
+ asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused);\
+ ALLOW_ERROR_INJECTION(__x64_sys_##sname, ERRNO); \
+ asmlinkage long __x64_sys_##sname(const struct pt_regs *__unused)
#endif
#ifndef COND_SYSCALL
-#define COND_SYSCALL(name) cond_syscall(__x64_sys_##name)
+#define COND_SYSCALL(name) \
+ asmlinkage __weak long __x64_sys_##name(const struct pt_regs *__unused) \
+ { \
+ return sys_ni_syscall(); \
+ }
#endif
#ifndef SYS_NI
@@ -201,7 +242,6 @@
* For VSYSCALLS, we need to declare these three syscalls with the new
* pt_regs-based calling convention for in-kernel use.
*/
-struct pt_regs;
asmlinkage long __x64_sys_getcpu(const struct pt_regs *regs);
asmlinkage long __x64_sys_gettimeofday(const struct pt_regs *regs);
asmlinkage long __x64_sys_time(const struct pt_regs *regs);
diff --git a/arch/x86/include/asm/tce.h b/arch/x86/include/asm/tce.h
deleted file mode 100644
index 6ed2deacf1d0..000000000000
--- a/arch/x86/include/asm/tce.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * This file is derived from asm-powerpc/tce.h.
- *
- * Copyright (C) IBM Corporation, 2006
- *
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- * Author: Jon Mason <jdmason@us.ibm.com>
- */
-
-#ifndef _ASM_X86_TCE_H
-#define _ASM_X86_TCE_H
-
-extern unsigned int specified_table_size;
-struct iommu_table;
-
-#define TCE_ENTRY_SIZE 8 /* in bytes */
-
-#define TCE_READ_SHIFT 0
-#define TCE_WRITE_SHIFT 1
-#define TCE_HUBID_SHIFT 2 /* unused */
-#define TCE_RSVD_SHIFT 8 /* unused */
-#define TCE_RPN_SHIFT 12
-#define TCE_UNUSED_SHIFT 48 /* unused */
-
-#define TCE_RPN_MASK 0x0000fffffffff000ULL
-
-extern void tce_build(struct iommu_table *tbl, unsigned long index,
- unsigned int npages, unsigned long uaddr, int direction);
-extern void tce_free(struct iommu_table *tbl, long index, unsigned int npages);
-extern void * __init alloc_tce_table(void);
-extern void __init free_tce_table(void *tbl);
-extern int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar);
-
-#endif /* _ASM_X86_TCE_H */
diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 5e8319bb207a..23c626a742e8 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -26,10 +26,11 @@ static inline void apply_paravirt(struct paravirt_patch_site *start,
#define POKE_MAX_OPCODE_SIZE 5
struct text_poke_loc {
- void *detour;
void *addr;
- size_t len;
- const char opcode[POKE_MAX_OPCODE_SIZE];
+ int len;
+ s32 rel32;
+ u8 opcode;
+ const u8 text[POKE_MAX_OPCODE_SIZE];
};
extern void text_poke_early(void *addr, const void *opcode, size_t len);
@@ -51,8 +52,10 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
extern int poke_int3_handler(struct pt_regs *regs);
-extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
+extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ const void *opcode, size_t len, const void *emulate);
extern int after_bootmem;
extern __ro_after_init struct mm_struct *poking_mm;
extern __ro_after_init unsigned long poking_addr;
@@ -63,8 +66,17 @@ static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
regs->ip = ip;
}
-#define INT3_INSN_SIZE 1
-#define CALL_INSN_SIZE 5
+#define INT3_INSN_SIZE 1
+#define INT3_INSN_OPCODE 0xCC
+
+#define CALL_INSN_SIZE 5
+#define CALL_INSN_OPCODE 0xE8
+
+#define JMP32_INSN_SIZE 5
+#define JMP32_INSN_OPCODE 0xE9
+
+#define JMP8_INSN_SIZE 2
+#define JMP8_INSN_OPCODE 0xEB
static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
{
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index f9453536f9bb..d779366ce3f8 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -143,8 +143,8 @@ struct thread_info {
_TIF_NOHZ)
/* flags to check in __switch_to() */
-#define _TIF_WORK_CTXSW_BASE \
- (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \
+#define _TIF_WORK_CTXSW_BASE \
+ (_TIF_NOCPUID | _TIF_NOTSC | _TIF_BLOCKSTEP | \
_TIF_SSBD | _TIF_SPEC_FORCE_UPDATE)
/*
@@ -156,8 +156,14 @@ struct thread_info {
# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE)
#endif
-#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
-#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
+#ifdef CONFIG_X86_IOPL_IOPERM
+# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY | \
+ _TIF_IO_BITMAP)
+#else
+# define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW| _TIF_USER_RETURN_NOTIFY)
+#endif
+
+#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
#define STACK_WARN (THREAD_SIZE/8)
diff --git a/arch/x86/include/asm/trace/hyperv.h b/arch/x86/include/asm/trace/hyperv.h
index ace464f09681..4d705cb4d63b 100644
--- a/arch/x86/include/asm/trace/hyperv.h
+++ b/arch/x86/include/asm/trace/hyperv.h
@@ -71,6 +71,21 @@ TRACE_EVENT(hyperv_send_ipi_mask,
__entry->ncpus, __entry->vector)
);
+TRACE_EVENT(hyperv_send_ipi_one,
+ TP_PROTO(int cpu,
+ int vector),
+ TP_ARGS(cpu, vector),
+ TP_STRUCT__entry(
+ __field(int, cpu)
+ __field(int, vector)
+ ),
+ TP_fast_assign(__entry->cpu = cpu;
+ __entry->vector = vector;
+ ),
+ TP_printk("cpu %d vector %x",
+ __entry->cpu, __entry->vector)
+ );
+
#endif /* CONFIG_HYPERV */
#undef TRACE_INCLUDE_PATH
diff --git a/arch/x86/include/asm/umip.h b/arch/x86/include/asm/umip.h
index db43f2a0d92c..aeed98c3c9e1 100644
--- a/arch/x86/include/asm/umip.h
+++ b/arch/x86/include/asm/umip.h
@@ -4,9 +4,9 @@
#include <linux/types.h>
#include <asm/ptrace.h>
-#ifdef CONFIG_X86_INTEL_UMIP
+#ifdef CONFIG_X86_UMIP
bool fixup_umip_exception(struct pt_regs *regs);
#else
static inline bool fixup_umip_exception(struct pt_regs *regs) { return false; }
-#endif /* CONFIG_X86_INTEL_UMIP */
+#endif /* CONFIG_X86_UMIP */
#endif /* _ASM_X86_UMIP_H */
diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h
index 0bcdb1279361..f5e2eb12cb71 100644
--- a/arch/x86/include/asm/unwind_hints.h
+++ b/arch/x86/include/asm/unwind_hints.h
@@ -86,6 +86,14 @@
UNWIND_HINT sp_offset=\sp_offset
.endm
+.macro UNWIND_HINT_SAVE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
+.endm
+
+.macro UNWIND_HINT_RESTORE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
+.endm
+
#else /* !__ASSEMBLY__ */
#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h
index 6e7caf65fa40..389174eaec79 100644
--- a/arch/x86/include/asm/uv/bios.h
+++ b/arch/x86/include/asm/uv/bios.h
@@ -138,7 +138,7 @@ extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect);
extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *);
extern int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus);
-extern void uv_bios_init(void);
+extern int uv_bios_init(void);
extern unsigned long sn_rtc_cycles_per_second;
extern int uv_type;
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
index 6bc6d89d8e2a..45ea95ce79b4 100644
--- a/arch/x86/include/asm/uv/uv.h
+++ b/arch/x86/include/asm/uv/uv.h
@@ -12,6 +12,16 @@ struct mm_struct;
#ifdef CONFIG_X86_UV
#include <linux/efi.h>
+#define UV_PROC_NODE "sgi_uv"
+
+static inline int uv(int uvtype)
+{
+ /* uv(0) is "any" */
+ if (uvtype >= 0 && uvtype <= 30)
+ return 1 << uvtype;
+ return 1;
+}
+
extern unsigned long uv_systab_phys;
extern enum uv_system_type get_uv_system_type(void);
@@ -20,7 +30,8 @@ static inline bool is_early_uv_system(void)
return uv_systab_phys && uv_systab_phys != EFI_INVALID_TABLE_ADDR;
}
extern int is_uv_system(void);
-extern int is_uv_hubless(void);
+extern int is_uv_hubbed(int uvtype);
+extern int is_uv_hubless(int uvtype);
extern void uv_cpu_init(void);
extern void uv_nmi_init(void);
extern void uv_system_init(void);
@@ -32,7 +43,8 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
static inline bool is_early_uv_system(void) { return 0; }
static inline int is_uv_system(void) { return 0; }
-static inline int is_uv_hubless(void) { return 0; }
+static inline int is_uv_hubbed(int uv) { return 0; }
+static inline int is_uv_hubless(int uv) { return 0; }
static inline void uv_cpu_init(void) { }
static inline void uv_system_init(void) { }
static inline const struct cpumask *
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index 44cf6d6deb7a..950cd1395d5d 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -19,6 +19,7 @@
#include <linux/topology.h>
#include <asm/types.h>
#include <asm/percpu.h>
+#include <asm/uv/uv.h>
#include <asm/uv/uv_mmrs.h>
#include <asm/uv/bios.h>
#include <asm/irq_vectors.h>
@@ -243,83 +244,61 @@ static inline int uv_hub_info_check(int version)
#define UV4_HUB_REVISION_BASE 7
#define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */
-#ifdef UV1_HUB_IS_SUPPORTED
+/* WARNING: UVx_HUB_IS_SUPPORTED defines are deprecated and will be removed */
static inline int is_uv1_hub(void)
{
- return uv_hub_info->hub_revision < UV2_HUB_REVISION_BASE;
-}
+#ifdef UV1_HUB_IS_SUPPORTED
+ return is_uv_hubbed(uv(1));
#else
-static inline int is_uv1_hub(void)
-{
return 0;
-}
#endif
+}
-#ifdef UV2_HUB_IS_SUPPORTED
static inline int is_uv2_hub(void)
{
- return ((uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE) &&
- (uv_hub_info->hub_revision < UV3_HUB_REVISION_BASE));
-}
+#ifdef UV2_HUB_IS_SUPPORTED
+ return is_uv_hubbed(uv(2));
#else
-static inline int is_uv2_hub(void)
-{
return 0;
-}
#endif
+}
-#ifdef UV3_HUB_IS_SUPPORTED
static inline int is_uv3_hub(void)
{
- return ((uv_hub_info->hub_revision >= UV3_HUB_REVISION_BASE) &&
- (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE));
-}
+#ifdef UV3_HUB_IS_SUPPORTED
+ return is_uv_hubbed(uv(3));
#else
-static inline int is_uv3_hub(void)
-{
return 0;
-}
#endif
+}
/* First test "is UV4A", then "is UV4" */
-#ifdef UV4A_HUB_IS_SUPPORTED
-static inline int is_uv4a_hub(void)
-{
- return (uv_hub_info->hub_revision >= UV4A_HUB_REVISION_BASE);
-}
-#else
static inline int is_uv4a_hub(void)
{
+#ifdef UV4A_HUB_IS_SUPPORTED
+ if (is_uv_hubbed(uv(4)))
+ return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE);
+#endif
return 0;
}
-#endif
-#ifdef UV4_HUB_IS_SUPPORTED
static inline int is_uv4_hub(void)
{
- return uv_hub_info->hub_revision >= UV4_HUB_REVISION_BASE;
-}
+#ifdef UV4_HUB_IS_SUPPORTED
+ return is_uv_hubbed(uv(4));
#else
-static inline int is_uv4_hub(void)
-{
return 0;
-}
#endif
+}
static inline int is_uvx_hub(void)
{
- if (uv_hub_info->hub_revision >= UV2_HUB_REVISION_BASE)
- return uv_hub_info->hub_revision;
-
- return 0;
+ return (is_uv_hubbed(-2) >= uv(2));
}
static inline int is_uv_hub(void)
{
-#ifdef UV1_HUB_IS_SUPPORTED
- return uv_hub_info->hub_revision;
-#endif
- return is_uvx_hub();
+ return is_uv1_hub() || is_uvx_hub();
}
union uvh_apicid {
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h
index 42e1245af0d8..ff4b52e37e60 100644
--- a/arch/x86/include/asm/xen/hypervisor.h
+++ b/arch/x86/include/asm/xen/hypervisor.h
@@ -62,6 +62,4 @@ void xen_arch_register_cpu(int num);
void xen_arch_unregister_cpu(int num);
#endif
-extern void xen_set_iopl_mask(unsigned mask);
-
#endif /* _ASM_X86_XEN_HYPERVISOR_H */
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 62ca03ef5c65..9139b3e86316 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -379,12 +379,9 @@ struct xen_pmu_arch {
* Prefix forces emulation of some non-trapping instructions.
* Currently only CPUID.
*/
-#ifdef __ASSEMBLY__
-#define XEN_EMULATE_PREFIX .byte 0x0f,0x0b,0x78,0x65,0x6e ;
-#define XEN_CPUID XEN_EMULATE_PREFIX cpuid
-#else
-#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b,0x78,0x65,0x6e ; "
-#define XEN_CPUID XEN_EMULATE_PREFIX "cpuid"
-#endif
+#include <asm/emulate_prefix.h>
+
+#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
+#define XEN_CPUID XEN_EMULATE_PREFIX __ASM_FORM(cpuid)
#endif /* _ASM_X86_XEN_INTERFACE_H */
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index c895df5482c5..8669c6bdbb84 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -2,7 +2,7 @@
#ifndef _ASM_X86_BOOTPARAM_H
#define _ASM_X86_BOOTPARAM_H
-/* setup_data types */
+/* setup_data/setup_indirect types */
#define SETUP_NONE 0
#define SETUP_E820_EXT 1
#define SETUP_DTB 2
@@ -11,6 +11,11 @@
#define SETUP_APPLE_PROPERTIES 5
#define SETUP_JAILHOUSE 6
+#define SETUP_INDIRECT (1<<31)
+
+/* SETUP_INDIRECT | max(SETUP_*) */
+#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_JAILHOUSE)
+
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
#define RAMDISK_PROMPT_FLAG 0x8000
@@ -49,6 +54,14 @@ struct setup_data {
__u8 data[0];
};
+/* extensible setup indirect data node */
+struct setup_indirect {
+ __u32 type;
+ __u32 reserved; /* Reserved, must be set to zero. */
+ __u64 len;
+ __u64 addr;
+};
+
struct setup_header {
__u8 setup_sects;
__u16 root_flags;
@@ -88,6 +101,7 @@ struct setup_header {
__u64 pref_address;
__u32 init_size;
__u32 handover_offset;
+ __u32 kernel_info_offset;
} __attribute__((packed));
struct sys_desc_table {
@@ -139,15 +153,22 @@ struct boot_e820_entry {
* setup data structure.
*/
struct jailhouse_setup_data {
- __u16 version;
- __u16 compatible_version;
- __u16 pm_timer_address;
- __u16 num_cpus;
- __u64 pci_mmconfig_base;
- __u32 tsc_khz;
- __u32 apic_khz;
- __u8 standard_ioapic;
- __u8 cpu_ids[255];
+ struct {
+ __u16 version;
+ __u16 compatible_version;
+ } __attribute__((packed)) hdr;
+ struct {
+ __u16 pm_timer_address;
+ __u16 num_cpus;
+ __u64 pci_mmconfig_base;
+ __u32 tsc_khz;
+ __u32 apic_khz;
+ __u8 standard_ioapic;
+ __u8 cpu_ids[255];
+ } __attribute__((packed)) v1;
+ struct {
+ __u32 flags;
+ } __attribute__((packed)) v2;
} __attribute__((packed));
/* The so-called "zeropage" */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3578ad248bc9..32acb970f416 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -134,7 +134,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o
obj-$(CONFIG_PERF_EVENTS) += perf_regs.o
obj-$(CONFIG_TRACING) += tracepoint.o
obj-$(CONFIG_SCHED_MC_PRIO) += itmt.o
-obj-$(CONFIG_X86_INTEL_UMIP) += umip.o
+obj-$(CONFIG_X86_UMIP) += umip.o
obj-$(CONFIG_UNWINDER_ORC) += unwind_orc.o
obj-$(CONFIG_UNWINDER_FRAME_POINTER) += unwind_frame.o
@@ -146,7 +146,6 @@ ifeq ($(CONFIG_X86_64),y)
obj-$(CONFIG_AUDIT) += audit_64.o
obj-$(CONFIG_GART_IOMMU) += amd_gart_64.o aperture_64.o
- obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
obj-$(CONFIG_MMCONF_FAM10H) += mmconf-fam10h_64.o
obj-y += vsmp_64.o
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index e95e95960156..daf88f8143c5 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -9,8 +9,7 @@
.code32
ALIGN
-ENTRY(wakeup_pmode_return)
-wakeup_pmode_return:
+SYM_CODE_START(wakeup_pmode_return)
movw $__KERNEL_DS, %ax
movw %ax, %ss
movw %ax, %fs
@@ -39,6 +38,7 @@ wakeup_pmode_return:
# jump to place where we left off
movl saved_eip, %eax
jmp *%eax
+SYM_CODE_END(wakeup_pmode_return)
bogus_magic:
jmp bogus_magic
@@ -72,7 +72,7 @@ restore_registers:
popfl
ret
-ENTRY(do_suspend_lowlevel)
+SYM_CODE_START(do_suspend_lowlevel)
call save_processor_state
call save_registers
pushl $3
@@ -87,10 +87,11 @@ ret_point:
call restore_registers
call restore_processor_state
ret
+SYM_CODE_END(do_suspend_lowlevel)
.data
ALIGN
-ENTRY(saved_magic) .long 0
+SYM_DATA(saved_magic, .long 0)
saved_eip: .long 0
# saved registers
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 7f9ade13bbcf..c8daa92f38dc 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -14,7 +14,7 @@
/*
* Hooray, we are in Long 64-bit mode (but still running in low memory)
*/
-ENTRY(wakeup_long64)
+SYM_FUNC_START(wakeup_long64)
movq saved_magic, %rax
movq $0x123456789abcdef0, %rdx
cmpq %rdx, %rax
@@ -40,9 +40,9 @@ ENTRY(wakeup_long64)
movq saved_rip, %rax
jmp *%rax
-ENDPROC(wakeup_long64)
+SYM_FUNC_END(wakeup_long64)
-ENTRY(do_suspend_lowlevel)
+SYM_FUNC_START(do_suspend_lowlevel)
FRAME_BEGIN
subq $8, %rsp
xorl %eax, %eax
@@ -125,7 +125,7 @@ ENTRY(do_suspend_lowlevel)
addq $8, %rsp
FRAME_END
jmp restore_processor_state
-ENDPROC(do_suspend_lowlevel)
+SYM_FUNC_END(do_suspend_lowlevel)
.data
saved_rbp: .quad 0
@@ -136,4 +136,4 @@ saved_rbx: .quad 0
saved_rip: .quad 0
saved_rsp: .quad 0
-ENTRY(saved_magic) .quad 0
+SYM_DATA(saved_magic, .quad 0)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9d3a971ea364..9ec463fe96f2 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -956,16 +956,15 @@ NOKPROBE_SYMBOL(patch_cmp);
int poke_int3_handler(struct pt_regs *regs)
{
struct text_poke_loc *tp;
- unsigned char int3 = 0xcc;
void *ip;
/*
* Having observed our INT3 instruction, we now must observe
* bp_patching.nr_entries.
*
- * nr_entries != 0 INT3
- * WMB RMB
- * write INT3 if (nr_entries)
+ * nr_entries != 0 INT3
+ * WMB RMB
+ * write INT3 if (nr_entries)
*
* Idem for other elements in bp_patching.
*/
@@ -978,9 +977,9 @@ int poke_int3_handler(struct pt_regs *regs)
return 0;
/*
- * Discount the sizeof(int3). See text_poke_bp_batch().
+ * Discount the INT3. See text_poke_bp_batch().
*/
- ip = (void *) regs->ip - sizeof(int3);
+ ip = (void *) regs->ip - INT3_INSN_SIZE;
/*
* Skip the binary search if there is a single member in the vector.
@@ -997,8 +996,28 @@ int poke_int3_handler(struct pt_regs *regs)
return 0;
}
- /* set up the specified breakpoint detour */
- regs->ip = (unsigned long) tp->detour;
+ ip += tp->len;
+
+ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+ /*
+ * Someone poked an explicit INT3, they'll want to handle it,
+ * do not consume.
+ */
+ return 0;
+
+ case CALL_INSN_OPCODE:
+ int3_emulate_call(regs, (long)ip + tp->rel32);
+ break;
+
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+ int3_emulate_jmp(regs, (long)ip + tp->rel32);
+ break;
+
+ default:
+ BUG();
+ }
return 1;
}
@@ -1014,7 +1033,7 @@ NOKPROBE_SYMBOL(poke_int3_handler);
* synchronization using int3 breakpoint.
*
* The way it is done:
- * - For each entry in the vector:
+ * - For each entry in the vector:
* - add a int3 trap to the address that will be patched
* - sync cores
* - For each entry in the vector:
@@ -1027,9 +1046,9 @@ NOKPROBE_SYMBOL(poke_int3_handler);
*/
void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
{
- int patched_all_but_first = 0;
- unsigned char int3 = 0xcc;
+ unsigned char int3 = INT3_INSN_OPCODE;
unsigned int i;
+ int do_sync;
lockdep_assert_held(&text_mutex);
@@ -1053,16 +1072,16 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
/*
* Second step: update all but the first byte of the patched range.
*/
- for (i = 0; i < nr_entries; i++) {
+ for (do_sync = 0, i = 0; i < nr_entries; i++) {
if (tp[i].len - sizeof(int3) > 0) {
text_poke((char *)tp[i].addr + sizeof(int3),
- (const char *)tp[i].opcode + sizeof(int3),
+ (const char *)tp[i].text + sizeof(int3),
tp[i].len - sizeof(int3));
- patched_all_but_first++;
+ do_sync++;
}
}
- if (patched_all_but_first) {
+ if (do_sync) {
/*
* According to Intel, this core syncing is very likely
* not necessary and we'd be safe even without it. But
@@ -1075,10 +1094,17 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* Third step: replace the first byte (int3) by the first byte of
* replacing opcode.
*/
- for (i = 0; i < nr_entries; i++)
- text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
+ for (do_sync = 0, i = 0; i < nr_entries; i++) {
+ if (tp[i].text[0] == INT3_INSN_OPCODE)
+ continue;
+
+ text_poke(tp[i].addr, tp[i].text, sizeof(int3));
+ do_sync++;
+ }
+
+ if (do_sync)
+ on_each_cpu(do_sync_core, NULL, 1);
- on_each_cpu(do_sync_core, NULL, 1);
/*
* sync_core() implies an smp_mb() and orders this store against
* the writing of the new instruction.
@@ -1087,6 +1113,60 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
bp_patching.nr_entries = 0;
}
+void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+ const void *opcode, size_t len, const void *emulate)
+{
+ struct insn insn;
+
+ if (!opcode)
+ opcode = (void *)tp->text;
+ else
+ memcpy((void *)tp->text, opcode, len);
+
+ if (!emulate)
+ emulate = opcode;
+
+ kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
+ insn_get_length(&insn);
+
+ BUG_ON(!insn_complete(&insn));
+ BUG_ON(len != insn.length);
+
+ tp->addr = addr;
+ tp->len = len;
+ tp->opcode = insn.opcode.bytes[0];
+
+ switch (tp->opcode) {
+ case INT3_INSN_OPCODE:
+ break;
+
+ case CALL_INSN_OPCODE:
+ case JMP32_INSN_OPCODE:
+ case JMP8_INSN_OPCODE:
+ tp->rel32 = insn.immediate.value;
+ break;
+
+ default: /* assume NOP */
+ switch (len) {
+ case 2: /* NOP2 -- emulate as JMP8+0 */
+ BUG_ON(memcmp(emulate, ideal_nops[len], len));
+ tp->opcode = JMP8_INSN_OPCODE;
+ tp->rel32 = 0;
+ break;
+
+ case 5: /* NOP5 -- emulate as JMP32+0 */
+ BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
+ tp->opcode = JMP32_INSN_OPCODE;
+ tp->rel32 = 0;
+ break;
+
+ default: /* unknown instruction */
+ BUG();
+ }
+ break;
+ }
+}
+
/**
* text_poke_bp() -- update instructions on live kernel on SMP
* @addr: address to patch
@@ -1098,20 +1178,10 @@ void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
* dynamically allocated memory. This function should be used when it is
* not possible to allocate memory.
*/
-void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
{
- struct text_poke_loc tp = {
- .detour = handler,
- .addr = addr,
- .len = len,
- };
-
- if (len > POKE_MAX_OPCODE_SIZE) {
- WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
- return;
- }
-
- memcpy((void *)tp.opcode, opcode, len);
+ struct text_poke_loc tp;
+ text_poke_loc_init(&tp, addr, opcode, len, emulate);
text_poke_bp_batch(&tp, 1);
}
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index a6ac3712db8b..4bbccb9d16dc 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -510,10 +510,9 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
if (iommu_size < 64*1024*1024) {
- pr_warning(
- "PCI-DMA: Warning: Small IOMMU %luMB."
+ pr_warn("PCI-DMA: Warning: Small IOMMU %luMB."
" Consider increasing the AGP aperture in BIOS\n",
- iommu_size >> 20);
+ iommu_size >> 20);
}
return iommu_size;
@@ -665,8 +664,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info)
nommu:
/* Should not happen anymore */
- pr_warning("PCI-DMA: More than 4GB of RAM and no IOMMU\n"
- "falling back to iommu=soft.\n");
+ pr_warn("PCI-DMA: More than 4GB of RAM and no IOMMU - falling back to iommu=soft.\n");
return -1;
}
@@ -733,8 +731,8 @@ int __init gart_iommu_init(void)
!gart_iommu_aperture ||
(no_agp && init_amd_gatt(&info) < 0)) {
if (max_pfn > MAX_DMA32_PFN) {
- pr_warning("More than 4GB of memory but GART IOMMU not available.\n");
- pr_warning("falling back to iommu=soft.\n");
+ pr_warn("More than 4GB of memory but GART IOMMU not available.\n");
+ pr_warn("falling back to iommu=soft.\n");
}
return 0;
}
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 2b0faf86da1b..28446fa6bf18 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -780,8 +780,8 @@ calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
res = (((u64)deltapm) * mult) >> 22;
do_div(res, 1000000);
- pr_warning("APIC calibration not consistent "
- "with PM-Timer: %ldms instead of 100ms\n",(long)res);
+ pr_warn("APIC calibration not consistent "
+ "with PM-Timer: %ldms instead of 100ms\n", (long)res);
/* Correct the lapic counter value */
res = (((u64)(*delta)) * pm_100ms);
@@ -977,7 +977,7 @@ static int __init calibrate_APIC_clock(void)
*/
if (lapic_timer_period < (1000000 / HZ)) {
local_irq_enable();
- pr_warning("APIC frequency too slow, disabling apic timer\n");
+ pr_warn("APIC frequency too slow, disabling apic timer\n");
return -1;
}
@@ -1021,7 +1021,7 @@ static int __init calibrate_APIC_clock(void)
local_irq_enable();
if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
- pr_warning("APIC timer disabled due to verification failure\n");
+ pr_warn("APIC timer disabled due to verification failure\n");
return -1;
}
@@ -1095,8 +1095,8 @@ static void local_apic_timer_interrupt(void)
* spurious.
*/
if (!evt->event_handler) {
- pr_warning("Spurious LAPIC timer interrupt on cpu %d\n",
- smp_processor_id());
+ pr_warn("Spurious LAPIC timer interrupt on cpu %d\n",
+ smp_processor_id());
/* Switch it off */
lapic_timer_shutdown(evt);
return;
@@ -1811,11 +1811,11 @@ static int __init setup_nox2apic(char *str)
int apicid = native_apic_msr_read(APIC_ID);
if (apicid >= 255) {
- pr_warning("Apicid: %08x, cannot enforce nox2apic\n",
- apicid);
+ pr_warn("Apicid: %08x, cannot enforce nox2apic\n",
+ apicid);
return 0;
}
- pr_warning("x2apic already enabled.\n");
+ pr_warn("x2apic already enabled.\n");
__x2apic_disable();
}
setup_clear_cpu_cap(X86_FEATURE_X2APIC);
@@ -1983,7 +1983,7 @@ static int __init apic_verify(void)
*/
features = cpuid_edx(1);
if (!(features & (1 << X86_FEATURE_APIC))) {
- pr_warning("Could not enable APIC!\n");
+ pr_warn("Could not enable APIC!\n");
return -1;
}
set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
@@ -2337,7 +2337,7 @@ static int cpuid_to_apicid[] = {
#ifdef CONFIG_SMP
/**
* apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
- * @id: APIC ID to check
+ * @apicid: APIC ID to check
*/
bool apic_id_is_primary_thread(unsigned int apicid)
{
@@ -2410,9 +2410,8 @@ int generic_processor_info(int apicid, int version)
disabled_cpu_apicid == apicid) {
int thiscpu = num_processors + disabled_cpus;
- pr_warning("APIC: Disabling requested cpu."
- " Processor %d/0x%x ignored.\n",
- thiscpu, apicid);
+ pr_warn("APIC: Disabling requested cpu."
+ " Processor %d/0x%x ignored.\n", thiscpu, apicid);
disabled_cpus++;
return -ENODEV;
@@ -2426,8 +2425,7 @@ int generic_processor_info(int apicid, int version)
apicid != boot_cpu_physical_apicid) {
int thiscpu = max + disabled_cpus - 1;
- pr_warning(
- "APIC: NR_CPUS/possible_cpus limit of %i almost"
+ pr_warn("APIC: NR_CPUS/possible_cpus limit of %i almost"
" reached. Keeping one slot for boot cpu."
" Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
@@ -2438,9 +2436,8 @@ int generic_processor_info(int apicid, int version)
if (num_processors >= nr_cpu_ids) {
int thiscpu = max + disabled_cpus;
- pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
- "reached. Processor %d/0x%x ignored.\n",
- max, thiscpu, apicid);
+ pr_warn("APIC: NR_CPUS/possible_cpus limit of %i reached. "
+ "Processor %d/0x%x ignored.\n", max, thiscpu, apicid);
disabled_cpus++;
return -EINVAL;
@@ -2470,13 +2467,13 @@ int generic_processor_info(int apicid, int version)
* Validate version
*/
if (version == 0x0) {
- pr_warning("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
- cpu, apicid);
+ pr_warn("BIOS bug: APIC version is 0 for CPU %d/0x%x, fixing up to 0x10\n",
+ cpu, apicid);
version = 0x10;
}
if (version != boot_cpu_apic_version) {
- pr_warning("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
+ pr_warn("BIOS bug: APIC version mismatch, boot CPU: %x, CPU %d: version %x\n",
boot_cpu_apic_version, cpu, version);
}
@@ -2845,7 +2842,7 @@ static int __init apic_set_verbosity(char *arg)
apic_verbosity = APIC_VERBOSE;
#ifdef CONFIG_X86_64
else {
- pr_warning("APIC Verbosity level %s not recognised"
+ pr_warn("APIC Verbosity level %s not recognised"
" use apic=verbose or apic=debug\n", arg);
return -EINVAL;
}
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index d6af97fd170a..913c88617848 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1725,19 +1725,20 @@ static bool io_apic_level_ack_pending(struct mp_chip_data *data)
return false;
}
-static inline bool ioapic_irqd_mask(struct irq_data *data)
+static inline bool ioapic_prepare_move(struct irq_data *data)
{
- /* If we are moving the irq we need to mask it */
+ /* If we are moving the IRQ we need to mask it */
if (unlikely(irqd_is_setaffinity_pending(data))) {
- mask_ioapic_irq(data);
+ if (!irqd_irq_masked(data))
+ mask_ioapic_irq(data);
return true;
}
return false;
}
-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
{
- if (unlikely(masked)) {
+ if (unlikely(moveit)) {
/* Only migrate the irq if the ack has been received.
*
* On rare occasions the broadcast level triggered ack gets
@@ -1766,15 +1767,17 @@ static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
*/
if (!io_apic_level_ack_pending(data->chip_data))
irq_move_masked_irq(data);
- unmask_ioapic_irq(data);
+ /* If the IRQ is masked in the core, leave it: */
+ if (!irqd_irq_masked(data))
+ unmask_ioapic_irq(data);
}
}
#else
-static inline bool ioapic_irqd_mask(struct irq_data *data)
+static inline bool ioapic_prepare_move(struct irq_data *data)
{
return false;
}
-static inline void ioapic_irqd_unmask(struct irq_data *data, bool masked)
+static inline void ioapic_finish_move(struct irq_data *data, bool moveit)
{
}
#endif
@@ -1783,11 +1786,11 @@ static void ioapic_ack_level(struct irq_data *irq_data)
{
struct irq_cfg *cfg = irqd_cfg(irq_data);
unsigned long v;
- bool masked;
+ bool moveit;
int i;
irq_complete_move(cfg);
- masked = ioapic_irqd_mask(irq_data);
+ moveit = ioapic_prepare_move(irq_data);
/*
* It appears there is an erratum which affects at least version 0x11
@@ -1842,7 +1845,7 @@ static void ioapic_ack_level(struct irq_data *irq_data)
eoi_ioapic_pin(cfg->vector, irq_data->chip_data);
}
- ioapic_irqd_unmask(irq_data, masked);
+ ioapic_finish_move(irq_data, moveit);
}
static void ioapic_ir_ack_level(struct irq_data *irq_data)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index e6230af19864..d5b51a740524 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -14,6 +14,8 @@
#include <linux/memory.h>
#include <linux/export.h>
#include <linux/pci.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
#include <asm/e820/api.h>
#include <asm/uv/uv_mmrs.h>
@@ -25,12 +27,17 @@
static DEFINE_PER_CPU(int, x2apic_extra_bits);
static enum uv_system_type uv_system_type;
-static bool uv_hubless_system;
+static int uv_hubbed_system;
+static int uv_hubless_system;
static u64 gru_start_paddr, gru_end_paddr;
static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr;
static u64 gru_dist_lmask, gru_dist_umask;
static union uvh_apicid uvh_apicid;
+/* Unpack OEM/TABLE ID's to be NULL terminated strings */
+static u8 oem_id[ACPI_OEM_ID_SIZE + 1];
+static u8 oem_table_id[ACPI_OEM_TABLE_ID_SIZE + 1];
+
/* Information derived from CPUID: */
static struct {
unsigned int apicid_shift;
@@ -248,17 +255,35 @@ static void __init uv_set_apicid_hibit(void)
}
}
-static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
+static void __init uv_stringify(int len, char *to, char *from)
+{
+ /* Relies on 'to' being NULL chars so result will be NULL terminated */
+ strncpy(to, from, len-1);
+}
+
+static int __init uv_acpi_madt_oem_check(char *_oem_id, char *_oem_table_id)
{
int pnodeid;
int uv_apic;
+ uv_stringify(sizeof(oem_id), oem_id, _oem_id);
+ uv_stringify(sizeof(oem_table_id), oem_table_id, _oem_table_id);
+
if (strncmp(oem_id, "SGI", 3) != 0) {
- if (strncmp(oem_id, "NSGI", 4) == 0) {
- uv_hubless_system = true;
- pr_info("UV: OEM IDs %s/%s, HUBLESS\n",
- oem_id, oem_table_id);
- }
+ if (strncmp(oem_id, "NSGI", 4) != 0)
+ return 0;
+
+ /* UV4 Hubless, CH, (0x11:UV4+Any) */
+ if (strncmp(oem_id, "NSGI4", 5) == 0)
+ uv_hubless_system = 0x11;
+
+ /* UV3 Hubless, UV300/MC990X w/o hub (0x9:UV3+Any) */
+ else
+ uv_hubless_system = 0x9;
+
+ pr_info("UV: OEM IDs %s/%s, HUBLESS(0x%x)\n",
+ oem_id, oem_table_id, uv_hubless_system);
+
return 0;
}
@@ -286,6 +311,24 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
if (uv_hub_info->hub_revision == 0)
goto badbios;
+ switch (uv_hub_info->hub_revision) {
+ case UV4_HUB_REVISION_BASE:
+ uv_hubbed_system = 0x11;
+ break;
+
+ case UV3_HUB_REVISION_BASE:
+ uv_hubbed_system = 0x9;
+ break;
+
+ case UV2_HUB_REVISION_BASE:
+ uv_hubbed_system = 0x5;
+ break;
+
+ case UV1_HUB_REVISION_BASE:
+ uv_hubbed_system = 0x3;
+ break;
+ }
+
pnodeid = early_get_pnodeid();
early_get_apic_socketid_shift();
@@ -336,9 +379,15 @@ int is_uv_system(void)
}
EXPORT_SYMBOL_GPL(is_uv_system);
-int is_uv_hubless(void)
+int is_uv_hubbed(int uvtype)
+{
+ return (uv_hubbed_system & uvtype);
+}
+EXPORT_SYMBOL_GPL(is_uv_hubbed);
+
+int is_uv_hubless(int uvtype)
{
- return uv_hubless_system;
+ return (uv_hubless_system & uvtype);
}
EXPORT_SYMBOL_GPL(is_uv_hubless);
@@ -1255,7 +1304,8 @@ static int __init decode_uv_systab(void)
struct uv_systab *st;
int i;
- if (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE)
+ /* If system is uv3 or lower, there is no extended UVsystab */
+ if (is_uv_hubbed(0xfffffe) < uv(4) && is_uv_hubless(0xfffffe) < uv(4))
return 0; /* No extended UVsystab required */
st = uv_systab;
@@ -1434,6 +1484,103 @@ static void __init build_socket_tables(void)
}
}
+/* Check which reboot to use */
+static void check_efi_reboot(void)
+{
+ /* If EFI reboot not available, use ACPI reboot */
+ if (!efi_enabled(EFI_BOOT))
+ reboot_type = BOOT_ACPI;
+}
+
+/* Setup user proc fs files */
+static int proc_hubbed_show(struct seq_file *file, void *data)
+{
+ seq_printf(file, "0x%x\n", uv_hubbed_system);
+ return 0;
+}
+
+static int proc_hubless_show(struct seq_file *file, void *data)
+{
+ seq_printf(file, "0x%x\n", uv_hubless_system);
+ return 0;
+}
+
+static int proc_oemid_show(struct seq_file *file, void *data)
+{
+ seq_printf(file, "%s/%s\n", oem_id, oem_table_id);
+ return 0;
+}
+
+static int proc_hubbed_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_hubbed_show, (void *)NULL);
+}
+
+static int proc_hubless_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_hubless_show, (void *)NULL);
+}
+
+static int proc_oemid_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, proc_oemid_show, (void *)NULL);
+}
+
+/* (struct is "non-const" as open function is set at runtime) */
+static struct file_operations proc_version_fops = {
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static const struct file_operations proc_oemid_fops = {
+ .open = proc_oemid_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+static __init void uv_setup_proc_files(int hubless)
+{
+ struct proc_dir_entry *pde;
+ char *name = hubless ? "hubless" : "hubbed";
+
+ pde = proc_mkdir(UV_PROC_NODE, NULL);
+ proc_create("oemid", 0, pde, &proc_oemid_fops);
+ proc_create(name, 0, pde, &proc_version_fops);
+ if (hubless)
+ proc_version_fops.open = proc_hubless_open;
+ else
+ proc_version_fops.open = proc_hubbed_open;
+}
+
+/* Initialize UV hubless systems */
+static __init int uv_system_init_hubless(void)
+{
+ int rc;
+
+ /* Setup PCH NMI handler */
+ uv_nmi_setup_hubless();
+
+ /* Init kernel/BIOS interface */
+ rc = uv_bios_init();
+ if (rc < 0)
+ return rc;
+
+ /* Process UVsystab */
+ rc = decode_uv_systab();
+ if (rc < 0)
+ return rc;
+
+ /* Create user access node */
+ if (rc >= 0)
+ uv_setup_proc_files(1);
+
+ check_efi_reboot();
+
+ return rc;
+}
+
static void __init uv_system_init_hub(void)
{
struct uv_hub_info_s hub_info = {0};
@@ -1559,32 +1706,27 @@ static void __init uv_system_init_hub(void)
uv_nmi_setup();
uv_cpu_init();
uv_scir_register_cpu_notifier();
- proc_mkdir("sgi_uv", NULL);
+ uv_setup_proc_files(0);
/* Register Legacy VGA I/O redirection handler: */
pci_register_set_vga_state(uv_set_vga_state);
- /*
- * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as
- * EFI is not enabled in the kdump kernel:
- */
- if (is_kdump_kernel())
- reboot_type = BOOT_ACPI;
+ check_efi_reboot();
}
/*
- * There is a small amount of UV specific code needed to initialize a
- * UV system that does not have a "UV HUB" (referred to as "hubless").
+ * There is a different code path needed to initialize a UV system that does
+ * not have a "UV HUB" (referred to as "hubless").
*/
void __init uv_system_init(void)
{
- if (likely(!is_uv_system() && !is_uv_hubless()))
+ if (likely(!is_uv_system() && !is_uv_hubless(1)))
return;
if (is_uv_system())
uv_system_init_hub();
else
- uv_nmi_setup_hubless();
+ uv_system_init_hubless();
}
apic_driver(apic_x2apic_uv_x);
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4c7b0fa15a19..8bf64899f56a 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -39,6 +39,7 @@ static void __init spectre_v2_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
+static void __init mds_print_mitigation(void);
static void __init taa_select_mitigation(void);
/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
@@ -108,6 +109,12 @@ void __init check_bugs(void)
mds_select_mitigation();
taa_select_mitigation();
+ /*
+ * As MDS and TAA mitigations are inter-related, print MDS
+ * mitigation until after TAA mitigation selection is done.
+ */
+ mds_print_mitigation();
+
arch_smt_update();
#ifdef CONFIG_X86_32
@@ -245,6 +252,12 @@ static void __init mds_select_mitigation(void)
(mds_nosmt || cpu_mitigations_auto_nosmt()))
cpu_smt_disable(false);
}
+}
+
+static void __init mds_print_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off())
+ return;
pr_info("%s\n", mds_strings[mds_mitigation]);
}
@@ -304,8 +317,12 @@ static void __init taa_select_mitigation(void)
return;
}
- /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */
- if (taa_mitigation == TAA_MITIGATION_OFF)
+ /*
+ * TAA mitigation via VERW is turned off if both
+ * tsx_async_abort=off and mds=off are specified.
+ */
+ if (taa_mitigation == TAA_MITIGATION_OFF &&
+ mds_mitigation == MDS_MITIGATION_OFF)
goto out;
if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
@@ -339,6 +356,15 @@ static void __init taa_select_mitigation(void)
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
+ /*
+ * Update MDS mitigation, if necessary, as the mds_user_clear is
+ * now enabled for TAA mitigation.
+ */
+ if (mds_mitigation == MDS_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_MDS)) {
+ mds_mitigation = MDS_MITIGATION_FULL;
+ mds_select_mitigation();
+ }
out:
pr_info("%s\n", taa_strings[taa_mitigation]);
}
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fffe21945374..baa2fed8deb6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -53,10 +53,7 @@
#include <asm/microcode_intel.h>
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
-
-#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/uv/uv.h>
-#endif
#include "cpu.h"
@@ -565,8 +562,9 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
return NULL; /* Not found */
}
-__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS];
-__u32 cpu_caps_set[NCAPINTS + NBUGINTS];
+/* Aligned to unsigned long to avoid split lock in atomic bitmap ops */
+__u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
+__u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
void load_percpu_segment(int cpu)
{
@@ -1780,7 +1778,7 @@ static void wait_for_master_cpu(int cpu)
}
#ifdef CONFIG_X86_64
-static void setup_getcpu(int cpu)
+static inline void setup_getcpu(int cpu)
{
unsigned long cpudata = vdso_encode_cpunode(cpu, early_cpu_to_node(cpu));
struct desc_struct d = { };
@@ -1800,7 +1798,59 @@ static void setup_getcpu(int cpu)
write_gdt_entry(get_cpu_gdt_rw(cpu), GDT_ENTRY_CPUNODE, &d, DESCTYPE_S);
}
+
+static inline void ucode_cpu_init(int cpu)
+{
+ if (cpu)
+ load_ucode_ap();
+}
+
+static inline void tss_setup_ist(struct tss_struct *tss)
+{
+ /* Set up the per-CPU TSS IST stacks */
+ tss->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
+ tss->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
+ tss->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
+ tss->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
+}
+
+static inline void gdt_setup_doublefault_tss(int cpu) { }
+
+#else /* CONFIG_X86_64 */
+
+static inline void setup_getcpu(int cpu) { }
+
+static inline void ucode_cpu_init(int cpu)
+{
+ show_ucode_info_early();
+}
+
+static inline void tss_setup_ist(struct tss_struct *tss) { }
+
+static inline void gdt_setup_doublefault_tss(int cpu)
+{
+#ifdef CONFIG_DOUBLEFAULT
+ /* Set up the doublefault TSS pointer in the GDT */
+ __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
+#endif
+}
+#endif /* !CONFIG_X86_64 */
+
+static inline void tss_setup_io_bitmap(struct tss_struct *tss)
+{
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
+
+#ifdef CONFIG_X86_IOPL_IOPERM
+ tss->io_bitmap.prev_max = 0;
+ tss->io_bitmap.prev_sequence = 0;
+ memset(tss->io_bitmap.bitmap, 0xff, sizeof(tss->io_bitmap.bitmap));
+ /*
+ * Invalidate the extra array entry past the end of the all
+ * permission bitmap as required by the hardware.
+ */
+ tss->io_bitmap.mapall[IO_BITMAP_LONGS] = ~0UL;
#endif
+}
/*
* cpu_init() initializes state that is per-CPU. Some data is already
@@ -1808,21 +1858,15 @@ static void setup_getcpu(int cpu)
* and IDT. We reload them nevertheless, this function acts as a
* 'CPU state barrier', nothing should get across.
*/
-#ifdef CONFIG_X86_64
-
void cpu_init(void)
{
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+ struct task_struct *cur = current;
int cpu = raw_smp_processor_id();
- struct task_struct *me;
- struct tss_struct *t;
- int i;
wait_for_master_cpu(cpu);
- if (cpu)
- load_ucode_ap();
-
- t = &per_cpu(cpu_tss_rw, cpu);
+ ucode_cpu_init(cpu);
#ifdef CONFIG_NUMA
if (this_cpu_read(numa_node) == 0 &&
@@ -1831,63 +1875,47 @@ void cpu_init(void)
#endif
setup_getcpu(cpu);
- me = current;
-
pr_debug("Initializing CPU#%d\n", cpu);
- cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ if (IS_ENABLED(CONFIG_X86_64) || cpu_feature_enabled(X86_FEATURE_VME) ||
+ boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
+ cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
/*
* Initialize the per-CPU GDT with the boot GDT,
* and set up the GDT descriptor:
*/
-
switch_to_new_gdt(cpu);
- loadsegment(fs, 0);
-
load_current_idt();
- memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
- syscall_init();
-
- wrmsrl(MSR_FS_BASE, 0);
- wrmsrl(MSR_KERNEL_GS_BASE, 0);
- barrier();
+ if (IS_ENABLED(CONFIG_X86_64)) {
+ loadsegment(fs, 0);
+ memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
+ syscall_init();
- x86_configure_nx();
- x2apic_setup();
+ wrmsrl(MSR_FS_BASE, 0);
+ wrmsrl(MSR_KERNEL_GS_BASE, 0);
+ barrier();
- /*
- * set up and load the per-CPU TSS
- */
- if (!t->x86_tss.ist[0]) {
- t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
- t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
- t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
- t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
+ x2apic_setup();
}
- t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-
- /*
- * <= is required because the CPU will access up to
- * 8 bits beyond the end of the IO permission bitmap.
- */
- for (i = 0; i <= IO_BITMAP_LONGS; i++)
- t->io_bitmap[i] = ~0UL;
-
mmgrab(&init_mm);
- me->active_mm = &init_mm;
- BUG_ON(me->mm);
+ cur->active_mm = &init_mm;
+ BUG_ON(cur->mm);
initialize_tlbstate_and_flush();
- enter_lazy_tlb(&init_mm, me);
+ enter_lazy_tlb(&init_mm, cur);
- /*
- * Initialize the TSS. sp0 points to the entry trampoline stack
- * regardless of what task is running.
- */
+ /* Initialize the TSS. */
+ tss_setup_ist(tss);
+ tss_setup_io_bitmap(tss);
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
+
load_TR_desc();
+ /*
+ * sp0 points to the entry trampoline stack regardless of what task
+ * is running.
+ */
load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
load_mm_ldt(&init_mm);
@@ -1895,6 +1923,8 @@ void cpu_init(void)
clear_all_debug_regs();
dbg_restore_debug_regs();
+ gdt_setup_doublefault_tss(cpu);
+
fpu__init_cpu();
if (is_uv_system())
@@ -1903,63 +1933,6 @@ void cpu_init(void)
load_fixmap_gdt(cpu);
}
-#else
-
-void cpu_init(void)
-{
- int cpu = smp_processor_id();
- struct task_struct *curr = current;
- struct tss_struct *t = &per_cpu(cpu_tss_rw, cpu);
-
- wait_for_master_cpu(cpu);
-
- show_ucode_info_early();
-
- pr_info("Initializing CPU#%d\n", cpu);
-
- if (cpu_feature_enabled(X86_FEATURE_VME) ||
- boot_cpu_has(X86_FEATURE_TSC) ||
- boot_cpu_has(X86_FEATURE_DE))
- cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
-
- load_current_idt();
- switch_to_new_gdt(cpu);
-
- /*
- * Set up and load the per-CPU TSS and LDT
- */
- mmgrab(&init_mm);
- curr->active_mm = &init_mm;
- BUG_ON(curr->mm);
- initialize_tlbstate_and_flush();
- enter_lazy_tlb(&init_mm, curr);
-
- /*
- * Initialize the TSS. sp0 points to the entry trampoline stack
- * regardless of what task is running.
- */
- set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
- load_TR_desc();
- load_sp0((unsigned long)(cpu_entry_stack(cpu) + 1));
-
- load_mm_ldt(&init_mm);
-
- t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
-
-#ifdef CONFIG_DOUBLEFAULT
- /* Set up doublefault TSS pointer in the GDT */
- __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
-#endif
-
- clear_all_debug_regs();
- dbg_restore_debug_regs();
-
- fpu__init_cpu();
-
- load_fixmap_gdt(cpu);
-}
-#endif
-
/*
* The microcode loader calls this upon late microcode load to recheck features,
* only when microcode has been updated. Caller holds microcode_mutex and CPU
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 11d5c5950e2d..4a900804a023 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -819,7 +819,7 @@ static const struct _tlb_table intel_tlb_table[] = {
{ 0x04, TLB_DATA_4M, 8, " TLB_DATA 4 MByte pages, 4-way set associative" },
{ 0x05, TLB_DATA_4M, 32, " TLB_DATA 4 MByte pages, 4-way set associative" },
{ 0x0b, TLB_INST_4M, 4, " TLB_INST 4 MByte pages, 4-way set associative" },
- { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages */" },
+ { 0x4f, TLB_INST_4K, 32, " TLB_INST 4 KByte pages" },
{ 0x50, TLB_INST_ALL, 64, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
{ 0x51, TLB_INST_ALL, 128, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
{ 0x52, TLB_INST_ALL, 256, " TLB_INST 4 KByte and 2-MByte or 4-MByte pages" },
@@ -847,7 +847,7 @@ static const struct _tlb_table intel_tlb_table[] = {
{ 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" },
{ 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" },
{ 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" },
- { 0xc2, TLB_DATA_2M_4M, 16, " DTLB 2 MByte/4MByte pages, 4-way associative" },
+ { 0xc2, TLB_DATA_2M_4M, 16, " TLB_DATA 2 MByte/4MByte pages, 4-way associative" },
{ 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" },
{ 0x00, 0, 0 }
};
@@ -859,8 +859,8 @@ static void intel_tlb_lookup(const unsigned char desc)
return;
/* look up this descriptor in the table */
- for (k = 0; intel_tlb_table[k].descriptor != desc && \
- intel_tlb_table[k].descriptor != 0; k++)
+ for (k = 0; intel_tlb_table[k].descriptor != desc &&
+ intel_tlb_table[k].descriptor != 0; k++)
;
if (intel_tlb_table[k].tlb_type == 0)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index c656d92cd708..caa032ce3fe3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -290,7 +290,12 @@ static void __init ms_hyperv_init_platform(void)
machine_ops.shutdown = hv_machine_shutdown;
machine_ops.crash_shutdown = hv_machine_crash_shutdown;
#endif
- mark_tsc_unstable("running on Hyper-V");
+ if (ms_hyperv.features & HV_X64_ACCESS_TSC_INVARIANT) {
+ wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1);
+ setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE);
+ } else {
+ mark_tsc_unstable("running on Hyper-V");
+ }
/*
* Generation 2 instances don't support reading the NMI status from
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index 5c900f9527ff..c4be62058dd9 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -29,7 +29,8 @@ __setup("nordrand", x86_rdrand_setup);
#ifdef CONFIG_ARCH_RANDOM
void x86_init_rdrand(struct cpuinfo_x86 *c)
{
- unsigned long tmp;
+ unsigned int changed = 0;
+ unsigned long tmp, prev;
int i;
if (!cpu_has(c, X86_FEATURE_RDRAND))
@@ -42,5 +43,24 @@ void x86_init_rdrand(struct cpuinfo_x86 *c)
return;
}
}
+
+ /*
+ * Stupid sanity-check whether RDRAND does *actually* generate
+ * some at least random-looking data.
+ */
+ prev = tmp;
+ for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
+ if (rdrand_long(&tmp)) {
+ if (prev != tmp)
+ changed++;
+
+ prev = tmp;
+ }
+ }
+
+ if (WARN_ON_ONCE(!changed))
+ pr_emerg(
+"RDRAND gives funky smelling output, might consider not using it by booting with \"nordrand\"");
+
}
#endif
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index eb651fbde92a..00fc55ac7ffa 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -24,6 +24,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/memblock.h>
#include <asm/processor.h>
#include <asm/hardirq.h>
@@ -39,6 +40,7 @@
#include <asm/virtext.h>
#include <asm/intel_pt.h>
#include <asm/crash.h>
+#include <asm/cmdline.h>
/* Used while preparing memory map entries for second kernel */
struct crash_memmap_data {
@@ -68,6 +70,19 @@ static inline void cpu_crash_vmclear_loaded_vmcss(void)
rcu_read_unlock();
}
+/*
+ * When the crashkernel option is specified, only use the low
+ * 1M for the real mode trampoline.
+ */
+void __init crash_reserve_low_1M(void)
+{
+ if (cmdline_find_option(boot_command_line, "crashkernel", NULL, 0) < 0)
+ return;
+
+ memblock_reserve(0, 1<<20);
+ pr_info("Reserving the low 1M of memory for crashkernel\n");
+}
+
#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
@@ -173,8 +188,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
#ifdef CONFIG_KEXEC_FILE
-static unsigned long crash_zero_bytes;
-
static int get_nr_ram_ranges_callback(struct resource *res, void *arg)
{
unsigned int *nr_ranges = arg;
@@ -189,8 +202,7 @@ static struct crash_mem *fill_up_crash_elf_data(void)
unsigned int nr_ranges = 0;
struct crash_mem *cmem;
- walk_system_ram_res(0, -1, &nr_ranges,
- get_nr_ram_ranges_callback);
+ walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback);
if (!nr_ranges)
return NULL;
@@ -217,15 +229,19 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem)
{
int ret = 0;
+ /* Exclude the low 1M because it is always reserved */
+ ret = crash_exclude_mem_range(cmem, 0, 1<<20);
+ if (ret)
+ return ret;
+
/* Exclude crashkernel region */
ret = crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end);
if (ret)
return ret;
- if (crashk_low_res.end) {
+ if (crashk_low_res.end)
ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
- crashk_low_res.end);
- }
+ crashk_low_res.end);
return ret;
}
@@ -246,16 +262,13 @@ static int prepare_elf_headers(struct kimage *image, void **addr,
unsigned long *sz)
{
struct crash_mem *cmem;
- Elf64_Ehdr *ehdr;
- Elf64_Phdr *phdr;
- int ret, i;
+ int ret;
cmem = fill_up_crash_elf_data();
if (!cmem)
return -ENOMEM;
- ret = walk_system_ram_res(0, -1, cmem,
- prepare_elf64_ram_headers_callback);
+ ret = walk_system_ram_res(0, -1, cmem, prepare_elf64_ram_headers_callback);
if (ret)
goto out;
@@ -265,24 +278,8 @@ static int prepare_elf_headers(struct kimage *image, void **addr,
goto out;
/* By default prepare 64bit headers */
- ret = crash_prepare_elf64_headers(cmem,
- IS_ENABLED(CONFIG_X86_64), addr, sz);
- if (ret)
- goto out;
+ ret = crash_prepare_elf64_headers(cmem, IS_ENABLED(CONFIG_X86_64), addr, sz);
- /*
- * If a range matches backup region, adjust offset to backup
- * segment.
- */
- ehdr = (Elf64_Ehdr *)*addr;
- phdr = (Elf64_Phdr *)(ehdr + 1);
- for (i = 0; i < ehdr->e_phnum; phdr++, i++)
- if (phdr->p_type == PT_LOAD &&
- phdr->p_paddr == image->arch.backup_src_start &&
- phdr->p_memsz == image->arch.backup_src_sz) {
- phdr->p_offset = image->arch.backup_load_addr;
- break;
- }
out:
vfree(cmem);
return ret;
@@ -296,8 +293,7 @@ static int add_e820_entry(struct boot_params *params, struct e820_entry *entry)
if (nr_e820_entries >= E820_MAX_ENTRIES_ZEROPAGE)
return 1;
- memcpy(&params->e820_table[nr_e820_entries], entry,
- sizeof(struct e820_entry));
+ memcpy(&params->e820_table[nr_e820_entries], entry, sizeof(struct e820_entry));
params->e820_entries++;
return 0;
}
@@ -321,19 +317,11 @@ static int memmap_exclude_ranges(struct kimage *image, struct crash_mem *cmem,
unsigned long long mend)
{
unsigned long start, end;
- int ret = 0;
cmem->ranges[0].start = mstart;
cmem->ranges[0].end = mend;
cmem->nr_ranges = 1;
- /* Exclude Backup region */
- start = image->arch.backup_load_addr;
- end = start + image->arch.backup_src_sz - 1;
- ret = crash_exclude_mem_range(cmem, start, end);
- if (ret)
- return ret;
-
/* Exclude elf header region */
start = image->arch.elf_load_addr;
end = start + image->arch.elf_headers_sz - 1;
@@ -356,28 +344,28 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
memset(&cmd, 0, sizeof(struct crash_memmap_data));
cmd.params = params;
- /* Add first 640K segment */
- ei.addr = image->arch.backup_src_start;
- ei.size = image->arch.backup_src_sz;
- ei.type = E820_TYPE_RAM;
- add_e820_entry(params, &ei);
+ /* Add the low 1M */
+ cmd.type = E820_TYPE_RAM;
+ flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ walk_iomem_res_desc(IORES_DESC_NONE, flags, 0, (1<<20)-1, &cmd,
+ memmap_entry_callback);
/* Add ACPI tables */
cmd.type = E820_TYPE_ACPI;
flags = IORESOURCE_MEM | IORESOURCE_BUSY;
walk_iomem_res_desc(IORES_DESC_ACPI_TABLES, flags, 0, -1, &cmd,
- memmap_entry_callback);
+ memmap_entry_callback);
/* Add ACPI Non-volatile Storage */
cmd.type = E820_TYPE_NVS;
walk_iomem_res_desc(IORES_DESC_ACPI_NV_STORAGE, flags, 0, -1, &cmd,
- memmap_entry_callback);
+ memmap_entry_callback);
/* Add e820 reserved ranges */
cmd.type = E820_TYPE_RESERVED;
flags = IORESOURCE_MEM;
walk_iomem_res_desc(IORES_DESC_RESERVED, flags, 0, -1, &cmd,
- memmap_entry_callback);
+ memmap_entry_callback);
/* Add crashk_low_res region */
if (crashk_low_res.end) {
@@ -388,8 +376,7 @@ int crash_setup_memmap_entries(struct kimage *image, struct boot_params *params)
}
/* Exclude some ranges from crashk_res and add rest to memmap */
- ret = memmap_exclude_ranges(image, cmem, crashk_res.start,
- crashk_res.end);
+ ret = memmap_exclude_ranges(image, cmem, crashk_res.start, crashk_res.end);
if (ret)
goto out;
@@ -409,55 +396,12 @@ out:
return ret;
}
-static int determine_backup_region(struct resource *res, void *arg)
-{
- struct kimage *image = arg;
-
- image->arch.backup_src_start = res->start;
- image->arch.backup_src_sz = resource_size(res);
-
- /* Expecting only one range for backup region */
- return 1;
-}
-
int crash_load_segments(struct kimage *image)
{
int ret;
struct kexec_buf kbuf = { .image = image, .buf_min = 0,
.buf_max = ULONG_MAX, .top_down = false };
- /*
- * Determine and load a segment for backup area. First 640K RAM
- * region is backup source
- */
-
- ret = walk_system_ram_res(KEXEC_BACKUP_SRC_START, KEXEC_BACKUP_SRC_END,
- image, determine_backup_region);
-
- /* Zero or postive return values are ok */
- if (ret < 0)
- return ret;
-
- /* Add backup segment. */
- if (image->arch.backup_src_sz) {
- kbuf.buffer = &crash_zero_bytes;
- kbuf.bufsz = sizeof(crash_zero_bytes);
- kbuf.memsz = image->arch.backup_src_sz;
- kbuf.buf_align = PAGE_SIZE;
- /*
- * Ideally there is no source for backup segment. This is
- * copied in purgatory after crash. Just add a zero filled
- * segment for now to make sure checksum logic works fine.
- */
- ret = kexec_add_buffer(&kbuf);
- if (ret)
- return ret;
- image->arch.backup_load_addr = kbuf.mem;
- pr_debug("Loaded backup region at 0x%lx backup_start=0x%lx memsz=0x%lx\n",
- image->arch.backup_load_addr,
- image->arch.backup_src_start, kbuf.memsz);
- }
-
/* Prepare elf headers and add a segment */
ret = prepare_elf_headers(image, &kbuf.buffer, &kbuf.bufsz);
if (ret)
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c
index 0b8cedb20d6d..0d6c657593f8 100644
--- a/arch/x86/kernel/doublefault.c
+++ b/arch/x86/kernel/doublefault.c
@@ -54,7 +54,7 @@ struct x86_hw_tss doublefault_tss __cacheline_aligned = {
.sp0 = STACK_START,
.ss0 = __KERNEL_DS,
.ldt = 0,
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+ .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
.ip = (unsigned long) doublefault_fn,
/* 0x2 bit is always set */
@@ -65,6 +65,9 @@ struct x86_hw_tss doublefault_tss __cacheline_aligned = {
.ss = __KERNEL_DS,
.ds = __USER_DS,
.fs = __KERNEL_PERCPU,
+#ifndef CONFIG_X86_32_LAZY_GS
+ .gs = __KERNEL_STACK_CANARY,
+#endif
.__cr3 = __pa_nodebug(swapper_pg_dir),
};
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 7da2bcd2b8eb..c5399e80c59c 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -190,6 +190,7 @@ static void __init e820_print_type(enum e820_type type)
case E820_TYPE_RAM: /* Fall through: */
case E820_TYPE_RESERVED_KERN: pr_cont("usable"); break;
case E820_TYPE_RESERVED: pr_cont("reserved"); break;
+ case E820_TYPE_SOFT_RESERVED: pr_cont("soft reserved"); break;
case E820_TYPE_ACPI: pr_cont("ACPI data"); break;
case E820_TYPE_NVS: pr_cont("ACPI NVS"); break;
case E820_TYPE_UNUSABLE: pr_cont("unusable"); break;
@@ -999,6 +1000,17 @@ void __init e820__reserve_setup_data(void)
data = early_memremap(pa_data, sizeof(*data));
e820__range_update(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
e820__range_update_kexec(pa_data, sizeof(*data)+data->len, E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+
+ if (data->type == SETUP_INDIRECT &&
+ ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+ e820__range_update(((struct setup_indirect *)data->data)->addr,
+ ((struct setup_indirect *)data->data)->len,
+ E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+ e820__range_update_kexec(((struct setup_indirect *)data->data)->addr,
+ ((struct setup_indirect *)data->data)->len,
+ E820_TYPE_RAM, E820_TYPE_RESERVED_KERN);
+ }
+
pa_data = data->next;
early_memunmap(data, sizeof(*data));
}
@@ -1037,6 +1049,7 @@ static const char *__init e820_type_to_string(struct e820_entry *entry)
case E820_TYPE_PRAM: return "Persistent Memory (legacy)";
case E820_TYPE_PMEM: return "Persistent Memory";
case E820_TYPE_RESERVED: return "Reserved";
+ case E820_TYPE_SOFT_RESERVED: return "Soft Reserved";
default: return "Unknown E820 type";
}
}
@@ -1052,6 +1065,7 @@ static unsigned long __init e820_type_to_iomem_type(struct e820_entry *entry)
case E820_TYPE_PRAM: /* Fall-through: */
case E820_TYPE_PMEM: /* Fall-through: */
case E820_TYPE_RESERVED: /* Fall-through: */
+ case E820_TYPE_SOFT_RESERVED: /* Fall-through: */
default: return IORESOURCE_MEM;
}
}
@@ -1064,6 +1078,7 @@ static unsigned long __init e820_type_to_iores_desc(struct e820_entry *entry)
case E820_TYPE_PMEM: return IORES_DESC_PERSISTENT_MEMORY;
case E820_TYPE_PRAM: return IORES_DESC_PERSISTENT_MEMORY_LEGACY;
case E820_TYPE_RESERVED: return IORES_DESC_RESERVED;
+ case E820_TYPE_SOFT_RESERVED: return IORES_DESC_SOFT_RESERVED;
case E820_TYPE_RESERVED_KERN: /* Fall-through: */
case E820_TYPE_RAM: /* Fall-through: */
case E820_TYPE_UNUSABLE: /* Fall-through: */
@@ -1078,11 +1093,12 @@ static bool __init do_mark_busy(enum e820_type type, struct resource *res)
return true;
/*
- * Treat persistent memory like device memory, i.e. reserve it
- * for exclusive use of a driver
+ * Treat persistent memory and other special memory ranges like
+ * device memory, i.e. reserve it for exclusive use of a driver
*/
switch (type) {
case E820_TYPE_RESERVED:
+ case E820_TYPE_SOFT_RESERVED:
case E820_TYPE_PRAM:
case E820_TYPE_PMEM:
return false;
@@ -1285,6 +1301,9 @@ void __init e820__memblock_setup(void)
if (end != (resource_size_t)end)
continue;
+ if (entry->type == E820_TYPE_SOFT_RESERVED)
+ memblock_reserve(entry->addr, entry->size);
+
if (entry->type != E820_TYPE_RAM && entry->type != E820_TYPE_RESERVED_KERN)
continue;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index e5cb67d67c03..319be936c348 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -60,7 +60,7 @@ u64 xfeatures_mask __read_mostly;
static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
-static unsigned int xstate_comp_offsets[sizeof(xfeatures_mask)*8];
+static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1};
/*
* The XSAVE area of kernel can be in standard or compacted format;
@@ -254,10 +254,13 @@ static void __init setup_xstate_features(void)
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
- xstate_offsets[0] = 0;
- xstate_sizes[0] = offsetof(struct fxregs_state, xmm_space);
- xstate_offsets[1] = xstate_sizes[0];
- xstate_sizes[1] = FIELD_SIZEOF(struct fxregs_state, xmm_space);
+ xstate_offsets[XFEATURE_FP] = 0;
+ xstate_sizes[XFEATURE_FP] = offsetof(struct fxregs_state,
+ xmm_space);
+
+ xstate_offsets[XFEATURE_SSE] = xstate_sizes[XFEATURE_FP];
+ xstate_sizes[XFEATURE_SSE] = FIELD_SIZEOF(struct fxregs_state,
+ xmm_space);
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
if (!xfeature_enabled(i))
@@ -342,7 +345,7 @@ static int xfeature_is_aligned(int xfeature_nr)
*/
static void __init setup_xstate_comp(void)
{
- unsigned int xstate_comp_sizes[sizeof(xfeatures_mask)*8];
+ unsigned int xstate_comp_sizes[XFEATURE_MAX];
int i;
/*
@@ -350,8 +353,9 @@ static void __init setup_xstate_comp(void)
* in the fixed offsets in the xsave area in either compacted form
* or standard form.
*/
- xstate_comp_offsets[0] = 0;
- xstate_comp_offsets[1] = offsetof(struct fxregs_state, xmm_space);
+ xstate_comp_offsets[XFEATURE_FP] = 0;
+ xstate_comp_offsets[XFEATURE_SSE] = offsetof(struct fxregs_state,
+ xmm_space);
if (!boot_cpu_has(X86_FEATURE_XSAVES)) {
for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) {
@@ -840,7 +844,7 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr)
/*
* We should not ever be requesting features that we
- * have not enabled. Remember that pcntxt_mask is
+ * have not enabled. Remember that xfeatures_mask is
* what we write to the XCR0 register.
*/
WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)),
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 024c3053dbba..060a361d9d11 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -1043,6 +1043,20 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent,
return;
/*
+ * If the return location is actually pointing directly to
+ * the start of a direct trampoline (if we trace the trampoline
+ * it will still be offset by MCOUNT_INSN_SIZE), then the
+ * return address is actually off by one word, and we
+ * need to adjust for that.
+ */
+ if (ftrace_direct_func_count) {
+ if (ftrace_find_direct_func(self_addr + MCOUNT_INSN_SIZE)) {
+ self_addr = *parent;
+ parent++;
+ }
+ }
+
+ /*
* Protect against fault, even if it shouldn't
* happen. This tool is too much intrusive to
* ignore such a protection.
diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S
index 073aab525d80..e8a9f8370112 100644
--- a/arch/x86/kernel/ftrace_32.S
+++ b/arch/x86/kernel/ftrace_32.S
@@ -12,20 +12,18 @@
#include <asm/frame.h>
#include <asm/asm-offsets.h>
-# define function_hook __fentry__
-EXPORT_SYMBOL(__fentry__)
-
#ifdef CONFIG_FRAME_POINTER
# define MCOUNT_FRAME 1 /* using frame = true */
#else
# define MCOUNT_FRAME 0 /* using frame = false */
#endif
-ENTRY(function_hook)
+SYM_FUNC_START(__fentry__)
ret
-END(function_hook)
+SYM_FUNC_END(__fentry__)
+EXPORT_SYMBOL(__fentry__)
-ENTRY(ftrace_caller)
+SYM_CODE_START(ftrace_caller)
#ifdef CONFIG_FRAME_POINTER
/*
@@ -85,11 +83,11 @@ ftrace_graph_call:
#endif
/* This is weak to keep gas from relaxing the jumps */
-WEAK(ftrace_stub)
+SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
ret
-END(ftrace_caller)
+SYM_CODE_END(ftrace_caller)
-ENTRY(ftrace_regs_caller)
+SYM_CODE_START(ftrace_regs_caller)
/*
* We're here from an mcount/fentry CALL, and the stack frame looks like:
*
@@ -138,7 +136,7 @@ ENTRY(ftrace_regs_caller)
movl function_trace_op, %ecx # 3rd argument: ftrace_pos
pushl %esp # 4th argument: pt_regs
-GLOBAL(ftrace_regs_call)
+SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
call ftrace_stub
addl $4, %esp # skip 4th argument
@@ -163,9 +161,10 @@ GLOBAL(ftrace_regs_call)
popl %eax
jmp .Lftrace_ret
+SYM_CODE_END(ftrace_regs_caller)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
+SYM_CODE_START(ftrace_graph_caller)
pushl %eax
pushl %ecx
pushl %edx
@@ -179,7 +178,7 @@ ENTRY(ftrace_graph_caller)
popl %ecx
popl %eax
ret
-END(ftrace_graph_caller)
+SYM_CODE_END(ftrace_graph_caller)
.globl return_to_handler
return_to_handler:
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 809d54397dba..369e61faacfe 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -14,9 +14,6 @@
.code64
.section .entry.text, "ax"
-# define function_hook __fentry__
-EXPORT_SYMBOL(__fentry__)
-
#ifdef CONFIG_FRAME_POINTER
/* Save parent and function stack frames (rip and rbp) */
# define MCOUNT_FRAME_SIZE (8+16*2)
@@ -88,6 +85,7 @@ EXPORT_SYMBOL(__fentry__)
movq %rdi, RDI(%rsp)
movq %r8, R8(%rsp)
movq %r9, R9(%rsp)
+ movq $0, ORIG_RAX(%rsp)
/*
* Save the original RBP. Even though the mcount ABI does not
* require this, it helps out callers.
@@ -114,7 +112,11 @@ EXPORT_SYMBOL(__fentry__)
subq $MCOUNT_INSN_SIZE, %rdi
.endm
-.macro restore_mcount_regs
+.macro restore_mcount_regs save=0
+
+ /* ftrace_regs_caller or frame pointers require this */
+ movq RBP(%rsp), %rbp
+
movq R9(%rsp), %r9
movq R8(%rsp), %r8
movq RDI(%rsp), %rdi
@@ -123,31 +125,29 @@ EXPORT_SYMBOL(__fentry__)
movq RCX(%rsp), %rcx
movq RAX(%rsp), %rax
- /* ftrace_regs_caller can modify %rbp */
- movq RBP(%rsp), %rbp
-
- addq $MCOUNT_REG_SIZE, %rsp
+ addq $MCOUNT_REG_SIZE-\save, %rsp
.endm
#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(function_hook)
+SYM_FUNC_START(__fentry__)
retq
-ENDPROC(function_hook)
+SYM_FUNC_END(__fentry__)
+EXPORT_SYMBOL(__fentry__)
-ENTRY(ftrace_caller)
+SYM_FUNC_START(ftrace_caller)
/* save_mcount_regs fills in first two parameters */
save_mcount_regs
-GLOBAL(ftrace_caller_op_ptr)
+SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL)
/* Load the ftrace_ops into the 3rd parameter */
movq function_trace_op(%rip), %rdx
/* regs go into 4th parameter (but make it NULL) */
movq $0, %rcx
-GLOBAL(ftrace_call)
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
call ftrace_stub
restore_mcount_regs
@@ -157,10 +157,10 @@ GLOBAL(ftrace_call)
* think twice before adding any new code or changing the
* layout here.
*/
-GLOBAL(ftrace_epilogue)
+SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-GLOBAL(ftrace_graph_call)
+SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
jmp ftrace_stub
#endif
@@ -168,19 +168,21 @@ GLOBAL(ftrace_graph_call)
* This is weak to keep gas from relaxing the jumps.
* It is also used to copy the retq for trampolines.
*/
-WEAK(ftrace_stub)
+SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
retq
-ENDPROC(ftrace_caller)
+SYM_FUNC_END(ftrace_caller)
-ENTRY(ftrace_regs_caller)
+SYM_FUNC_START(ftrace_regs_caller)
/* Save the current flags before any operations that can change them */
pushfq
+ UNWIND_HINT_SAVE
+
/* added 8 bytes to save flags */
save_mcount_regs 8
/* save_mcount_regs fills in first two parameters */
-GLOBAL(ftrace_regs_caller_op_ptr)
+SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
/* Load the ftrace_ops into the 3rd parameter */
movq function_trace_op(%rip), %rdx
@@ -209,7 +211,7 @@ GLOBAL(ftrace_regs_caller_op_ptr)
/* regs go into 4th parameter */
leaq (%rsp), %rcx
-GLOBAL(ftrace_regs_call)
+SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
call ftrace_stub
/* Copy flags back to SS, to restore them */
@@ -228,7 +230,33 @@ GLOBAL(ftrace_regs_call)
movq R10(%rsp), %r10
movq RBX(%rsp), %rbx
- restore_mcount_regs
+ movq ORIG_RAX(%rsp), %rax
+ movq %rax, MCOUNT_REG_SIZE-8(%rsp)
+
+ /* If ORIG_RAX is anything but zero, make this a call to that */
+ movq ORIG_RAX(%rsp), %rax
+ cmpq $0, %rax
+ je 1f
+
+ /* Swap the flags with orig_rax */
+ movq MCOUNT_REG_SIZE(%rsp), %rdi
+ movq %rdi, MCOUNT_REG_SIZE-8(%rsp)
+ movq %rax, MCOUNT_REG_SIZE(%rsp)
+
+ restore_mcount_regs 8
+
+ jmp 2f
+
+1: restore_mcount_regs
+
+
+2:
+ /*
+ * The stack layout is nondetermistic here, depending on which path was
+ * taken. This confuses objtool and ORC, rightfully so. For now,
+ * pretend the stack always looks like the non-direct case.
+ */
+ UNWIND_HINT_RESTORE
/* Restore flags */
popfq
@@ -239,16 +267,16 @@ GLOBAL(ftrace_regs_call)
* The trampoline will add the code to jump
* to the return.
*/
-GLOBAL(ftrace_regs_caller_end)
+SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
jmp ftrace_epilogue
-ENDPROC(ftrace_regs_caller)
+SYM_FUNC_END(ftrace_regs_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
-ENTRY(function_hook)
+SYM_FUNC_START(__fentry__)
cmpq $ftrace_stub, ftrace_trace_function
jnz trace
@@ -261,7 +289,7 @@ fgraph_trace:
jnz ftrace_graph_caller
#endif
-GLOBAL(ftrace_stub)
+SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL)
retq
trace:
@@ -279,11 +307,12 @@ trace:
restore_mcount_regs
jmp fgraph_trace
-ENDPROC(function_hook)
+SYM_FUNC_END(__fentry__)
+EXPORT_SYMBOL(__fentry__)
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-ENTRY(ftrace_graph_caller)
+SYM_FUNC_START(ftrace_graph_caller)
/* Saves rbp into %rdx and fills first parameter */
save_mcount_regs
@@ -294,9 +323,9 @@ ENTRY(ftrace_graph_caller)
restore_mcount_regs
retq
-ENDPROC(ftrace_graph_caller)
+SYM_FUNC_END(ftrace_graph_caller)
-ENTRY(return_to_handler)
+SYM_CODE_START(return_to_handler)
UNWIND_HINT_EMPTY
subq $24, %rsp
@@ -312,5 +341,5 @@ ENTRY(return_to_handler)
movq (%rsp), %rax
addq $24, %rsp
JMP_NOSPEC %rdi
-END(return_to_handler)
+SYM_CODE_END(return_to_handler)
#endif
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 30f9cb2c0b55..3923ab4630d7 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -64,7 +64,7 @@ RESERVE_BRK(pagetables, INIT_MAP_SIZE)
* can.
*/
__HEAD
-ENTRY(startup_32)
+SYM_CODE_START(startup_32)
movl pa(initial_stack),%ecx
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
@@ -156,7 +156,7 @@ ENTRY(startup_32)
jmp *%eax
.Lbad_subarch:
-WEAK(xen_entry)
+SYM_INNER_LABEL_ALIGN(xen_entry, SYM_L_WEAK)
/* Unknown implementation; there's really
nothing we can do at this point. */
ud2a
@@ -172,6 +172,7 @@ num_subarch_entries = (. - subarch_entries) / 4
#else
jmp .Ldefault_entry
#endif /* CONFIG_PARAVIRT */
+SYM_CODE_END(startup_32)
#ifdef CONFIG_HOTPLUG_CPU
/*
@@ -179,12 +180,12 @@ num_subarch_entries = (. - subarch_entries) / 4
* up already except stack. We just set up stack here. Then call
* start_secondary().
*/
-ENTRY(start_cpu0)
+SYM_FUNC_START(start_cpu0)
movl initial_stack, %ecx
movl %ecx, %esp
call *(initial_code)
1: jmp 1b
-ENDPROC(start_cpu0)
+SYM_FUNC_END(start_cpu0)
#endif
/*
@@ -195,7 +196,7 @@ ENDPROC(start_cpu0)
* If cpu hotplug is not supported then this code can go in init section
* which will be freed later
*/
-ENTRY(startup_32_smp)
+SYM_FUNC_START(startup_32_smp)
cld
movl $(__BOOT_DS),%eax
movl %eax,%ds
@@ -362,7 +363,7 @@ ENTRY(startup_32_smp)
call *(initial_code)
1: jmp 1b
-ENDPROC(startup_32_smp)
+SYM_FUNC_END(startup_32_smp)
#include "verify_cpu.S"
@@ -392,7 +393,7 @@ setup_once:
andl $0,setup_once_ref /* Once is enough, thanks */
ret
-ENTRY(early_idt_handler_array)
+SYM_FUNC_START(early_idt_handler_array)
# 36(%esp) %eflags
# 32(%esp) %cs
# 28(%esp) %eip
@@ -407,9 +408,9 @@ ENTRY(early_idt_handler_array)
i = i + 1
.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
-ENDPROC(early_idt_handler_array)
+SYM_FUNC_END(early_idt_handler_array)
-early_idt_handler_common:
+SYM_CODE_START_LOCAL(early_idt_handler_common)
/*
* The stack is the hardware frame, an error code or zero, and the
* vector number.
@@ -460,10 +461,10 @@ early_idt_handler_common:
decl %ss:early_recursion_flag
addl $4, %esp /* pop pt_regs->orig_ax */
iret
-ENDPROC(early_idt_handler_common)
+SYM_CODE_END(early_idt_handler_common)
/* This is the default interrupt "handler" :-) */
-ENTRY(early_ignore_irq)
+SYM_FUNC_START(early_ignore_irq)
cld
#ifdef CONFIG_PRINTK
pushl %eax
@@ -498,19 +499,16 @@ ENTRY(early_ignore_irq)
hlt_loop:
hlt
jmp hlt_loop
-ENDPROC(early_ignore_irq)
+SYM_FUNC_END(early_ignore_irq)
__INITDATA
.align 4
-GLOBAL(early_recursion_flag)
- .long 0
+SYM_DATA(early_recursion_flag, .long 0)
__REFDATA
.align 4
-ENTRY(initial_code)
- .long i386_start_kernel
-ENTRY(setup_once_ref)
- .long setup_once
+SYM_DATA(initial_code, .long i386_start_kernel)
+SYM_DATA(setup_once_ref, .long setup_once)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
#define PGD_ALIGN (2 * PAGE_SIZE)
@@ -553,7 +551,7 @@ EXPORT_SYMBOL(empty_zero_page)
__PAGE_ALIGNED_DATA
/* Page-aligned for the benefit of paravirt? */
.align PGD_ALIGN
-ENTRY(initial_page_table)
+SYM_DATA_START(initial_page_table)
.long pa(initial_pg_pmd+PGD_IDENT_ATTR),0 /* low identity map */
# if KPMDS == 3
.long pa(initial_pg_pmd+PGD_IDENT_ATTR),0
@@ -571,17 +569,28 @@ ENTRY(initial_page_table)
# error "Kernel PMDs should be 1, 2 or 3"
# endif
.align PAGE_SIZE /* needs to be page-sized too */
+
+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * PTI needs another page so sync_initial_pagetable() works correctly
+ * and does not scribble over the data which is placed behind the
+ * actual initial_page_table. See clone_pgd_range().
+ */
+ .fill 1024, 4, 0
+#endif
+
+SYM_DATA_END(initial_page_table)
#endif
.data
.balign 4
-ENTRY(initial_stack)
- /*
- * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
- * unwinder reliably detect the end of the stack.
- */
- .long init_thread_union + THREAD_SIZE - SIZEOF_PTREGS - \
- TOP_OF_KERNEL_STACK_PADDING;
+/*
+ * The SIZEOF_PTREGS gap is a convention which helps the in-kernel unwinder
+ * reliably detect the end of the stack.
+ */
+SYM_DATA(initial_stack,
+ .long init_thread_union + THREAD_SIZE -
+ SIZEOF_PTREGS - TOP_OF_KERNEL_STACK_PADDING)
__INITRODATA
int_msg:
@@ -597,27 +606,28 @@ int_msg:
*/
.data
-.globl boot_gdt_descr
-
ALIGN
# early boot GDT descriptor (must use 1:1 address mapping)
.word 0 # 32 bit align gdt_desc.address
-boot_gdt_descr:
+SYM_DATA_START_LOCAL(boot_gdt_descr)
.word __BOOT_DS+7
.long boot_gdt - __PAGE_OFFSET
+SYM_DATA_END(boot_gdt_descr)
# boot GDT descriptor (later on used by CPU#0):
.word 0 # 32 bit align gdt_desc.address
-ENTRY(early_gdt_descr)
+SYM_DATA_START(early_gdt_descr)
.word GDT_ENTRIES*8-1
.long gdt_page /* Overwritten for secondary CPUs */
+SYM_DATA_END(early_gdt_descr)
/*
* The boot_gdt must mirror the equivalent in setup.S and is
* used only for booting.
*/
.align L1_CACHE_BYTES
-ENTRY(boot_gdt)
+SYM_DATA_START(boot_gdt)
.fill GDT_ENTRY_BOOT_CS,8,0
.quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */
.quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */
+SYM_DATA_END(boot_gdt)
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index f3d3e9646a99..4bbc770af632 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -49,8 +49,7 @@ L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text
__HEAD
.code64
- .globl startup_64
-startup_64:
+SYM_CODE_START_NOALIGN(startup_64)
UNWIND_HINT_EMPTY
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
@@ -90,7 +89,9 @@ startup_64:
/* Form the CR3 value being sure to include the CR3 modifier */
addq $(early_top_pgt - __START_KERNEL_map), %rax
jmp 1f
-ENTRY(secondary_startup_64)
+SYM_CODE_END(startup_64)
+
+SYM_CODE_START(secondary_startup_64)
UNWIND_HINT_EMPTY
/*
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
@@ -240,7 +241,7 @@ ENTRY(secondary_startup_64)
pushq %rax # target address in negative space
lretq
.Lafter_lret:
-END(secondary_startup_64)
+SYM_CODE_END(secondary_startup_64)
#include "verify_cpu.S"
@@ -250,30 +251,28 @@ END(secondary_startup_64)
* up already except stack. We just set up stack here. Then call
* start_secondary() via .Ljump_to_C_code.
*/
-ENTRY(start_cpu0)
+SYM_CODE_START(start_cpu0)
UNWIND_HINT_EMPTY
movq initial_stack(%rip), %rsp
jmp .Ljump_to_C_code
-END(start_cpu0)
+SYM_CODE_END(start_cpu0)
#endif
/* Both SMP bootup and ACPI suspend change these variables */
__REFDATA
.balign 8
- GLOBAL(initial_code)
- .quad x86_64_start_kernel
- GLOBAL(initial_gs)
- .quad INIT_PER_CPU_VAR(fixed_percpu_data)
- GLOBAL(initial_stack)
- /*
- * The SIZEOF_PTREGS gap is a convention which helps the in-kernel
- * unwinder reliably detect the end of the stack.
- */
- .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS
+SYM_DATA(initial_code, .quad x86_64_start_kernel)
+SYM_DATA(initial_gs, .quad INIT_PER_CPU_VAR(fixed_percpu_data))
+
+/*
+ * The SIZEOF_PTREGS gap is a convention which helps the in-kernel unwinder
+ * reliably detect the end of the stack.
+ */
+SYM_DATA(initial_stack, .quad init_thread_union + THREAD_SIZE - SIZEOF_PTREGS)
__FINITDATA
__INIT
-ENTRY(early_idt_handler_array)
+SYM_CODE_START(early_idt_handler_array)
i = 0
.rept NUM_EXCEPTION_VECTORS
.if ((EXCEPTION_ERRCODE_MASK >> i) & 1) == 0
@@ -289,9 +288,9 @@ ENTRY(early_idt_handler_array)
.fill early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
UNWIND_HINT_IRET_REGS offset=16
-END(early_idt_handler_array)
+SYM_CODE_END(early_idt_handler_array)
-early_idt_handler_common:
+SYM_CODE_START_LOCAL(early_idt_handler_common)
/*
* The stack is the hardware frame, an error code or zero, and the
* vector number.
@@ -333,17 +332,11 @@ early_idt_handler_common:
20:
decl early_recursion_flag(%rip)
jmp restore_regs_and_return_to_kernel
-END(early_idt_handler_common)
+SYM_CODE_END(early_idt_handler_common)
- __INITDATA
- .balign 4
-GLOBAL(early_recursion_flag)
- .long 0
-
-#define NEXT_PAGE(name) \
- .balign PAGE_SIZE; \
-GLOBAL(name)
+#define SYM_DATA_START_PAGE_ALIGNED(name) \
+ SYM_START(name, SYM_L_GLOBAL, .balign PAGE_SIZE)
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
@@ -358,11 +351,11 @@ GLOBAL(name)
*/
#define PTI_USER_PGD_FILL 512
/* This ensures they are 8k-aligned: */
-#define NEXT_PGD_PAGE(name) \
- .balign 2 * PAGE_SIZE; \
-GLOBAL(name)
+#define SYM_DATA_START_PTI_ALIGNED(name) \
+ SYM_START(name, SYM_L_GLOBAL, .balign 2 * PAGE_SIZE)
#else
-#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
+#define SYM_DATA_START_PTI_ALIGNED(name) \
+ SYM_DATA_START_PAGE_ALIGNED(name)
#define PTI_USER_PGD_FILL 0
#endif
@@ -375,17 +368,23 @@ GLOBAL(name)
.endr
__INITDATA
-NEXT_PGD_PAGE(early_top_pgt)
+ .balign 4
+
+SYM_DATA_START_PTI_ALIGNED(early_top_pgt)
.fill 512,8,0
.fill PTI_USER_PGD_FILL,8,0
+SYM_DATA_END(early_top_pgt)
-NEXT_PAGE(early_dynamic_pgts)
+SYM_DATA_START_PAGE_ALIGNED(early_dynamic_pgts)
.fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
+SYM_DATA_END(early_dynamic_pgts)
+
+SYM_DATA(early_recursion_flag, .long 0)
.data
#if defined(CONFIG_XEN_PV) || defined(CONFIG_PVH)
-NEXT_PGD_PAGE(init_top_pgt)
+SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.org init_top_pgt + L4_PAGE_OFFSET*8, 0
.quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
@@ -393,11 +392,13 @@ NEXT_PGD_PAGE(init_top_pgt)
/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
.fill PTI_USER_PGD_FILL,8,0
+SYM_DATA_END(init_top_pgt)
-NEXT_PAGE(level3_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level3_ident_pgt)
.quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.fill 511, 8, 0
-NEXT_PAGE(level2_ident_pgt)
+SYM_DATA_END(level3_ident_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level2_ident_pgt)
/*
* Since I easily can, map the first 1G.
* Don't set NX because code runs from these pages.
@@ -407,25 +408,29 @@ NEXT_PAGE(level2_ident_pgt)
* the CPU should ignore the bit.
*/
PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+SYM_DATA_END(level2_ident_pgt)
#else
-NEXT_PGD_PAGE(init_top_pgt)
+SYM_DATA_START_PTI_ALIGNED(init_top_pgt)
.fill 512,8,0
.fill PTI_USER_PGD_FILL,8,0
+SYM_DATA_END(init_top_pgt)
#endif
#ifdef CONFIG_X86_5LEVEL
-NEXT_PAGE(level4_kernel_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level4_kernel_pgt)
.fill 511,8,0
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+SYM_DATA_END(level4_kernel_pgt)
#endif
-NEXT_PAGE(level3_kernel_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level3_kernel_pgt)
.fill L3_START_KERNEL,8,0
/* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */
.quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
.quad level2_fixmap_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
+SYM_DATA_END(level3_kernel_pgt)
-NEXT_PAGE(level2_kernel_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level2_kernel_pgt)
/*
* 512 MB kernel mapping. We spend a full page on this pagetable
* anyway.
@@ -442,8 +447,9 @@ NEXT_PAGE(level2_kernel_pgt)
*/
PMDS(0, __PAGE_KERNEL_LARGE_EXEC,
KERNEL_IMAGE_SIZE/PMD_SIZE)
+SYM_DATA_END(level2_kernel_pgt)
-NEXT_PAGE(level2_fixmap_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level2_fixmap_pgt)
.fill (512 - 4 - FIXMAP_PMD_NUM),8,0
pgtno = 0
.rept (FIXMAP_PMD_NUM)
@@ -453,31 +459,32 @@ NEXT_PAGE(level2_fixmap_pgt)
.endr
/* 6 MB reserved space + a 2MB hole */
.fill 4,8,0
+SYM_DATA_END(level2_fixmap_pgt)
-NEXT_PAGE(level1_fixmap_pgt)
+SYM_DATA_START_PAGE_ALIGNED(level1_fixmap_pgt)
.rept (FIXMAP_PMD_NUM)
.fill 512,8,0
.endr
+SYM_DATA_END(level1_fixmap_pgt)
#undef PMDS
.data
.align 16
- .globl early_gdt_descr
-early_gdt_descr:
- .word GDT_ENTRIES*8-1
-early_gdt_descr_base:
- .quad INIT_PER_CPU_VAR(gdt_page)
-
-ENTRY(phys_base)
- /* This must match the first entry in level2_kernel_pgt */
- .quad 0x0000000000000000
+
+SYM_DATA(early_gdt_descr, .word GDT_ENTRIES*8-1)
+SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))
+
+ .align 16
+/* This must match the first entry in level2_kernel_pgt */
+SYM_DATA(phys_base, .quad 0x0)
EXPORT_SYMBOL(phys_base)
#include "../../x86/xen/xen-head.S"
__PAGE_ALIGNED_BSS
-NEXT_PAGE(empty_zero_page)
+SYM_DATA_START_PAGE_ALIGNED(empty_zero_page)
.skip PAGE_SIZE
+SYM_DATA_END(empty_zero_page)
EXPORT_SYMBOL(empty_zero_page)
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 61a89d3c0382..8abeee0dd7bf 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -3,32 +3,69 @@
* This contains the io-permission bitmap code - written by obz, with changes
* by Linus. 32/64 bits code unification by Miguel Botón.
*/
-
-#include <linux/sched.h>
-#include <linux/sched/task_stack.h>
-#include <linux/kernel.h>
#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
#include <linux/security.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/slab.h>
-#include <linux/thread_info.h>
#include <linux/syscalls.h>
#include <linux/bitmap.h>
-#include <asm/syscalls.h>
+#include <linux/ioport.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+
+#include <asm/io_bitmap.h>
#include <asm/desc.h>
+#ifdef CONFIG_X86_IOPL_IOPERM
+
+static atomic64_t io_bitmap_sequence;
+
+void io_bitmap_share(struct task_struct *tsk)
+{
+ /* Can be NULL when current->thread.iopl_emul == 3 */
+ if (current->thread.io_bitmap) {
+ /*
+ * Take a refcount on current's bitmap. It can be used by
+ * both tasks as long as none of them changes the bitmap.
+ */
+ refcount_inc(&current->thread.io_bitmap->refcnt);
+ tsk->thread.io_bitmap = current->thread.io_bitmap;
+ }
+ set_tsk_thread_flag(tsk, TIF_IO_BITMAP);
+}
+
+static void task_update_io_bitmap(void)
+{
+ struct thread_struct *t = &current->thread;
+
+ if (t->iopl_emul == 3 || t->io_bitmap) {
+ /* TSS update is handled on exit to user space */
+ set_thread_flag(TIF_IO_BITMAP);
+ } else {
+ clear_thread_flag(TIF_IO_BITMAP);
+ /* Invalidate TSS */
+ preempt_disable();
+ tss_update_io_bitmap();
+ preempt_enable();
+ }
+}
+
+void io_bitmap_exit(void)
+{
+ struct io_bitmap *iobm = current->thread.io_bitmap;
+
+ current->thread.io_bitmap = NULL;
+ task_update_io_bitmap();
+ if (iobm && refcount_dec_and_test(&iobm->refcnt))
+ kfree(iobm);
+}
+
/*
- * this changes the io permissions bitmap in the current task.
+ * This changes the io permissions bitmap in the current task.
*/
long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
{
struct thread_struct *t = &current->thread;
- struct tss_struct *tss;
- unsigned int i, max_long, bytes, bytes_updated;
+ unsigned int i, max_long;
+ struct io_bitmap *iobm;
if ((from + num <= from) || (from + num > IO_BITMAP_BITS))
return -EINVAL;
@@ -41,59 +78,72 @@ long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
* IO bitmap up. ioperm() is much less timing critical than clone(),
* this is why we delay this operation until now:
*/
- if (!t->io_bitmap_ptr) {
- unsigned long *bitmap = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
-
- if (!bitmap)
+ iobm = t->io_bitmap;
+ if (!iobm) {
+ /* No point to allocate a bitmap just to clear permissions */
+ if (!turn_on)
+ return 0;
+ iobm = kmalloc(sizeof(*iobm), GFP_KERNEL);
+ if (!iobm)
return -ENOMEM;
- memset(bitmap, 0xff, IO_BITMAP_BYTES);
- t->io_bitmap_ptr = bitmap;
- set_thread_flag(TIF_IO_BITMAP);
+ memset(iobm->bitmap, 0xff, sizeof(iobm->bitmap));
+ refcount_set(&iobm->refcnt, 1);
+ }
- /*
- * Now that we have an IO bitmap, we need our TSS limit to be
- * correct. It's fine if we are preempted after doing this:
- * with TIF_IO_BITMAP set, context switches will keep our TSS
- * limit correct.
- */
- preempt_disable();
- refresh_tss_limit();
- preempt_enable();
+ /*
+ * If the bitmap is not shared, then nothing can take a refcount as
+ * current can obviously not fork at the same time. If it's shared
+ * duplicate it and drop the refcount on the original one.
+ */
+ if (refcount_read(&iobm->refcnt) > 1) {
+ iobm = kmemdup(iobm, sizeof(*iobm), GFP_KERNEL);
+ if (!iobm)
+ return -ENOMEM;
+ refcount_set(&iobm->refcnt, 1);
+ io_bitmap_exit();
}
/*
- * do it in the per-thread copy and in the TSS ...
- *
- * Disable preemption via get_cpu() - we must not switch away
- * because the ->io_bitmap_max value must match the bitmap
- * contents:
+ * Store the bitmap pointer (might be the same if the task already
+ * head one). Must be done here so freeing the bitmap when all
+ * permissions are dropped has the pointer set up.
*/
- tss = &per_cpu(cpu_tss_rw, get_cpu());
+ t->io_bitmap = iobm;
+ /* Mark it active for context switching and exit to user mode */
+ set_thread_flag(TIF_IO_BITMAP);
+ /*
+ * Update the tasks bitmap. The update of the TSS bitmap happens on
+ * exit to user mode. So this needs no protection.
+ */
if (turn_on)
- bitmap_clear(t->io_bitmap_ptr, from, num);
+ bitmap_clear(iobm->bitmap, from, num);
else
- bitmap_set(t->io_bitmap_ptr, from, num);
+ bitmap_set(iobm->bitmap, from, num);
/*
* Search for a (possibly new) maximum. This is simple and stupid,
* to keep it obviously correct:
*/
- max_long = 0;
- for (i = 0; i < IO_BITMAP_LONGS; i++)
- if (t->io_bitmap_ptr[i] != ~0UL)
+ max_long = UINT_MAX;
+ for (i = 0; i < IO_BITMAP_LONGS; i++) {
+ if (iobm->bitmap[i] != ~0UL)
max_long = i;
+ }
+ /* All permissions dropped? */
+ if (max_long == UINT_MAX) {
+ io_bitmap_exit();
+ return 0;
+ }
- bytes = (max_long + 1) * sizeof(unsigned long);
- bytes_updated = max(bytes, t->io_bitmap_max);
-
- t->io_bitmap_max = bytes;
-
- /* Update the TSS: */
- memcpy(tss->io_bitmap, t->io_bitmap_ptr, bytes_updated);
+ iobm->max = (max_long + 1) * sizeof(unsigned long);
- put_cpu();
+ /*
+ * Update the sequence number to force a TSS update on return to
+ * user mode.
+ */
+ iobm->sequence = atomic64_add_return(1, &io_bitmap_sequence);
return 0;
}
@@ -104,38 +154,61 @@ SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
}
/*
- * sys_iopl has to be used when you want to access the IO ports
- * beyond the 0x3ff range: to get the full 65536 ports bitmapped
- * you'd need 8kB of bitmaps/process, which is a bit excessive.
+ * The sys_iopl functionality depends on the level argument, which if
+ * granted for the task is used to enable access to all 65536 I/O ports.
+ *
+ * This does not use the IOPL mechanism provided by the CPU as that would
+ * also allow the user space task to use the CLI/STI instructions.
*
- * Here we just change the flags value on the stack: we allow
- * only the super-user to do it. This depends on the stack-layout
- * on system-call entry - see also fork() and the signal handling
- * code.
+ * Disabling interrupts in a user space task is dangerous as it might lock
+ * up the machine and the semantics vs. syscalls and exceptions is
+ * undefined.
+ *
+ * Setting IOPL to level 0-2 is disabling I/O permissions. Level 3
+ * 3 enables them.
+ *
+ * IOPL is strictly per thread and inherited on fork.
*/
SYSCALL_DEFINE1(iopl, unsigned int, level)
{
- struct pt_regs *regs = current_pt_regs();
struct thread_struct *t = &current->thread;
-
- /*
- * Careful: the IOPL bits in regs->flags are undefined under Xen PV
- * and changing them has no effect.
- */
- unsigned int old = t->iopl >> X86_EFLAGS_IOPL_BIT;
+ unsigned int old;
if (level > 3)
return -EINVAL;
+
+ old = t->iopl_emul;
+
+ /* No point in going further if nothing changes */
+ if (level == old)
+ return 0;
+
/* Trying to gain more privileges? */
if (level > old) {
if (!capable(CAP_SYS_RAWIO) ||
security_locked_down(LOCKDOWN_IOPORT))
return -EPERM;
}
- regs->flags = (regs->flags & ~X86_EFLAGS_IOPL) |
- (level << X86_EFLAGS_IOPL_BIT);
- t->iopl = level << X86_EFLAGS_IOPL_BIT;
- set_iopl_mask(t->iopl);
+
+ t->iopl_emul = level;
+ task_update_io_bitmap();
return 0;
}
+
+#else /* CONFIG_X86_IOPL_IOPERM */
+
+long ksys_ioperm(unsigned long from, unsigned long num, int turn_on)
+{
+ return -ENOSYS;
+}
+SYSCALL_DEFINE3(ioperm, unsigned long, from, unsigned long, num, int, turn_on)
+{
+ return -ENOSYS;
+}
+
+SYSCALL_DEFINE1(iopl, unsigned int, level)
+{
+ return -ENOSYS;
+}
+#endif
diff --git a/arch/x86/kernel/irqflags.S b/arch/x86/kernel/irqflags.S
index ddeeaac8adda..0db0375235b4 100644
--- a/arch/x86/kernel/irqflags.S
+++ b/arch/x86/kernel/irqflags.S
@@ -7,20 +7,20 @@
/*
* unsigned long native_save_fl(void)
*/
-ENTRY(native_save_fl)
+SYM_FUNC_START(native_save_fl)
pushf
pop %_ASM_AX
ret
-ENDPROC(native_save_fl)
+SYM_FUNC_END(native_save_fl)
EXPORT_SYMBOL(native_save_fl)
/*
* void native_restore_fl(unsigned long flags)
* %eax/%rdi: flags
*/
-ENTRY(native_restore_fl)
+SYM_FUNC_START(native_restore_fl)
push %_ASM_ARG1
popf
ret
-ENDPROC(native_restore_fl)
+SYM_FUNC_END(native_restore_fl)
EXPORT_SYMBOL(native_restore_fl)
diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c
index 3ad34f01de2a..6eb8b50ea07e 100644
--- a/arch/x86/kernel/jailhouse.c
+++ b/arch/x86/kernel/jailhouse.c
@@ -11,6 +11,7 @@
#include <linux/acpi_pmtmr.h>
#include <linux/kernel.h>
#include <linux/reboot.h>
+#include <linux/serial_8250.h>
#include <asm/apic.h>
#include <asm/cpu.h>
#include <asm/hypervisor.h>
@@ -21,9 +22,24 @@
#include <asm/setup.h>
#include <asm/jailhouse_para.h>
-static __initdata struct jailhouse_setup_data setup_data;
+static struct jailhouse_setup_data setup_data;
+#define SETUP_DATA_V1_LEN (sizeof(setup_data.hdr) + sizeof(setup_data.v1))
+#define SETUP_DATA_V2_LEN (SETUP_DATA_V1_LEN + sizeof(setup_data.v2))
+
static unsigned int precalibrated_tsc_khz;
+static void jailhouse_setup_irq(unsigned int irq)
+{
+ struct mpc_intsrc mp_irq = {
+ .type = MP_INTSRC,
+ .irqtype = mp_INT,
+ .irqflag = MP_IRQPOL_ACTIVE_HIGH | MP_IRQTRIG_EDGE,
+ .srcbusirq = irq,
+ .dstirq = irq,
+ };
+ mp_save_irq(&mp_irq);
+}
+
static uint32_t jailhouse_cpuid_base(void)
{
if (boot_cpu_data.cpuid_level < 0 ||
@@ -45,7 +61,7 @@ static void jailhouse_get_wallclock(struct timespec64 *now)
static void __init jailhouse_timer_init(void)
{
- lapic_timer_period = setup_data.apic_khz * (1000 / HZ);
+ lapic_timer_period = setup_data.v1.apic_khz * (1000 / HZ);
}
static unsigned long jailhouse_get_tsc(void)
@@ -77,33 +93,28 @@ static void __init jailhouse_get_smp_config(unsigned int early)
.type = IOAPIC_DOMAIN_STRICT,
.ops = &mp_ioapic_irqdomain_ops,
};
- struct mpc_intsrc mp_irq = {
- .type = MP_INTSRC,
- .irqtype = mp_INT,
- .irqflag = MP_IRQPOL_ACTIVE_HIGH | MP_IRQTRIG_EDGE,
- };
unsigned int cpu;
jailhouse_x2apic_init();
register_lapic_address(0xfee00000);
- for (cpu = 0; cpu < setup_data.num_cpus; cpu++) {
- generic_processor_info(setup_data.cpu_ids[cpu],
+ for (cpu = 0; cpu < setup_data.v1.num_cpus; cpu++) {
+ generic_processor_info(setup_data.v1.cpu_ids[cpu],
boot_cpu_apic_version);
}
smp_found_config = 1;
- if (setup_data.standard_ioapic) {
+ if (setup_data.v1.standard_ioapic) {
mp_register_ioapic(0, 0xfec00000, gsi_top, &ioapic_cfg);
- /* Register 1:1 mapping for legacy UART IRQs 3 and 4 */
- mp_irq.srcbusirq = mp_irq.dstirq = 3;
- mp_save_irq(&mp_irq);
-
- mp_irq.srcbusirq = mp_irq.dstirq = 4;
- mp_save_irq(&mp_irq);
+ if (IS_ENABLED(CONFIG_SERIAL_8250) &&
+ setup_data.hdr.version < 2) {
+ /* Register 1:1 mapping for legacy UART IRQs 3 and 4 */
+ jailhouse_setup_irq(3);
+ jailhouse_setup_irq(4);
+ }
}
}
@@ -126,9 +137,9 @@ static int __init jailhouse_pci_arch_init(void)
pcibios_last_bus = 0xff;
#ifdef CONFIG_PCI_MMCONFIG
- if (setup_data.pci_mmconfig_base) {
+ if (setup_data.v1.pci_mmconfig_base) {
pci_mmconfig_add(0, 0, pcibios_last_bus,
- setup_data.pci_mmconfig_base);
+ setup_data.v1.pci_mmconfig_base);
pci_mmcfg_arch_init();
}
#endif
@@ -136,9 +147,57 @@ static int __init jailhouse_pci_arch_init(void)
return 0;
}
+#ifdef CONFIG_SERIAL_8250
+static inline bool jailhouse_uart_enabled(unsigned int uart_nr)
+{
+ return setup_data.v2.flags & BIT(uart_nr);
+}
+
+static void jailhouse_serial_fixup(int port, struct uart_port *up,
+ u32 *capabilities)
+{
+ static const u16 pcuart_base[] = {0x3f8, 0x2f8, 0x3e8, 0x2e8};
+ unsigned int n;
+
+ for (n = 0; n < ARRAY_SIZE(pcuart_base); n++) {
+ if (pcuart_base[n] != up->iobase)
+ continue;
+
+ if (jailhouse_uart_enabled(n)) {
+ pr_info("Enabling UART%u (port 0x%lx)\n", n,
+ up->iobase);
+ jailhouse_setup_irq(up->irq);
+ } else {
+ /* Deactivate UART if access isn't allowed */
+ up->iobase = 0;
+ }
+ break;
+ }
+}
+
+static void __init jailhouse_serial_workaround(void)
+{
+ /*
+ * There are flags inside setup_data that indicate availability of
+ * platform UARTs since setup data version 2.
+ *
+ * In case of version 1, we don't know which UARTs belong Linux. In
+ * this case, unconditionally register 1:1 mapping for legacy UART IRQs
+ * 3 and 4.
+ */
+ if (setup_data.hdr.version > 1)
+ serial8250_set_isa_configurator(jailhouse_serial_fixup);
+}
+#else /* !CONFIG_SERIAL_8250 */
+static inline void jailhouse_serial_workaround(void)
+{
+}
+#endif /* CONFIG_SERIAL_8250 */
+
static void __init jailhouse_init_platform(void)
{
u64 pa_data = boot_params.hdr.setup_data;
+ unsigned long setup_data_len;
struct setup_data header;
void *mapping;
@@ -163,16 +222,8 @@ static void __init jailhouse_init_platform(void)
memcpy(&header, mapping, sizeof(header));
early_memunmap(mapping, sizeof(header));
- if (header.type == SETUP_JAILHOUSE &&
- header.len >= sizeof(setup_data)) {
- pa_data += offsetof(struct setup_data, data);
-
- mapping = early_memremap(pa_data, sizeof(setup_data));
- memcpy(&setup_data, mapping, sizeof(setup_data));
- early_memunmap(mapping, sizeof(setup_data));
-
+ if (header.type == SETUP_JAILHOUSE)
break;
- }
pa_data = header.next;
}
@@ -180,13 +231,28 @@ static void __init jailhouse_init_platform(void)
if (!pa_data)
panic("Jailhouse: No valid setup data found");
- if (setup_data.compatible_version > JAILHOUSE_SETUP_REQUIRED_VERSION)
- panic("Jailhouse: Unsupported setup data structure");
-
- pmtmr_ioport = setup_data.pm_timer_address;
+ /* setup data must at least contain the header */
+ if (header.len < sizeof(setup_data.hdr))
+ goto unsupported;
+
+ pa_data += offsetof(struct setup_data, data);
+ setup_data_len = min_t(unsigned long, sizeof(setup_data),
+ (unsigned long)header.len);
+ mapping = early_memremap(pa_data, setup_data_len);
+ memcpy(&setup_data, mapping, setup_data_len);
+ early_memunmap(mapping, setup_data_len);
+
+ if (setup_data.hdr.version == 0 ||
+ setup_data.hdr.compatible_version !=
+ JAILHOUSE_SETUP_REQUIRED_VERSION ||
+ (setup_data.hdr.version == 1 && header.len < SETUP_DATA_V1_LEN) ||
+ (setup_data.hdr.version >= 2 && header.len < SETUP_DATA_V2_LEN))
+ goto unsupported;
+
+ pmtmr_ioport = setup_data.v1.pm_timer_address;
pr_debug("Jailhouse: PM-Timer IO Port: %#x\n", pmtmr_ioport);
- precalibrated_tsc_khz = setup_data.tsc_khz;
+ precalibrated_tsc_khz = setup_data.v1.tsc_khz;
setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
pci_probe = 0;
@@ -196,6 +262,12 @@ static void __init jailhouse_init_platform(void)
* are none in a non-root cell.
*/
disable_acpi();
+
+ jailhouse_serial_workaround();
+ return;
+
+unsupported:
+ panic("Jailhouse: Unsupported setup data structure");
}
bool jailhouse_paravirt(void)
diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c
index 044053235302..c1a8b9e71408 100644
--- a/arch/x86/kernel/jump_label.c
+++ b/arch/x86/kernel/jump_label.c
@@ -89,8 +89,7 @@ static void __ref __jump_label_transform(struct jump_entry *entry,
return;
}
- text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
- (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+ text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
}
void arch_jump_label_transform(struct jump_entry *entry,
@@ -147,11 +146,9 @@ bool arch_jump_label_transform_queue(struct jump_entry *entry,
}
__jump_label_set_jump_code(entry, type,
- (union jump_code_union *) &tp->opcode, 0);
+ (union jump_code_union *)&tp->text, 0);
- tp->addr = entry_code;
- tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
- tp->len = JUMP_LABEL_NOP_SIZE;
+ text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);
tp_vec_nr++;
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c
index edaa30b20841..64b6da95af98 100644
--- a/arch/x86/kernel/kdebugfs.c
+++ b/arch/x86/kernel/kdebugfs.c
@@ -44,7 +44,12 @@ static ssize_t setup_data_read(struct file *file, char __user *user_buf,
if (count > node->len - pos)
count = node->len - pos;
- pa = node->paddr + sizeof(struct setup_data) + pos;
+ pa = node->paddr + pos;
+
+ /* Is it direct data or invalid indirect one? */
+ if (!(node->type & SETUP_INDIRECT) || node->type == SETUP_INDIRECT)
+ pa += sizeof(struct setup_data);
+
p = memremap(pa, count, MEMREMAP_WB);
if (!p)
return -ENOMEM;
@@ -108,9 +113,17 @@ static int __init create_setup_data_nodes(struct dentry *parent)
goto err_dir;
}
- node->paddr = pa_data;
- node->type = data->type;
- node->len = data->len;
+ if (data->type == SETUP_INDIRECT &&
+ ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+ node->paddr = ((struct setup_indirect *)data->data)->addr;
+ node->type = ((struct setup_indirect *)data->data)->type;
+ node->len = ((struct setup_indirect *)data->data)->len;
+ } else {
+ node->paddr = pa_data;
+ node->type = data->type;
+ node->len = data->len;
+ }
+
create_setup_data_node(d, no, node);
pa_data = data->next;
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 43fc13c831af..4f13af7cbcdb 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -351,6 +351,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn)
kernel_insn_init(insn, dest, MAX_INSN_SIZE);
insn_get_length(insn);
+ /* We can not probe force emulate prefixed instruction */
+ if (insn_has_emulate_prefix(insn))
+ return 0;
+
/* Another subsystem puts a breakpoint, failed to recover */
if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION)
return 0;
diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c
index b348dd506d58..8900329c28a7 100644
--- a/arch/x86/kernel/kprobes/opt.c
+++ b/arch/x86/kernel/kprobes/opt.c
@@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_head *oplist)
insn_buff[0] = RELATIVEJUMP_OPCODE;
*(s32 *)(&insn_buff[1]) = rel;
- text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);
list_del_init(&op->list);
}
@@ -448,12 +447,18 @@ void arch_optimize_kprobes(struct list_head *oplist)
void arch_unoptimize_kprobe(struct optimized_kprobe *op)
{
u8 insn_buff[RELATIVEJUMP_SIZE];
+ u8 emulate_buff[RELATIVEJUMP_SIZE];
/* Set int3 to first byte for kprobes */
insn_buff[0] = BREAKPOINT_INSTRUCTION;
memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+ emulate_buff[0] = RELATIVEJUMP_OPCODE;
+ *(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
+ ((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
- op->optinsn.insn);
+ emulate_buff);
}
/*
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c
index 7969da939213..d0a19121c6a4 100644
--- a/arch/x86/kernel/ksysfs.c
+++ b/arch/x86/kernel/ksysfs.c
@@ -100,7 +100,12 @@ static int __init get_setup_data_size(int nr, size_t *size)
if (!data)
return -ENOMEM;
if (nr == i) {
- *size = data->len;
+ if (data->type == SETUP_INDIRECT &&
+ ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
+ *size = ((struct setup_indirect *)data->data)->len;
+ else
+ *size = data->len;
+
memunmap(data);
return 0;
}
@@ -130,7 +135,10 @@ static ssize_t type_show(struct kobject *kobj,
if (!data)
return -ENOMEM;
- ret = sprintf(buf, "0x%x\n", data->type);
+ if (data->type == SETUP_INDIRECT)
+ ret = sprintf(buf, "0x%x\n", ((struct setup_indirect *)data->data)->type);
+ else
+ ret = sprintf(buf, "0x%x\n", data->type);
memunmap(data);
return ret;
}
@@ -142,7 +150,7 @@ static ssize_t setup_data_data_read(struct file *fp,
loff_t off, size_t count)
{
int nr, ret = 0;
- u64 paddr;
+ u64 paddr, len;
struct setup_data *data;
void *p;
@@ -157,19 +165,28 @@ static ssize_t setup_data_data_read(struct file *fp,
if (!data)
return -ENOMEM;
- if (off > data->len) {
+ if (data->type == SETUP_INDIRECT &&
+ ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+ paddr = ((struct setup_indirect *)data->data)->addr;
+ len = ((struct setup_indirect *)data->data)->len;
+ } else {
+ paddr += sizeof(*data);
+ len = data->len;
+ }
+
+ if (off > len) {
ret = -EINVAL;
goto out;
}
- if (count > data->len - off)
- count = data->len - off;
+ if (count > len - off)
+ count = len - off;
if (!count)
goto out;
ret = count;
- p = memremap(paddr + sizeof(*data), data->len, MEMREMAP_WB);
+ p = memremap(paddr, len, MEMREMAP_WB);
if (!p) {
ret = -ENOMEM;
goto out;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 5dcd438ad8f2..16e125a50b33 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -298,48 +298,6 @@ static void load_segments(void)
);
}
-#ifdef CONFIG_KEXEC_FILE
-/* Update purgatory as needed after various image segments have been prepared */
-static int arch_update_purgatory(struct kimage *image)
-{
- int ret = 0;
-
- if (!image->file_mode)
- return 0;
-
- /* Setup copying of backup region */
- if (image->type == KEXEC_TYPE_CRASH) {
- ret = kexec_purgatory_get_set_symbol(image,
- "purgatory_backup_dest",
- &image->arch.backup_load_addr,
- sizeof(image->arch.backup_load_addr), 0);
- if (ret)
- return ret;
-
- ret = kexec_purgatory_get_set_symbol(image,
- "purgatory_backup_src",
- &image->arch.backup_src_start,
- sizeof(image->arch.backup_src_start), 0);
- if (ret)
- return ret;
-
- ret = kexec_purgatory_get_set_symbol(image,
- "purgatory_backup_sz",
- &image->arch.backup_src_sz,
- sizeof(image->arch.backup_src_sz), 0);
- if (ret)
- return ret;
- }
-
- return ret;
-}
-#else /* !CONFIG_KEXEC_FILE */
-static inline int arch_update_purgatory(struct kimage *image)
-{
- return 0;
-}
-#endif /* CONFIG_KEXEC_FILE */
-
int machine_kexec_prepare(struct kimage *image)
{
unsigned long start_pgtable;
@@ -353,11 +311,6 @@ int machine_kexec_prepare(struct kimage *image)
if (result)
return result;
- /* update purgatory as needed */
- result = arch_update_purgatory(image);
- if (result)
- return result;
-
return 0;
}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 59d3d2763a9e..789f5e4f89de 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -341,8 +341,6 @@ struct paravirt_patch_template pv_ops = {
.cpu.iret = native_iret,
.cpu.swapgs = native_swapgs,
- .cpu.set_iopl_mask = native_set_iopl_mask,
-
.cpu.start_context_switch = paravirt_nop,
.cpu.end_context_switch = paravirt_nop,
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
deleted file mode 100644
index 23fdec030c37..000000000000
--- a/arch/x86/kernel/pci-calgary_64.c
+++ /dev/null
@@ -1,1586 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Derived from arch/powerpc/kernel/iommu.c
- *
- * Copyright IBM Corporation, 2006-2007
- * Copyright (C) 2006 Jon Mason <jdmason@kudzu.us>
- *
- * Author: Jon Mason <jdmason@kudzu.us>
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
-
- */
-
-#define pr_fmt(fmt) "Calgary: " fmt
-
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/crash_dump.h>
-#include <linux/dma-mapping.h>
-#include <linux/dma-direct.h>
-#include <linux/bitmap.h>
-#include <linux/pci_ids.h>
-#include <linux/pci.h>
-#include <linux/delay.h>
-#include <linux/scatterlist.h>
-#include <linux/iommu-helper.h>
-
-#include <asm/iommu.h>
-#include <asm/calgary.h>
-#include <asm/tce.h>
-#include <asm/pci-direct.h>
-#include <asm/dma.h>
-#include <asm/rio.h>
-#include <asm/bios_ebda.h>
-#include <asm/x86_init.h>
-#include <asm/iommu_table.h>
-
-#ifdef CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT
-int use_calgary __read_mostly = 1;
-#else
-int use_calgary __read_mostly = 0;
-#endif /* CONFIG_CALGARY_DEFAULT_ENABLED */
-
-#define PCI_DEVICE_ID_IBM_CALGARY 0x02a1
-#define PCI_DEVICE_ID_IBM_CALIOC2 0x0308
-
-/* register offsets inside the host bridge space */
-#define CALGARY_CONFIG_REG 0x0108
-#define PHB_CSR_OFFSET 0x0110 /* Channel Status */
-#define PHB_PLSSR_OFFSET 0x0120
-#define PHB_CONFIG_RW_OFFSET 0x0160
-#define PHB_IOBASE_BAR_LOW 0x0170
-#define PHB_IOBASE_BAR_HIGH 0x0180
-#define PHB_MEM_1_LOW 0x0190
-#define PHB_MEM_1_HIGH 0x01A0
-#define PHB_IO_ADDR_SIZE 0x01B0
-#define PHB_MEM_1_SIZE 0x01C0
-#define PHB_MEM_ST_OFFSET 0x01D0
-#define PHB_AER_OFFSET 0x0200
-#define PHB_CONFIG_0_HIGH 0x0220
-#define PHB_CONFIG_0_LOW 0x0230
-#define PHB_CONFIG_0_END 0x0240
-#define PHB_MEM_2_LOW 0x02B0
-#define PHB_MEM_2_HIGH 0x02C0
-#define PHB_MEM_2_SIZE_HIGH 0x02D0
-#define PHB_MEM_2_SIZE_LOW 0x02E0
-#define PHB_DOSHOLE_OFFSET 0x08E0
-
-/* CalIOC2 specific */
-#define PHB_SAVIOR_L2 0x0DB0
-#define PHB_PAGE_MIG_CTRL 0x0DA8
-#define PHB_PAGE_MIG_DEBUG 0x0DA0
-#define PHB_ROOT_COMPLEX_STATUS 0x0CB0
-
-/* PHB_CONFIG_RW */
-#define PHB_TCE_ENABLE 0x20000000
-#define PHB_SLOT_DISABLE 0x1C000000
-#define PHB_DAC_DISABLE 0x01000000
-#define PHB_MEM2_ENABLE 0x00400000
-#define PHB_MCSR_ENABLE 0x00100000
-/* TAR (Table Address Register) */
-#define TAR_SW_BITS 0x0000ffffffff800fUL
-#define TAR_VALID 0x0000000000000008UL
-/* CSR (Channel/DMA Status Register) */
-#define CSR_AGENT_MASK 0xffe0ffff
-/* CCR (Calgary Configuration Register) */
-#define CCR_2SEC_TIMEOUT 0x000000000000000EUL
-/* PMCR/PMDR (Page Migration Control/Debug Registers */
-#define PMR_SOFTSTOP 0x80000000
-#define PMR_SOFTSTOPFAULT 0x40000000
-#define PMR_HARDSTOP 0x20000000
-
-/*
- * The maximum PHB bus number.
- * x3950M2 (rare): 8 chassis, 48 PHBs per chassis = 384
- * x3950M2: 4 chassis, 48 PHBs per chassis = 192
- * x3950 (PCIE): 8 chassis, 32 PHBs per chassis = 256
- * x3950 (PCIX): 8 chassis, 16 PHBs per chassis = 128
- */
-#define MAX_PHB_BUS_NUM 256
-
-#define PHBS_PER_CALGARY 4
-
-/* register offsets in Calgary's internal register space */
-static const unsigned long tar_offsets[] = {
- 0x0580 /* TAR0 */,
- 0x0588 /* TAR1 */,
- 0x0590 /* TAR2 */,
- 0x0598 /* TAR3 */
-};
-
-static const unsigned long split_queue_offsets[] = {
- 0x4870 /* SPLIT QUEUE 0 */,
- 0x5870 /* SPLIT QUEUE 1 */,
- 0x6870 /* SPLIT QUEUE 2 */,
- 0x7870 /* SPLIT QUEUE 3 */
-};
-
-static const unsigned long phb_offsets[] = {
- 0x8000 /* PHB0 */,
- 0x9000 /* PHB1 */,
- 0xA000 /* PHB2 */,
- 0xB000 /* PHB3 */
-};
-
-/* PHB debug registers */
-
-static const unsigned long phb_debug_offsets[] = {
- 0x4000 /* PHB 0 DEBUG */,
- 0x5000 /* PHB 1 DEBUG */,
- 0x6000 /* PHB 2 DEBUG */,
- 0x7000 /* PHB 3 DEBUG */
-};
-
-/*
- * STUFF register for each debug PHB,
- * byte 1 = start bus number, byte 2 = end bus number
- */
-
-#define PHB_DEBUG_STUFF_OFFSET 0x0020
-
-unsigned int specified_table_size = TCE_TABLE_SIZE_UNSPECIFIED;
-static int translate_empty_slots __read_mostly = 0;
-static int calgary_detected __read_mostly = 0;
-
-static struct rio_table_hdr *rio_table_hdr __initdata;
-static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
-static struct rio_detail *rio_devs[MAX_NUMNODES * 4] __initdata;
-
-struct calgary_bus_info {
- void *tce_space;
- unsigned char translation_disabled;
- signed char phbid;
- void __iomem *bbar;
-};
-
-static void calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
-static void calgary_tce_cache_blast(struct iommu_table *tbl);
-static void calgary_dump_error_regs(struct iommu_table *tbl);
-static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev);
-static void calioc2_tce_cache_blast(struct iommu_table *tbl);
-static void calioc2_dump_error_regs(struct iommu_table *tbl);
-static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl);
-static void get_tce_space_from_tar(void);
-
-static const struct cal_chipset_ops calgary_chip_ops = {
- .handle_quirks = calgary_handle_quirks,
- .tce_cache_blast = calgary_tce_cache_blast,
- .dump_error_regs = calgary_dump_error_regs
-};
-
-static const struct cal_chipset_ops calioc2_chip_ops = {
- .handle_quirks = calioc2_handle_quirks,
- .tce_cache_blast = calioc2_tce_cache_blast,
- .dump_error_regs = calioc2_dump_error_regs
-};
-
-static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, };
-
-static inline int translation_enabled(struct iommu_table *tbl)
-{
- /* only PHBs with translation enabled have an IOMMU table */
- return (tbl != NULL);
-}
-
-static void iommu_range_reserve(struct iommu_table *tbl,
- unsigned long start_addr, unsigned int npages)
-{
- unsigned long index;
- unsigned long end;
- unsigned long flags;
-
- index = start_addr >> PAGE_SHIFT;
-
- /* bail out if we're asked to reserve a region we don't cover */
- if (index >= tbl->it_size)
- return;
-
- end = index + npages;
- if (end > tbl->it_size) /* don't go off the table */
- end = tbl->it_size;
-
- spin_lock_irqsave(&tbl->it_lock, flags);
-
- bitmap_set(tbl->it_map, index, npages);
-
- spin_unlock_irqrestore(&tbl->it_lock, flags);
-}
-
-static unsigned long iommu_range_alloc(struct device *dev,
- struct iommu_table *tbl,
- unsigned int npages)
-{
- unsigned long flags;
- unsigned long offset;
- unsigned long boundary_size;
-
- boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1,
- PAGE_SIZE) >> PAGE_SHIFT;
-
- BUG_ON(npages == 0);
-
- spin_lock_irqsave(&tbl->it_lock, flags);
-
- offset = iommu_area_alloc(tbl->it_map, tbl->it_size, tbl->it_hint,
- npages, 0, boundary_size, 0);
- if (offset == ~0UL) {
- tbl->chip_ops->tce_cache_blast(tbl);
-
- offset = iommu_area_alloc(tbl->it_map, tbl->it_size, 0,
- npages, 0, boundary_size, 0);
- if (offset == ~0UL) {
- pr_warn("IOMMU full\n");
- spin_unlock_irqrestore(&tbl->it_lock, flags);
- if (panic_on_overflow)
- panic("Calgary: fix the allocator.\n");
- else
- return DMA_MAPPING_ERROR;
- }
- }
-
- tbl->it_hint = offset + npages;
- BUG_ON(tbl->it_hint > tbl->it_size);
-
- spin_unlock_irqrestore(&tbl->it_lock, flags);
-
- return offset;
-}
-
-static dma_addr_t iommu_alloc(struct device *dev, struct iommu_table *tbl,
- void *vaddr, unsigned int npages, int direction)
-{
- unsigned long entry;
- dma_addr_t ret;
-
- entry = iommu_range_alloc(dev, tbl, npages);
- if (unlikely(entry == DMA_MAPPING_ERROR)) {
- pr_warn("failed to allocate %u pages in iommu %p\n",
- npages, tbl);
- return DMA_MAPPING_ERROR;
- }
-
- /* set the return dma address */
- ret = (entry << PAGE_SHIFT) | ((unsigned long)vaddr & ~PAGE_MASK);
-
- /* put the TCEs in the HW table */
- tce_build(tbl, entry, npages, (unsigned long)vaddr & PAGE_MASK,
- direction);
- return ret;
-}
-
-static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr,
- unsigned int npages)
-{
- unsigned long entry;
- unsigned long flags;
-
- /* were we called with bad_dma_address? */
- if (unlikely(dma_addr == DMA_MAPPING_ERROR)) {
- WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA "
- "address 0x%Lx\n", dma_addr);
- return;
- }
-
- entry = dma_addr >> PAGE_SHIFT;
-
- BUG_ON(entry + npages > tbl->it_size);
-
- tce_free(tbl, entry, npages);
-
- spin_lock_irqsave(&tbl->it_lock, flags);
-
- bitmap_clear(tbl->it_map, entry, npages);
-
- spin_unlock_irqrestore(&tbl->it_lock, flags);
-}
-
-static inline struct iommu_table *find_iommu_table(struct device *dev)
-{
- struct pci_dev *pdev;
- struct pci_bus *pbus;
- struct iommu_table *tbl;
-
- pdev = to_pci_dev(dev);
-
- /* search up the device tree for an iommu */
- pbus = pdev->bus;
- do {
- tbl = pci_iommu(pbus);
- if (tbl && tbl->it_busno == pbus->number)
- break;
- tbl = NULL;
- pbus = pbus->parent;
- } while (pbus);
-
- BUG_ON(tbl && (tbl->it_busno != pbus->number));
-
- return tbl;
-}
-
-static void calgary_unmap_sg(struct device *dev, struct scatterlist *sglist,
- int nelems,enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct iommu_table *tbl = find_iommu_table(dev);
- struct scatterlist *s;
- int i;
-
- if (!translation_enabled(tbl))
- return;
-
- for_each_sg(sglist, s, nelems, i) {
- unsigned int npages;
- dma_addr_t dma = s->dma_address;
- unsigned int dmalen = s->dma_length;
-
- if (dmalen == 0)
- break;
-
- npages = iommu_num_pages(dma, dmalen, PAGE_SIZE);
- iommu_free(tbl, dma, npages);
- }
-}
-
-static int calgary_map_sg(struct device *dev, struct scatterlist *sg,
- int nelems, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct iommu_table *tbl = find_iommu_table(dev);
- struct scatterlist *s;
- unsigned long vaddr;
- unsigned int npages;
- unsigned long entry;
- int i;
-
- for_each_sg(sg, s, nelems, i) {
- BUG_ON(!sg_page(s));
-
- vaddr = (unsigned long) sg_virt(s);
- npages = iommu_num_pages(vaddr, s->length, PAGE_SIZE);
-
- entry = iommu_range_alloc(dev, tbl, npages);
- if (entry == DMA_MAPPING_ERROR) {
- /* makes sure unmap knows to stop */
- s->dma_length = 0;
- goto error;
- }
-
- s->dma_address = (entry << PAGE_SHIFT) | s->offset;
-
- /* insert into HW table */
- tce_build(tbl, entry, npages, vaddr & PAGE_MASK, dir);
-
- s->dma_length = s->length;
- }
-
- return nelems;
-error:
- calgary_unmap_sg(dev, sg, nelems, dir, 0);
- for_each_sg(sg, s, nelems, i) {
- sg->dma_address = DMA_MAPPING_ERROR;
- sg->dma_length = 0;
- }
- return 0;
-}
-
-static dma_addr_t calgary_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- void *vaddr = page_address(page) + offset;
- unsigned long uaddr;
- unsigned int npages;
- struct iommu_table *tbl = find_iommu_table(dev);
-
- uaddr = (unsigned long)vaddr;
- npages = iommu_num_pages(uaddr, size, PAGE_SIZE);
-
- return iommu_alloc(dev, tbl, vaddr, npages, dir);
-}
-
-static void calgary_unmap_page(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct iommu_table *tbl = find_iommu_table(dev);
- unsigned int npages;
-
- npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
- iommu_free(tbl, dma_addr, npages);
-}
-
-static void* calgary_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs)
-{
- void *ret = NULL;
- dma_addr_t mapping;
- unsigned int npages, order;
- struct iommu_table *tbl = find_iommu_table(dev);
-
- size = PAGE_ALIGN(size); /* size rounded up to full pages */
- npages = size >> PAGE_SHIFT;
- order = get_order(size);
-
- /* alloc enough pages (and possibly more) */
- ret = (void *)__get_free_pages(flag, order);
- if (!ret)
- goto error;
- memset(ret, 0, size);
-
- /* set up tces to cover the allocated range */
- mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL);
- if (mapping == DMA_MAPPING_ERROR)
- goto free;
- *dma_handle = mapping;
- return ret;
-free:
- free_pages((unsigned long)ret, get_order(size));
- ret = NULL;
-error:
- return ret;
-}
-
-static void calgary_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- unsigned int npages;
- struct iommu_table *tbl = find_iommu_table(dev);
-
- size = PAGE_ALIGN(size);
- npages = size >> PAGE_SHIFT;
-
- iommu_free(tbl, dma_handle, npages);
- free_pages((unsigned long)vaddr, get_order(size));
-}
-
-static const struct dma_map_ops calgary_dma_ops = {
- .alloc = calgary_alloc_coherent,
- .free = calgary_free_coherent,
- .map_sg = calgary_map_sg,
- .unmap_sg = calgary_unmap_sg,
- .map_page = calgary_map_page,
- .unmap_page = calgary_unmap_page,
- .dma_supported = dma_direct_supported,
- .mmap = dma_common_mmap,
- .get_sgtable = dma_common_get_sgtable,
-};
-
-static inline void __iomem * busno_to_bbar(unsigned char num)
-{
- return bus_info[num].bbar;
-}
-
-static inline int busno_to_phbid(unsigned char num)
-{
- return bus_info[num].phbid;
-}
-
-static inline unsigned long split_queue_offset(unsigned char num)
-{
- size_t idx = busno_to_phbid(num);
-
- return split_queue_offsets[idx];
-}
-
-static inline unsigned long tar_offset(unsigned char num)
-{
- size_t idx = busno_to_phbid(num);
-
- return tar_offsets[idx];
-}
-
-static inline unsigned long phb_offset(unsigned char num)
-{
- size_t idx = busno_to_phbid(num);
-
- return phb_offsets[idx];
-}
-
-static inline void __iomem* calgary_reg(void __iomem *bar, unsigned long offset)
-{
- unsigned long target = ((unsigned long)bar) | offset;
- return (void __iomem*)target;
-}
-
-static inline int is_calioc2(unsigned short device)
-{
- return (device == PCI_DEVICE_ID_IBM_CALIOC2);
-}
-
-static inline int is_calgary(unsigned short device)
-{
- return (device == PCI_DEVICE_ID_IBM_CALGARY);
-}
-
-static inline int is_cal_pci_dev(unsigned short device)
-{
- return (is_calgary(device) || is_calioc2(device));
-}
-
-static void calgary_tce_cache_blast(struct iommu_table *tbl)
-{
- u64 val;
- u32 aer;
- int i = 0;
- void __iomem *bbar = tbl->bbar;
- void __iomem *target;
-
- /* disable arbitration on the bus */
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
- aer = readl(target);
- writel(0, target);
-
- /* read plssr to ensure it got there */
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
- val = readl(target);
-
- /* poll split queues until all DMA activity is done */
- target = calgary_reg(bbar, split_queue_offset(tbl->it_busno));
- do {
- val = readq(target);
- i++;
- } while ((val & 0xff) != 0xff && i < 100);
- if (i == 100)
- pr_warn("PCI bus not quiesced, continuing anyway\n");
-
- /* invalidate TCE cache */
- target = calgary_reg(bbar, tar_offset(tbl->it_busno));
- writeq(tbl->tar_val, target);
-
- /* enable arbitration */
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_AER_OFFSET);
- writel(aer, target);
- (void)readl(target); /* flush */
-}
-
-static void calioc2_tce_cache_blast(struct iommu_table *tbl)
-{
- void __iomem *bbar = tbl->bbar;
- void __iomem *target;
- u64 val64;
- u32 val;
- int i = 0;
- int count = 1;
- unsigned char bus = tbl->it_busno;
-
-begin:
- printk(KERN_DEBUG "Calgary: CalIOC2 bus 0x%x entering tce cache blast "
- "sequence - count %d\n", bus, count);
-
- /* 1. using the Page Migration Control reg set SoftStop */
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "1a. read 0x%x [LE] from %p\n", val, target);
- val |= PMR_SOFTSTOP;
- printk(KERN_DEBUG "1b. writing 0x%x [LE] to %p\n", val, target);
- writel(cpu_to_be32(val), target);
-
- /* 2. poll split queues until all DMA activity is done */
- printk(KERN_DEBUG "2a. starting to poll split queues\n");
- target = calgary_reg(bbar, split_queue_offset(bus));
- do {
- val64 = readq(target);
- i++;
- } while ((val64 & 0xff) != 0xff && i < 100);
- if (i == 100)
- pr_warn("CalIOC2: PCI bus not quiesced, continuing anyway\n");
-
- /* 3. poll Page Migration DEBUG for SoftStopFault */
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "3. read 0x%x [LE] from %p\n", val, target);
-
- /* 4. if SoftStopFault - goto (1) */
- if (val & PMR_SOFTSTOPFAULT) {
- if (++count < 100)
- goto begin;
- else {
- pr_warn("CalIOC2: too many SoftStopFaults, aborting TCE cache flush sequence!\n");
- return; /* pray for the best */
- }
- }
-
- /* 5. Slam into HardStop by reading PHB_PAGE_MIG_CTRL */
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
- printk(KERN_DEBUG "5a. slamming into HardStop by reading %p\n", target);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "5b. read 0x%x [LE] from %p\n", val, target);
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_DEBUG);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "5c. read 0x%x [LE] from %p (debug)\n", val, target);
-
- /* 6. invalidate TCE cache */
- printk(KERN_DEBUG "6. invalidating TCE cache\n");
- target = calgary_reg(bbar, tar_offset(bus));
- writeq(tbl->tar_val, target);
-
- /* 7. Re-read PMCR */
- printk(KERN_DEBUG "7a. Re-reading PMCR\n");
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "7b. read 0x%x [LE] from %p\n", val, target);
-
- /* 8. Remove HardStop */
- printk(KERN_DEBUG "8a. removing HardStop from PMCR\n");
- target = calgary_reg(bbar, phb_offset(bus) | PHB_PAGE_MIG_CTRL);
- val = 0;
- printk(KERN_DEBUG "8b. writing 0x%x [LE] to %p\n", val, target);
- writel(cpu_to_be32(val), target);
- val = be32_to_cpu(readl(target));
- printk(KERN_DEBUG "8c. read 0x%x [LE] from %p\n", val, target);
-}
-
-static void __init calgary_reserve_mem_region(struct pci_dev *dev, u64 start,
- u64 limit)
-{
- unsigned int numpages;
-
- limit = limit | 0xfffff;
- limit++;
-
- numpages = ((limit - start) >> PAGE_SHIFT);
- iommu_range_reserve(pci_iommu(dev->bus), start, numpages);
-}
-
-static void __init calgary_reserve_peripheral_mem_1(struct pci_dev *dev)
-{
- void __iomem *target;
- u64 low, high, sizelow;
- u64 start, limit;
- struct iommu_table *tbl = pci_iommu(dev->bus);
- unsigned char busnum = dev->bus->number;
- void __iomem *bbar = tbl->bbar;
-
- /* peripheral MEM_1 region */
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_LOW);
- low = be32_to_cpu(readl(target));
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_HIGH);
- high = be32_to_cpu(readl(target));
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_1_SIZE);
- sizelow = be32_to_cpu(readl(target));
-
- start = (high << 32) | low;
- limit = sizelow;
-
- calgary_reserve_mem_region(dev, start, limit);
-}
-
-static void __init calgary_reserve_peripheral_mem_2(struct pci_dev *dev)
-{
- void __iomem *target;
- u32 val32;
- u64 low, high, sizelow, sizehigh;
- u64 start, limit;
- struct iommu_table *tbl = pci_iommu(dev->bus);
- unsigned char busnum = dev->bus->number;
- void __iomem *bbar = tbl->bbar;
-
- /* is it enabled? */
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
- val32 = be32_to_cpu(readl(target));
- if (!(val32 & PHB_MEM2_ENABLE))
- return;
-
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_LOW);
- low = be32_to_cpu(readl(target));
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_HIGH);
- high = be32_to_cpu(readl(target));
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_LOW);
- sizelow = be32_to_cpu(readl(target));
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_MEM_2_SIZE_HIGH);
- sizehigh = be32_to_cpu(readl(target));
-
- start = (high << 32) | low;
- limit = (sizehigh << 32) | sizelow;
-
- calgary_reserve_mem_region(dev, start, limit);
-}
-
-/*
- * some regions of the IO address space do not get translated, so we
- * must not give devices IO addresses in those regions. The regions
- * are the 640KB-1MB region and the two PCI peripheral memory holes.
- * Reserve all of them in the IOMMU bitmap to avoid giving them out
- * later.
- */
-static void __init calgary_reserve_regions(struct pci_dev *dev)
-{
- unsigned int npages;
- u64 start;
- struct iommu_table *tbl = pci_iommu(dev->bus);
-
- /* avoid the BIOS/VGA first 640KB-1MB region */
- /* for CalIOC2 - avoid the entire first MB */
- if (is_calgary(dev->device)) {
- start = (640 * 1024);
- npages = ((1024 - 640) * 1024) >> PAGE_SHIFT;
- } else { /* calioc2 */
- start = 0;
- npages = (1 * 1024 * 1024) >> PAGE_SHIFT;
- }
- iommu_range_reserve(tbl, start, npages);
-
- /* reserve the two PCI peripheral memory regions in IO space */
- calgary_reserve_peripheral_mem_1(dev);
- calgary_reserve_peripheral_mem_2(dev);
-}
-
-static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar)
-{
- u64 val64;
- u64 table_phys;
- void __iomem *target;
- int ret;
- struct iommu_table *tbl;
-
- /* build TCE tables for each PHB */
- ret = build_tce_table(dev, bbar);
- if (ret)
- return ret;
-
- tbl = pci_iommu(dev->bus);
- tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space;
-
- if (is_kdump_kernel())
- calgary_init_bitmap_from_tce_table(tbl);
- else
- tce_free(tbl, 0, tbl->it_size);
-
- if (is_calgary(dev->device))
- tbl->chip_ops = &calgary_chip_ops;
- else if (is_calioc2(dev->device))
- tbl->chip_ops = &calioc2_chip_ops;
- else
- BUG();
-
- calgary_reserve_regions(dev);
-
- /* set TARs for each PHB */
- target = calgary_reg(bbar, tar_offset(dev->bus->number));
- val64 = be64_to_cpu(readq(target));
-
- /* zero out all TAR bits under sw control */
- val64 &= ~TAR_SW_BITS;
- table_phys = (u64)__pa(tbl->it_base);
-
- val64 |= table_phys;
-
- BUG_ON(specified_table_size > TCE_TABLE_SIZE_8M);
- val64 |= (u64) specified_table_size;
-
- tbl->tar_val = cpu_to_be64(val64);
-
- writeq(tbl->tar_val, target);
- readq(target); /* flush */
-
- return 0;
-}
-
-static void __init calgary_free_bus(struct pci_dev *dev)
-{
- u64 val64;
- struct iommu_table *tbl = pci_iommu(dev->bus);
- void __iomem *target;
- unsigned int bitmapsz;
-
- target = calgary_reg(tbl->bbar, tar_offset(dev->bus->number));
- val64 = be64_to_cpu(readq(target));
- val64 &= ~TAR_SW_BITS;
- writeq(cpu_to_be64(val64), target);
- readq(target); /* flush */
-
- bitmapsz = tbl->it_size / BITS_PER_BYTE;
- free_pages((unsigned long)tbl->it_map, get_order(bitmapsz));
- tbl->it_map = NULL;
-
- kfree(tbl);
-
- set_pci_iommu(dev->bus, NULL);
-
- /* Can't free bootmem allocated memory after system is up :-( */
- bus_info[dev->bus->number].tce_space = NULL;
-}
-
-static void calgary_dump_error_regs(struct iommu_table *tbl)
-{
- void __iomem *bbar = tbl->bbar;
- void __iomem *target;
- u32 csr, plssr;
-
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
- csr = be32_to_cpu(readl(target));
-
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_PLSSR_OFFSET);
- plssr = be32_to_cpu(readl(target));
-
- /* If no error, the agent ID in the CSR is not valid */
- pr_emerg("DMA error on Calgary PHB 0x%x, 0x%08x@CSR 0x%08x@PLSSR\n",
- tbl->it_busno, csr, plssr);
-}
-
-static void calioc2_dump_error_regs(struct iommu_table *tbl)
-{
- void __iomem *bbar = tbl->bbar;
- u32 csr, csmr, plssr, mck, rcstat;
- void __iomem *target;
- unsigned long phboff = phb_offset(tbl->it_busno);
- unsigned long erroff;
- u32 errregs[7];
- int i;
-
- /* dump CSR */
- target = calgary_reg(bbar, phboff | PHB_CSR_OFFSET);
- csr = be32_to_cpu(readl(target));
- /* dump PLSSR */
- target = calgary_reg(bbar, phboff | PHB_PLSSR_OFFSET);
- plssr = be32_to_cpu(readl(target));
- /* dump CSMR */
- target = calgary_reg(bbar, phboff | 0x290);
- csmr = be32_to_cpu(readl(target));
- /* dump mck */
- target = calgary_reg(bbar, phboff | 0x800);
- mck = be32_to_cpu(readl(target));
-
- pr_emerg("DMA error on CalIOC2 PHB 0x%x\n", tbl->it_busno);
-
- pr_emerg("0x%08x@CSR 0x%08x@PLSSR 0x%08x@CSMR 0x%08x@MCK\n",
- csr, plssr, csmr, mck);
-
- /* dump rest of error regs */
- pr_emerg("");
- for (i = 0; i < ARRAY_SIZE(errregs); i++) {
- /* err regs are at 0x810 - 0x870 */
- erroff = (0x810 + (i * 0x10));
- target = calgary_reg(bbar, phboff | erroff);
- errregs[i] = be32_to_cpu(readl(target));
- pr_cont("0x%08x@0x%lx ", errregs[i], erroff);
- }
- pr_cont("\n");
-
- /* root complex status */
- target = calgary_reg(bbar, phboff | PHB_ROOT_COMPLEX_STATUS);
- rcstat = be32_to_cpu(readl(target));
- printk(KERN_EMERG "Calgary: 0x%08x@0x%x\n", rcstat,
- PHB_ROOT_COMPLEX_STATUS);
-}
-
-static void calgary_watchdog(struct timer_list *t)
-{
- struct iommu_table *tbl = from_timer(tbl, t, watchdog_timer);
- void __iomem *bbar = tbl->bbar;
- u32 val32;
- void __iomem *target;
-
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) | PHB_CSR_OFFSET);
- val32 = be32_to_cpu(readl(target));
-
- /* If no error, the agent ID in the CSR is not valid */
- if (val32 & CSR_AGENT_MASK) {
- tbl->chip_ops->dump_error_regs(tbl);
-
- /* reset error */
- writel(0, target);
-
- /* Disable bus that caused the error */
- target = calgary_reg(bbar, phb_offset(tbl->it_busno) |
- PHB_CONFIG_RW_OFFSET);
- val32 = be32_to_cpu(readl(target));
- val32 |= PHB_SLOT_DISABLE;
- writel(cpu_to_be32(val32), target);
- readl(target); /* flush */
- } else {
- /* Reset the timer */
- mod_timer(&tbl->watchdog_timer, jiffies + 2 * HZ);
- }
-}
-
-static void __init calgary_set_split_completion_timeout(void __iomem *bbar,
- unsigned char busnum, unsigned long timeout)
-{
- u64 val64;
- void __iomem *target;
- unsigned int phb_shift = ~0; /* silence gcc */
- u64 mask;
-
- switch (busno_to_phbid(busnum)) {
- case 0: phb_shift = (63 - 19);
- break;
- case 1: phb_shift = (63 - 23);
- break;
- case 2: phb_shift = (63 - 27);
- break;
- case 3: phb_shift = (63 - 35);
- break;
- default:
- BUG_ON(busno_to_phbid(busnum));
- }
-
- target = calgary_reg(bbar, CALGARY_CONFIG_REG);
- val64 = be64_to_cpu(readq(target));
-
- /* zero out this PHB's timer bits */
- mask = ~(0xFUL << phb_shift);
- val64 &= mask;
- val64 |= (timeout << phb_shift);
- writeq(cpu_to_be64(val64), target);
- readq(target); /* flush */
-}
-
-static void __init calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev)
-{
- unsigned char busnum = dev->bus->number;
- void __iomem *bbar = tbl->bbar;
- void __iomem *target;
- u32 val;
-
- /*
- * CalIOC2 designers recommend setting bit 8 in 0xnDB0 to 1
- */
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_SAVIOR_L2);
- val = cpu_to_be32(readl(target));
- val |= 0x00800000;
- writel(cpu_to_be32(val), target);
-}
-
-static void __init calgary_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev)
-{
- unsigned char busnum = dev->bus->number;
-
- /*
- * Give split completion a longer timeout on bus 1 for aic94xx
- * http://bugzilla.kernel.org/show_bug.cgi?id=7180
- */
- if (is_calgary(dev->device) && (busnum == 1))
- calgary_set_split_completion_timeout(tbl->bbar, busnum,
- CCR_2SEC_TIMEOUT);
-}
-
-static void __init calgary_enable_translation(struct pci_dev *dev)
-{
- u32 val32;
- unsigned char busnum;
- void __iomem *target;
- void __iomem *bbar;
- struct iommu_table *tbl;
-
- busnum = dev->bus->number;
- tbl = pci_iommu(dev->bus);
- bbar = tbl->bbar;
-
- /* enable TCE in PHB Config Register */
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
- val32 = be32_to_cpu(readl(target));
- val32 |= PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE;
-
- printk(KERN_INFO "Calgary: enabling translation on %s PHB %#x\n",
- (dev->device == PCI_DEVICE_ID_IBM_CALGARY) ?
- "Calgary" : "CalIOC2", busnum);
- printk(KERN_INFO "Calgary: errant DMAs will now be prevented on this "
- "bus.\n");
-
- writel(cpu_to_be32(val32), target);
- readl(target); /* flush */
-
- timer_setup(&tbl->watchdog_timer, calgary_watchdog, 0);
- mod_timer(&tbl->watchdog_timer, jiffies);
-}
-
-static void __init calgary_disable_translation(struct pci_dev *dev)
-{
- u32 val32;
- unsigned char busnum;
- void __iomem *target;
- void __iomem *bbar;
- struct iommu_table *tbl;
-
- busnum = dev->bus->number;
- tbl = pci_iommu(dev->bus);
- bbar = tbl->bbar;
-
- /* disable TCE in PHB Config Register */
- target = calgary_reg(bbar, phb_offset(busnum) | PHB_CONFIG_RW_OFFSET);
- val32 = be32_to_cpu(readl(target));
- val32 &= ~(PHB_TCE_ENABLE | PHB_DAC_DISABLE | PHB_MCSR_ENABLE);
-
- printk(KERN_INFO "Calgary: disabling translation on PHB %#x!\n", busnum);
- writel(cpu_to_be32(val32), target);
- readl(target); /* flush */
-
- del_timer_sync(&tbl->watchdog_timer);
-}
-
-static void __init calgary_init_one_nontraslated(struct pci_dev *dev)
-{
- pci_dev_get(dev);
- set_pci_iommu(dev->bus, NULL);
-
- /* is the device behind a bridge? */
- if (dev->bus->parent)
- dev->bus->parent->self = dev;
- else
- dev->bus->self = dev;
-}
-
-static int __init calgary_init_one(struct pci_dev *dev)
-{
- void __iomem *bbar;
- struct iommu_table *tbl;
- int ret;
-
- bbar = busno_to_bbar(dev->bus->number);
- ret = calgary_setup_tar(dev, bbar);
- if (ret)
- goto done;
-
- pci_dev_get(dev);
-
- if (dev->bus->parent) {
- if (dev->bus->parent->self)
- printk(KERN_WARNING "Calgary: IEEEE, dev %p has "
- "bus->parent->self!\n", dev);
- dev->bus->parent->self = dev;
- } else
- dev->bus->self = dev;
-
- tbl = pci_iommu(dev->bus);
- tbl->chip_ops->handle_quirks(tbl, dev);
-
- calgary_enable_translation(dev);
-
- return 0;
-
-done:
- return ret;
-}
-
-static int __init calgary_locate_bbars(void)
-{
- int ret;
- int rioidx, phb, bus;
- void __iomem *bbar;
- void __iomem *target;
- unsigned long offset;
- u8 start_bus, end_bus;
- u32 val;
-
- ret = -ENODATA;
- for (rioidx = 0; rioidx < rio_table_hdr->num_rio_dev; rioidx++) {
- struct rio_detail *rio = rio_devs[rioidx];
-
- if ((rio->type != COMPAT_CALGARY) && (rio->type != ALT_CALGARY))
- continue;
-
- /* map entire 1MB of Calgary config space */
- bbar = ioremap_nocache(rio->BBAR, 1024 * 1024);
- if (!bbar)
- goto error;
-
- for (phb = 0; phb < PHBS_PER_CALGARY; phb++) {
- offset = phb_debug_offsets[phb] | PHB_DEBUG_STUFF_OFFSET;
- target = calgary_reg(bbar, offset);
-
- val = be32_to_cpu(readl(target));
-
- start_bus = (u8)((val & 0x00FF0000) >> 16);
- end_bus = (u8)((val & 0x0000FF00) >> 8);
-
- if (end_bus) {
- for (bus = start_bus; bus <= end_bus; bus++) {
- bus_info[bus].bbar = bbar;
- bus_info[bus].phbid = phb;
- }
- } else {
- bus_info[start_bus].bbar = bbar;
- bus_info[start_bus].phbid = phb;
- }
- }
- }
-
- return 0;
-
-error:
- /* scan bus_info and iounmap any bbars we previously ioremap'd */
- for (bus = 0; bus < ARRAY_SIZE(bus_info); bus++)
- if (bus_info[bus].bbar)
- iounmap(bus_info[bus].bbar);
-
- return ret;
-}
-
-static int __init calgary_init(void)
-{
- int ret;
- struct pci_dev *dev = NULL;
- struct calgary_bus_info *info;
-
- ret = calgary_locate_bbars();
- if (ret)
- return ret;
-
- /* Purely for kdump kernel case */
- if (is_kdump_kernel())
- get_tce_space_from_tar();
-
- do {
- dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
- if (!dev)
- break;
- if (!is_cal_pci_dev(dev->device))
- continue;
-
- info = &bus_info[dev->bus->number];
- if (info->translation_disabled) {
- calgary_init_one_nontraslated(dev);
- continue;
- }
-
- if (!info->tce_space && !translate_empty_slots)
- continue;
-
- ret = calgary_init_one(dev);
- if (ret)
- goto error;
- } while (1);
-
- dev = NULL;
- for_each_pci_dev(dev) {
- struct iommu_table *tbl;
-
- tbl = find_iommu_table(&dev->dev);
-
- if (translation_enabled(tbl))
- dev->dev.dma_ops = &calgary_dma_ops;
- }
-
- return ret;
-
-error:
- do {
- dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
- if (!dev)
- break;
- if (!is_cal_pci_dev(dev->device))
- continue;
-
- info = &bus_info[dev->bus->number];
- if (info->translation_disabled) {
- pci_dev_put(dev);
- continue;
- }
- if (!info->tce_space && !translate_empty_slots)
- continue;
-
- calgary_disable_translation(dev);
- calgary_free_bus(dev);
- pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */
- dev->dev.dma_ops = NULL;
- } while (1);
-
- return ret;
-}
-
-static inline int __init determine_tce_table_size(void)
-{
- int ret;
-
- if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED)
- return specified_table_size;
-
- if (is_kdump_kernel() && saved_max_pfn) {
- /*
- * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to
- * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each
- * larger table size has twice as many entries, so shift the
- * max ram address by 13 to divide by 8K and then look at the
- * order of the result to choose between 0-7.
- */
- ret = get_order((saved_max_pfn * PAGE_SIZE) >> 13);
- if (ret > TCE_TABLE_SIZE_8M)
- ret = TCE_TABLE_SIZE_8M;
- } else {
- /*
- * Use 8M by default (suggested by Muli) if it's not
- * kdump kernel and saved_max_pfn isn't set.
- */
- ret = TCE_TABLE_SIZE_8M;
- }
-
- return ret;
-}
-
-static int __init build_detail_arrays(void)
-{
- unsigned long ptr;
- unsigned numnodes, i;
- int scal_detail_size, rio_detail_size;
-
- numnodes = rio_table_hdr->num_scal_dev;
- if (numnodes > MAX_NUMNODES){
- printk(KERN_WARNING
- "Calgary: MAX_NUMNODES too low! Defined as %d, "
- "but system has %d nodes.\n",
- MAX_NUMNODES, numnodes);
- return -ENODEV;
- }
-
- switch (rio_table_hdr->version){
- case 2:
- scal_detail_size = 11;
- rio_detail_size = 13;
- break;
- case 3:
- scal_detail_size = 12;
- rio_detail_size = 15;
- break;
- default:
- printk(KERN_WARNING
- "Calgary: Invalid Rio Grande Table Version: %d\n",
- rio_table_hdr->version);
- return -EPROTO;
- }
-
- ptr = ((unsigned long)rio_table_hdr) + 3;
- for (i = 0; i < numnodes; i++, ptr += scal_detail_size)
- scal_devs[i] = (struct scal_detail *)ptr;
-
- for (i = 0; i < rio_table_hdr->num_rio_dev;
- i++, ptr += rio_detail_size)
- rio_devs[i] = (struct rio_detail *)ptr;
-
- return 0;
-}
-
-static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev)
-{
- int dev;
- u32 val;
-
- if (pci_dev == PCI_DEVICE_ID_IBM_CALIOC2) {
- /*
- * FIXME: properly scan for devices across the
- * PCI-to-PCI bridge on every CalIOC2 port.
- */
- return 1;
- }
-
- for (dev = 1; dev < 8; dev++) {
- val = read_pci_config(bus, dev, 0, 0);
- if (val != 0xffffffff)
- break;
- }
- return (val != 0xffffffff);
-}
-
-/*
- * calgary_init_bitmap_from_tce_table():
- * Function for kdump case. In the second/kdump kernel initialize
- * the bitmap based on the tce table entries obtained from first kernel
- */
-static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl)
-{
- u64 *tp;
- unsigned int index;
- tp = ((u64 *)tbl->it_base);
- for (index = 0 ; index < tbl->it_size; index++) {
- if (*tp != 0x0)
- set_bit(index, tbl->it_map);
- tp++;
- }
-}
-
-/*
- * get_tce_space_from_tar():
- * Function for kdump case. Get the tce tables from first kernel
- * by reading the contents of the base address register of calgary iommu
- */
-static void __init get_tce_space_from_tar(void)
-{
- int bus;
- void __iomem *target;
- unsigned long tce_space;
-
- for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
- struct calgary_bus_info *info = &bus_info[bus];
- unsigned short pci_device;
- u32 val;
-
- val = read_pci_config(bus, 0, 0, 0);
- pci_device = (val & 0xFFFF0000) >> 16;
-
- if (!is_cal_pci_dev(pci_device))
- continue;
- if (info->translation_disabled)
- continue;
-
- if (calgary_bus_has_devices(bus, pci_device) ||
- translate_empty_slots) {
- target = calgary_reg(bus_info[bus].bbar,
- tar_offset(bus));
- tce_space = be64_to_cpu(readq(target));
- tce_space = tce_space & TAR_SW_BITS;
-
- tce_space = tce_space & (~specified_table_size);
- info->tce_space = (u64 *)__va(tce_space);
- }
- }
- return;
-}
-
-static int __init calgary_iommu_init(void)
-{
- int ret;
-
- /* ok, we're trying to use Calgary - let's roll */
- printk(KERN_INFO "PCI-DMA: Using Calgary IOMMU\n");
-
- ret = calgary_init();
- if (ret) {
- printk(KERN_ERR "PCI-DMA: Calgary init failed %d, "
- "falling back to no_iommu\n", ret);
- return ret;
- }
-
- return 0;
-}
-
-int __init detect_calgary(void)
-{
- int bus;
- void *tbl;
- int calgary_found = 0;
- unsigned long ptr;
- unsigned int offset, prev_offset;
- int ret;
-
- /*
- * if the user specified iommu=off or iommu=soft or we found
- * another HW IOMMU already, bail out.
- */
- if (no_iommu || iommu_detected)
- return -ENODEV;
-
- if (!use_calgary)
- return -ENODEV;
-
- if (!early_pci_allowed())
- return -ENODEV;
-
- printk(KERN_DEBUG "Calgary: detecting Calgary via BIOS EBDA area\n");
-
- ptr = (unsigned long)phys_to_virt(get_bios_ebda());
-
- rio_table_hdr = NULL;
- prev_offset = 0;
- offset = 0x180;
- /*
- * The next offset is stored in the 1st word.
- * Only parse up until the offset increases:
- */
- while (offset > prev_offset) {
- /* The block id is stored in the 2nd word */
- if (*((unsigned short *)(ptr + offset + 2)) == 0x4752){
- /* set the pointer past the offset & block id */
- rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
- break;
- }
- prev_offset = offset;
- offset = *((unsigned short *)(ptr + offset));
- }
- if (!rio_table_hdr) {
- printk(KERN_DEBUG "Calgary: Unable to locate Rio Grande table "
- "in EBDA - bailing!\n");
- return -ENODEV;
- }
-
- ret = build_detail_arrays();
- if (ret) {
- printk(KERN_DEBUG "Calgary: build_detail_arrays ret %d\n", ret);
- return -ENOMEM;
- }
-
- specified_table_size = determine_tce_table_size();
-
- for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) {
- struct calgary_bus_info *info = &bus_info[bus];
- unsigned short pci_device;
- u32 val;
-
- val = read_pci_config(bus, 0, 0, 0);
- pci_device = (val & 0xFFFF0000) >> 16;
-
- if (!is_cal_pci_dev(pci_device))
- continue;
-
- if (info->translation_disabled)
- continue;
-
- if (calgary_bus_has_devices(bus, pci_device) ||
- translate_empty_slots) {
- /*
- * If it is kdump kernel, find and use tce tables
- * from first kernel, else allocate tce tables here
- */
- if (!is_kdump_kernel()) {
- tbl = alloc_tce_table();
- if (!tbl)
- goto cleanup;
- info->tce_space = tbl;
- }
- calgary_found = 1;
- }
- }
-
- printk(KERN_DEBUG "Calgary: finished detection, Calgary %s\n",
- calgary_found ? "found" : "not found");
-
- if (calgary_found) {
- iommu_detected = 1;
- calgary_detected = 1;
- printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n");
- printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n",
- specified_table_size);
-
- x86_init.iommu.iommu_init = calgary_iommu_init;
- }
- return calgary_found;
-
-cleanup:
- for (--bus; bus >= 0; --bus) {
- struct calgary_bus_info *info = &bus_info[bus];
-
- if (info->tce_space)
- free_tce_table(info->tce_space);
- }
- return -ENOMEM;
-}
-
-static int __init calgary_parse_options(char *p)
-{
- unsigned int bridge;
- unsigned long val;
- size_t len;
- ssize_t ret;
-
- while (*p) {
- if (!strncmp(p, "64k", 3))
- specified_table_size = TCE_TABLE_SIZE_64K;
- else if (!strncmp(p, "128k", 4))
- specified_table_size = TCE_TABLE_SIZE_128K;
- else if (!strncmp(p, "256k", 4))
- specified_table_size = TCE_TABLE_SIZE_256K;
- else if (!strncmp(p, "512k", 4))
- specified_table_size = TCE_TABLE_SIZE_512K;
- else if (!strncmp(p, "1M", 2))
- specified_table_size = TCE_TABLE_SIZE_1M;
- else if (!strncmp(p, "2M", 2))
- specified_table_size = TCE_TABLE_SIZE_2M;
- else if (!strncmp(p, "4M", 2))
- specified_table_size = TCE_TABLE_SIZE_4M;
- else if (!strncmp(p, "8M", 2))
- specified_table_size = TCE_TABLE_SIZE_8M;
-
- len = strlen("translate_empty_slots");
- if (!strncmp(p, "translate_empty_slots", len))
- translate_empty_slots = 1;
-
- len = strlen("disable");
- if (!strncmp(p, "disable", len)) {
- p += len;
- if (*p == '=')
- ++p;
- if (*p == '\0')
- break;
- ret = kstrtoul(p, 0, &val);
- if (ret)
- break;
-
- bridge = val;
- if (bridge < MAX_PHB_BUS_NUM) {
- printk(KERN_INFO "Calgary: disabling "
- "translation for PHB %#x\n", bridge);
- bus_info[bridge].translation_disabled = 1;
- }
- }
-
- p = strpbrk(p, ",");
- if (!p)
- break;
-
- p++; /* skip ',' */
- }
- return 1;
-}
-__setup("calgary=", calgary_parse_options);
-
-static void __init calgary_fixup_one_tce_space(struct pci_dev *dev)
-{
- struct iommu_table *tbl;
- unsigned int npages;
- int i;
-
- tbl = pci_iommu(dev->bus);
-
- for (i = 0; i < 4; i++) {
- struct resource *r = &dev->resource[PCI_BRIDGE_RESOURCES + i];
-
- /* Don't give out TCEs that map MEM resources */
- if (!(r->flags & IORESOURCE_MEM))
- continue;
-
- /* 0-based? we reserve the whole 1st MB anyway */
- if (!r->start)
- continue;
-
- /* cover the whole region */
- npages = resource_size(r) >> PAGE_SHIFT;
- npages++;
-
- iommu_range_reserve(tbl, r->start, npages);
- }
-}
-
-static int __init calgary_fixup_tce_spaces(void)
-{
- struct pci_dev *dev = NULL;
- struct calgary_bus_info *info;
-
- if (no_iommu || swiotlb || !calgary_detected)
- return -ENODEV;
-
- printk(KERN_DEBUG "Calgary: fixing up tce spaces\n");
-
- do {
- dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev);
- if (!dev)
- break;
- if (!is_cal_pci_dev(dev->device))
- continue;
-
- info = &bus_info[dev->bus->number];
- if (info->translation_disabled)
- continue;
-
- if (!info->tce_space)
- continue;
-
- calgary_fixup_one_tce_space(dev);
-
- } while (1);
-
- return 0;
-}
-
-/*
- * We need to be call after pcibios_assign_resources (fs_initcall level)
- * and before device_initcall.
- */
-rootfs_initcall(calgary_fixup_tce_spaces);
-
-IOMMU_INIT_POST(detect_calgary);
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index fa4352dce491..57de2ebff7e2 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -12,7 +12,6 @@
#include <asm/dma.h>
#include <asm/iommu.h>
#include <asm/gart.h>
-#include <asm/calgary.h>
#include <asm/x86_init.h>
#include <asm/iommu_table.h>
@@ -112,11 +111,6 @@ static __init int iommu_setup(char *p)
gart_parse_options(p);
-#ifdef CONFIG_CALGARY_IOMMU
- if (!strncmp(p, "calgary", 7))
- use_calgary = 1;
-#endif /* CONFIG_CALGARY_IOMMU */
-
p += strcspn(p, ",");
if (*p == ',')
++p;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 5e94c4354d4e..bd2a11ca5dd6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -41,6 +41,7 @@
#include <asm/desc.h>
#include <asm/prctl.h>
#include <asm/spec-ctrl.h>
+#include <asm/io_bitmap.h>
#include <asm/proto.h>
#include "process.h"
@@ -72,18 +73,9 @@ __visible DEFINE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw) = {
#ifdef CONFIG_X86_32
.ss0 = __KERNEL_DS,
.ss1 = __KERNEL_CS,
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
#endif
+ .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
},
-#ifdef CONFIG_X86_32
- /*
- * Note that the .io_bitmap member must be extra-big. This is because
- * the CPU will access an additional byte beyond the end of the IO
- * permission bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
- .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
-#endif
};
EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
@@ -110,28 +102,89 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
void exit_thread(struct task_struct *tsk)
{
struct thread_struct *t = &tsk->thread;
- unsigned long *bp = t->io_bitmap_ptr;
struct fpu *fpu = &t->fpu;
- if (bp) {
- struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
-
- t->io_bitmap_ptr = NULL;
- clear_thread_flag(TIF_IO_BITMAP);
- /*
- * Careful, clear this in the TSS too:
- */
- memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
- t->io_bitmap_max = 0;
- put_cpu();
- kfree(bp);
- }
+ if (test_thread_flag(TIF_IO_BITMAP))
+ io_bitmap_exit();
free_vm86(t);
fpu__drop(fpu);
}
+static int set_new_tls(struct task_struct *p, unsigned long tls)
+{
+ struct user_desc __user *utls = (struct user_desc __user *)tls;
+
+ if (in_ia32_syscall())
+ return do_set_thread_area(p, -1, utls, 0);
+ else
+ return do_set_thread_area_64(p, ARCH_SET_FS, tls);
+}
+
+int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
+ unsigned long arg, struct task_struct *p, unsigned long tls)
+{
+ struct inactive_task_frame *frame;
+ struct fork_frame *fork_frame;
+ struct pt_regs *childregs;
+ int ret = 0;
+
+ childregs = task_pt_regs(p);
+ fork_frame = container_of(childregs, struct fork_frame, regs);
+ frame = &fork_frame->frame;
+
+ frame->bp = 0;
+ frame->ret_addr = (unsigned long) ret_from_fork;
+ p->thread.sp = (unsigned long) fork_frame;
+ p->thread.io_bitmap = NULL;
+ memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+#ifdef CONFIG_X86_64
+ savesegment(gs, p->thread.gsindex);
+ p->thread.gsbase = p->thread.gsindex ? 0 : current->thread.gsbase;
+ savesegment(fs, p->thread.fsindex);
+ p->thread.fsbase = p->thread.fsindex ? 0 : current->thread.fsbase;
+ savesegment(es, p->thread.es);
+ savesegment(ds, p->thread.ds);
+#else
+ p->thread.sp0 = (unsigned long) (childregs + 1);
+ /*
+ * Clear all status flags including IF and set fixed bit. 64bit
+ * does not have this initialization as the frame does not contain
+ * flags. The flags consistency (especially vs. AC) is there
+ * ensured via objtool, which lacks 32bit support.
+ */
+ frame->flags = X86_EFLAGS_FIXED;
+#endif
+
+ /* Kernel thread ? */
+ if (unlikely(p->flags & PF_KTHREAD)) {
+ memset(childregs, 0, sizeof(struct pt_regs));
+ kthread_frame_init(frame, sp, arg);
+ return 0;
+ }
+
+ frame->bx = 0;
+ *childregs = *current_pt_regs();
+ childregs->ax = 0;
+ if (sp)
+ childregs->sp = sp;
+
+#ifdef CONFIG_X86_32
+ task_user_gs(p) = get_user_gs(current_pt_regs());
+#endif
+
+ /* Set a new TLS for the child thread? */
+ if (clone_flags & CLONE_SETTLS)
+ ret = set_new_tls(p, tls);
+
+ if (!ret && unlikely(test_tsk_thread_flag(current, TIF_IO_BITMAP)))
+ io_bitmap_share(p);
+
+ return ret;
+}
+
void flush_thread(void)
{
struct task_struct *tsk = current;
@@ -269,31 +322,96 @@ void arch_setup_new_exec(void)
}
}
-static inline void switch_to_bitmap(struct thread_struct *prev,
- struct thread_struct *next,
- unsigned long tifp, unsigned long tifn)
+#ifdef CONFIG_X86_IOPL_IOPERM
+static inline void tss_invalidate_io_bitmap(struct tss_struct *tss)
{
- struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+ /*
+ * Invalidate the I/O bitmap by moving io_bitmap_base outside the
+ * TSS limit so any subsequent I/O access from user space will
+ * trigger a #GP.
+ *
+ * This is correct even when VMEXIT rewrites the TSS limit
+ * to 0x67 as the only requirement is that the base points
+ * outside the limit.
+ */
+ tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
+}
- if (tifn & _TIF_IO_BITMAP) {
- /*
- * Copy the relevant range of the IO bitmap.
- * Normally this is 128 bytes or less:
- */
- memcpy(tss->io_bitmap, next->io_bitmap_ptr,
- max(prev->io_bitmap_max, next->io_bitmap_max));
+static inline void switch_to_bitmap(unsigned long tifp)
+{
+ /*
+ * Invalidate I/O bitmap if the previous task used it. This prevents
+ * any possible leakage of an active I/O bitmap.
+ *
+ * If the next task has an I/O bitmap it will handle it on exit to
+ * user mode.
+ */
+ if (tifp & _TIF_IO_BITMAP)
+ tss_invalidate_io_bitmap(this_cpu_ptr(&cpu_tss_rw));
+}
+
+static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
+{
+ /*
+ * Copy at least the byte range of the incoming tasks bitmap which
+ * covers the permitted I/O ports.
+ *
+ * If the previous task which used an I/O bitmap had more bits
+ * permitted, then the copy needs to cover those as well so they
+ * get turned off.
+ */
+ memcpy(tss->io_bitmap.bitmap, iobm->bitmap,
+ max(tss->io_bitmap.prev_max, iobm->max));
+
+ /*
+ * Store the new max and the sequence number of this bitmap
+ * and a pointer to the bitmap itself.
+ */
+ tss->io_bitmap.prev_max = iobm->max;
+ tss->io_bitmap.prev_sequence = iobm->sequence;
+}
+
+/**
+ * tss_update_io_bitmap - Update I/O bitmap before exiting to usermode
+ */
+void tss_update_io_bitmap(void)
+{
+ struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
+ u16 *base = &tss->x86_tss.io_bitmap_base;
+
+ if (test_thread_flag(TIF_IO_BITMAP)) {
+ struct thread_struct *t = &current->thread;
+
+ if (IS_ENABLED(CONFIG_X86_IOPL_IOPERM) && t->iopl_emul == 3) {
+ *base = IO_BITMAP_OFFSET_VALID_ALL;
+ } else {
+ struct io_bitmap *iobm = t->io_bitmap;
+ /*
+ * Only copy bitmap data when the sequence number
+ * differs. The update time is accounted to the
+ * incoming task.
+ */
+ if (tss->io_bitmap.prev_sequence != iobm->sequence)
+ tss_copy_io_bitmap(tss, iobm);
+
+ /* Enable the bitmap */
+ *base = IO_BITMAP_OFFSET_VALID_MAP;
+ }
/*
- * Make sure that the TSS limit is correct for the CPU
- * to notice the IO bitmap.
+ * Make sure that the TSS limit is covering the io bitmap.
+ * It might have been cut down by a VMEXIT to 0x67 which
+ * would cause a subsequent I/O access from user space to
+ * trigger a #GP because tbe bitmap is outside the TSS
+ * limit.
*/
refresh_tss_limit();
- } else if (tifp & _TIF_IO_BITMAP) {
- /*
- * Clear any possible leftover bits:
- */
- memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
+ } else {
+ tss_invalidate_io_bitmap(tss);
}
}
+#else /* CONFIG_X86_IOPL_IOPERM */
+static inline void switch_to_bitmap(unsigned long tifp) { }
+#endif
#ifdef CONFIG_SMP
@@ -505,7 +623,8 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p)
tifn = READ_ONCE(task_thread_info(next_p)->flags);
tifp = READ_ONCE(task_thread_info(prev_p)->flags);
- switch_to_bitmap(prev, next, tifp, tifn);
+
+ switch_to_bitmap(tifp);
propagate_user_return_notify(prev_p, next_p);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index b8ceec4974fe..323499f48858 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -112,74 +112,6 @@ void release_thread(struct task_struct *dead_task)
release_vm86_irqs(dead_task);
}
-int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
- unsigned long arg, struct task_struct *p, unsigned long tls)
-{
- struct pt_regs *childregs = task_pt_regs(p);
- struct fork_frame *fork_frame = container_of(childregs, struct fork_frame, regs);
- struct inactive_task_frame *frame = &fork_frame->frame;
- struct task_struct *tsk;
- int err;
-
- /*
- * For a new task use the RESET flags value since there is no before.
- * All the status flags are zero; DF and all the system flags must also
- * be 0, specifically IF must be 0 because we context switch to the new
- * task with interrupts disabled.
- */
- frame->flags = X86_EFLAGS_FIXED;
- frame->bp = 0;
- frame->ret_addr = (unsigned long) ret_from_fork;
- p->thread.sp = (unsigned long) fork_frame;
- p->thread.sp0 = (unsigned long) (childregs+1);
- memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-
- if (unlikely(p->flags & PF_KTHREAD)) {
- /* kernel thread */
- memset(childregs, 0, sizeof(struct pt_regs));
- frame->bx = sp; /* function */
- frame->di = arg;
- p->thread.io_bitmap_ptr = NULL;
- return 0;
- }
- frame->bx = 0;
- *childregs = *current_pt_regs();
- childregs->ax = 0;
- if (sp)
- childregs->sp = sp;
-
- task_user_gs(p) = get_user_gs(current_pt_regs());
-
- p->thread.io_bitmap_ptr = NULL;
- tsk = current;
- err = -ENOMEM;
-
- if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
- p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,
- IO_BITMAP_BYTES, GFP_KERNEL);
- if (!p->thread.io_bitmap_ptr) {
- p->thread.io_bitmap_max = 0;
- return -ENOMEM;
- }
- set_tsk_thread_flag(p, TIF_IO_BITMAP);
- }
-
- err = 0;
-
- /*
- * Set a new TLS for the child thread?
- */
- if (clone_flags & CLONE_SETTLS)
- err = do_set_thread_area(p, -1,
- (struct user_desc __user *)tls, 0);
-
- if (err && p->thread.io_bitmap_ptr) {
- kfree(p->thread.io_bitmap_ptr);
- p->thread.io_bitmap_max = 0;
- }
- return err;
-}
-
void
start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
{
@@ -255,15 +187,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
load_TLS(next, cpu);
- /*
- * Restore IOPL if needed. In normal use, the flags restore
- * in the switch assembly will handle this. But if the kernel
- * is running virtualized at a non-zero CPL, the popf will
- * not restore flags, so it must be done in a separate step.
- */
- if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl))
- set_iopl_mask(next->iopl);
-
switch_to_extra(prev_p, next_p);
/*
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index af64519b2695..506d66830d4d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -371,81 +371,6 @@ void x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
task->thread.gsbase = gsbase;
}
-int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
- unsigned long arg, struct task_struct *p, unsigned long tls)
-{
- int err;
- struct pt_regs *childregs;
- struct fork_frame *fork_frame;
- struct inactive_task_frame *frame;
- struct task_struct *me = current;
-
- childregs = task_pt_regs(p);
- fork_frame = container_of(childregs, struct fork_frame, regs);
- frame = &fork_frame->frame;
-
- frame->bp = 0;
- frame->ret_addr = (unsigned long) ret_from_fork;
- p->thread.sp = (unsigned long) fork_frame;
- p->thread.io_bitmap_ptr = NULL;
-
- savesegment(gs, p->thread.gsindex);
- p->thread.gsbase = p->thread.gsindex ? 0 : me->thread.gsbase;
- savesegment(fs, p->thread.fsindex);
- p->thread.fsbase = p->thread.fsindex ? 0 : me->thread.fsbase;
- savesegment(es, p->thread.es);
- savesegment(ds, p->thread.ds);
- memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-
- if (unlikely(p->flags & PF_KTHREAD)) {
- /* kernel thread */
- memset(childregs, 0, sizeof(struct pt_regs));
- frame->bx = sp; /* function */
- frame->r12 = arg;
- return 0;
- }
- frame->bx = 0;
- *childregs = *current_pt_regs();
-
- childregs->ax = 0;
- if (sp)
- childregs->sp = sp;
-
- err = -ENOMEM;
- if (unlikely(test_tsk_thread_flag(me, TIF_IO_BITMAP))) {
- p->thread.io_bitmap_ptr = kmemdup(me->thread.io_bitmap_ptr,
- IO_BITMAP_BYTES, GFP_KERNEL);
- if (!p->thread.io_bitmap_ptr) {
- p->thread.io_bitmap_max = 0;
- return -ENOMEM;
- }
- set_tsk_thread_flag(p, TIF_IO_BITMAP);
- }
-
- /*
- * Set a new TLS for the child thread?
- */
- if (clone_flags & CLONE_SETTLS) {
-#ifdef CONFIG_IA32_EMULATION
- if (in_ia32_syscall())
- err = do_set_thread_area(p, -1,
- (struct user_desc __user *)tls, 0);
- else
-#endif
- err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
- if (err)
- goto out;
- }
- err = 0;
-out:
- if (err && p->thread.io_bitmap_ptr) {
- kfree(p->thread.io_bitmap_ptr);
- p->thread.io_bitmap_max = 0;
- }
-
- return err;
-}
-
static void
start_thread_common(struct pt_regs *regs, unsigned long new_ip,
unsigned long new_sp,
@@ -572,17 +497,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
switch_to_extra(prev_p, next_p);
-#ifdef CONFIG_XEN_PV
- /*
- * On Xen PV, IOPL bits in pt_regs->flags have no effect, and
- * current_pt_regs()->flags may not match the current task's
- * intended IOPL. We need to switch it manually.
- */
- if (unlikely(static_cpu_has(X86_FEATURE_XENPV) &&
- prev->iopl != next->iopl))
- xen_set_iopl_mask(next->iopl);
-#endif
-
if (static_cpu_has_bug(X86_BUG_SYSRET_SS_ATTRS)) {
/*
* AMD CPUs have a misfeature: SYSRET sets the SS selector but
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 3c5bbe8e4120..066e5b01a7e0 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -42,6 +42,7 @@
#include <asm/traps.h>
#include <asm/syscall.h>
#include <asm/fsgsbase.h>
+#include <asm/io_bitmap.h>
#include "tls.h"
@@ -697,7 +698,9 @@ static int ptrace_set_debugreg(struct task_struct *tsk, int n,
static int ioperm_active(struct task_struct *target,
const struct user_regset *regset)
{
- return target->thread.io_bitmap_max / regset->size;
+ struct io_bitmap *iobm = target->thread.io_bitmap;
+
+ return iobm ? DIV_ROUND_UP(iobm->max, regset->size) : 0;
}
static int ioperm_get(struct task_struct *target,
@@ -705,12 +708,13 @@ static int ioperm_get(struct task_struct *target,
unsigned int pos, unsigned int count,
void *kbuf, void __user *ubuf)
{
- if (!target->thread.io_bitmap_ptr)
+ struct io_bitmap *iobm = target->thread.io_bitmap;
+
+ if (!iobm)
return -ENXIO;
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- target->thread.io_bitmap_ptr,
- 0, IO_BITMAP_BYTES);
+ iobm->bitmap, 0, IO_BITMAP_BYTES);
}
/*
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index ee26df08002e..94b33885f8d2 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -35,8 +35,7 @@
#define CP_PA_BACKUP_PAGES_MAP DATA(0x1c)
.text
- .globl relocate_kernel
-relocate_kernel:
+SYM_CODE_START_NOALIGN(relocate_kernel)
/* Save the CPU context, used for jumping back */
pushl %ebx
@@ -93,8 +92,9 @@ relocate_kernel:
addl $(identity_mapped - relocate_kernel), %eax
pushl %eax
ret
+SYM_CODE_END(relocate_kernel)
-identity_mapped:
+SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
/* set return address to 0 if not preserving context */
pushl $0
/* store the start address on the stack */
@@ -191,8 +191,9 @@ identity_mapped:
addl $(virtual_mapped - relocate_kernel), %eax
pushl %eax
ret
+SYM_CODE_END(identity_mapped)
-virtual_mapped:
+SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
movl CR4(%edi), %eax
movl %eax, %cr4
movl CR3(%edi), %eax
@@ -208,9 +209,10 @@ virtual_mapped:
popl %esi
popl %ebx
ret
+SYM_CODE_END(virtual_mapped)
/* Do the copies */
-swap_pages:
+SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
movl 8(%esp), %edx
movl 4(%esp), %ecx
pushl %ebp
@@ -270,6 +272,7 @@ swap_pages:
popl %ebx
popl %ebp
ret
+SYM_CODE_END(swap_pages)
.globl kexec_control_code_size
.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index c51ccff5cd01..ef3ba99068d3 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -38,8 +38,7 @@
.text
.align PAGE_SIZE
.code64
- .globl relocate_kernel
-relocate_kernel:
+SYM_CODE_START_NOALIGN(relocate_kernel)
/*
* %rdi indirection_page
* %rsi page_list
@@ -103,8 +102,9 @@ relocate_kernel:
addq $(identity_mapped - relocate_kernel), %r8
pushq %r8
ret
+SYM_CODE_END(relocate_kernel)
-identity_mapped:
+SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
/* set return address to 0 if not preserving context */
pushq $0
/* store the start address on the stack */
@@ -209,8 +209,9 @@ identity_mapped:
movq $virtual_mapped, %rax
pushq %rax
ret
+SYM_CODE_END(identity_mapped)
-virtual_mapped:
+SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
movq RSP(%r8), %rsp
movq CR4(%r8), %rax
movq %rax, %cr4
@@ -228,9 +229,10 @@ virtual_mapped:
popq %rbp
popq %rbx
ret
+SYM_CODE_END(virtual_mapped)
/* Do the copies */
-swap_pages:
+SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
movq %rdi, %rcx /* Put the page_list in %rcx */
xorl %edi, %edi
xorl %esi, %esi
@@ -283,6 +285,7 @@ swap_pages:
jmp 0b
3:
ret
+SYM_CODE_END(swap_pages)
.globl kexec_control_code_size
.set kexec_control_code_size, . - relocate_kernel
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 77ea96b794bd..cedfe2077a69 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -143,6 +143,13 @@ struct boot_params boot_params;
/*
* Machine setup..
*/
+static struct resource rodata_resource = {
+ .name = "Kernel rodata",
+ .start = 0,
+ .end = 0,
+ .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM
+};
+
static struct resource data_resource = {
.name = "Kernel data",
.start = 0,
@@ -438,6 +445,12 @@ static void __init memblock_x86_reserve_range_setup_data(void)
while (pa_data) {
data = early_memremap(pa_data, sizeof(*data));
memblock_reserve(pa_data, sizeof(*data) + data->len);
+
+ if (data->type == SETUP_INDIRECT &&
+ ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT)
+ memblock_reserve(((struct setup_indirect *)data->data)->addr,
+ ((struct setup_indirect *)data->data)->len);
+
pa_data = data->next;
early_memunmap(data, sizeof(*data));
}
@@ -459,7 +472,7 @@ static void __init memblock_x86_reserve_range_setup_data(void)
* due to mapping restrictions.
*
* On 64bit, kdump kernel need be restricted to be under 64TB, which is
- * the upper limit of system RAM in 4-level paing mode. Since the kdump
+ * the upper limit of system RAM in 4-level paging mode. Since the kdump
* jumping could be from 5-level to 4-level, the jumping will fail if
* kernel is put above 64TB, and there's no way to detect the paging mode
* of the kernel which will be loaded for dumping during the 1st kernel
@@ -743,8 +756,8 @@ static void __init trim_bios_range(void)
e820__range_update(0, PAGE_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED);
/*
- * special case: Some BIOSen report the PC BIOS
- * area (640->1Mb) as ram even though it is not.
+ * special case: Some BIOSes report the PC BIOS
+ * area (640Kb -> 1Mb) as RAM even though it is not.
* take them out.
*/
e820__range_remove(BIOS_BEGIN, BIOS_END - BIOS_BEGIN, E820_TYPE_RAM, 1);
@@ -951,7 +964,9 @@ void __init setup_arch(char **cmdline_p)
code_resource.start = __pa_symbol(_text);
code_resource.end = __pa_symbol(_etext)-1;
- data_resource.start = __pa_symbol(_etext);
+ rodata_resource.start = __pa_symbol(__start_rodata);
+ rodata_resource.end = __pa_symbol(__end_rodata)-1;
+ data_resource.start = __pa_symbol(_sdata);
data_resource.end = __pa_symbol(_edata)-1;
bss_resource.start = __pa_symbol(__bss_start);
bss_resource.end = __pa_symbol(__bss_stop)-1;
@@ -1040,6 +1055,7 @@ void __init setup_arch(char **cmdline_p)
/* after parse_early_param, so could debug it */
insert_resource(&iomem_resource, &code_resource);
+ insert_resource(&iomem_resource, &rodata_resource);
insert_resource(&iomem_resource, &data_resource);
insert_resource(&iomem_resource, &bss_resource);
@@ -1122,17 +1138,15 @@ void __init setup_arch(char **cmdline_p)
reserve_bios_regions();
- if (efi_enabled(EFI_MEMMAP)) {
- efi_fake_memmap();
- efi_find_mirror();
- efi_esrt_init();
+ efi_fake_memmap();
+ efi_find_mirror();
+ efi_esrt_init();
- /*
- * The EFI specification says that boot service code won't be
- * called after ExitBootServices(). This is, in fact, a lie.
- */
- efi_reserve_boot_services();
- }
+ /*
+ * The EFI specification says that boot service code won't be
+ * called after ExitBootServices(). This is, in fact, a lie.
+ */
+ efi_reserve_boot_services();
/* preallocate 4k for mptable mpc */
e820__memblock_alloc_reserved_mpc_new();
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 86663874ef04..e6d7894ad127 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -207,8 +207,8 @@ void __init setup_per_cpu_areas(void)
pcpu_cpu_distance,
pcpu_fc_alloc, pcpu_fc_free);
if (rc < 0)
- pr_warning("%s allocator failed (%d), falling back to page size\n",
- pcpu_fc_names[pcpu_chosen_fc], rc);
+ pr_warn("%s allocator failed (%d), falling back to page size\n",
+ pcpu_fc_names[pcpu_chosen_fc], rc);
}
if (rc < 0)
rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
index a49fe1dcb47e..4c61f0713832 100644
--- a/arch/x86/kernel/tboot.c
+++ b/arch/x86/kernel/tboot.c
@@ -57,7 +57,7 @@ void __init tboot_probe(void)
*/
if (!e820__mapped_any(boot_params.tboot_addr,
boot_params.tboot_addr, E820_TYPE_RESERVED)) {
- pr_warning("non-0 tboot_addr but it is not of type E820_TYPE_RESERVED\n");
+ pr_warn("non-0 tboot_addr but it is not of type E820_TYPE_RESERVED\n");
return;
}
@@ -65,13 +65,12 @@ void __init tboot_probe(void)
set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr);
tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);
if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) {
- pr_warning("tboot at 0x%llx is invalid\n",
- boot_params.tboot_addr);
+ pr_warn("tboot at 0x%llx is invalid\n", boot_params.tboot_addr);
tboot = NULL;
return;
}
if (tboot->version < 5) {
- pr_warning("tboot version is invalid: %u\n", tboot->version);
+ pr_warn("tboot version is invalid: %u\n", tboot->version);
tboot = NULL;
return;
}
@@ -289,7 +288,7 @@ static int tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
if (sleep_state >= ACPI_S_STATE_COUNT ||
acpi_shutdown_map[sleep_state] == -1) {
- pr_warning("unsupported sleep state 0x%x\n", sleep_state);
+ pr_warn("unsupported sleep state 0x%x\n", sleep_state);
return -1;
}
@@ -302,7 +301,7 @@ static int tboot_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b)
if (!tboot_enabled())
return 0;
- pr_warning("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)");
+ pr_warn("tboot is not able to suspend on platforms with reduced hardware sleep (ACPIv5)");
return -ENODEV;
}
@@ -320,7 +319,7 @@ static int tboot_wait_for_aps(int num_aps)
}
if (timeout)
- pr_warning("tboot wait for APs timeout\n");
+ pr_warn("tboot wait for APs timeout\n");
return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
}
@@ -516,7 +515,7 @@ int tboot_force_iommu(void)
return 1;
if (no_iommu || swiotlb || dmar_disabled)
- pr_warning("Forcing Intel-IOMMU to enabled\n");
+ pr_warn("Forcing Intel-IOMMU to enabled\n");
dmar_disabled = 0;
#ifdef CONFIG_SWIOTLB
diff --git a/arch/x86/kernel/tce_64.c b/arch/x86/kernel/tce_64.c
deleted file mode 100644
index 6384be751eff..000000000000
--- a/arch/x86/kernel/tce_64.c
+++ /dev/null
@@ -1,177 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * This file manages the translation entries for the IBM Calgary IOMMU.
- *
- * Derived from arch/powerpc/platforms/pseries/iommu.c
- *
- * Copyright (C) IBM Corporation, 2006
- *
- * Author: Jon Mason <jdmason@us.ibm.com>
- * Author: Muli Ben-Yehuda <muli@il.ibm.com>
- */
-
-#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/pci.h>
-#include <linux/dma-mapping.h>
-#include <linux/memblock.h>
-#include <asm/tce.h>
-#include <asm/calgary.h>
-#include <asm/proto.h>
-#include <asm/cacheflush.h>
-
-/* flush a tce at 'tceaddr' to main memory */
-static inline void flush_tce(void* tceaddr)
-{
- /* a single tce can't cross a cache line */
- if (boot_cpu_has(X86_FEATURE_CLFLUSH))
- clflush(tceaddr);
- else
- wbinvd();
-}
-
-void tce_build(struct iommu_table *tbl, unsigned long index,
- unsigned int npages, unsigned long uaddr, int direction)
-{
- u64* tp;
- u64 t;
- u64 rpn;
-
- t = (1 << TCE_READ_SHIFT);
- if (direction != DMA_TO_DEVICE)
- t |= (1 << TCE_WRITE_SHIFT);
-
- tp = ((u64*)tbl->it_base) + index;
-
- while (npages--) {
- rpn = (virt_to_bus((void*)uaddr)) >> PAGE_SHIFT;
- t &= ~TCE_RPN_MASK;
- t |= (rpn << TCE_RPN_SHIFT);
-
- *tp = cpu_to_be64(t);
- flush_tce(tp);
-
- uaddr += PAGE_SIZE;
- tp++;
- }
-}
-
-void tce_free(struct iommu_table *tbl, long index, unsigned int npages)
-{
- u64* tp;
-
- tp = ((u64*)tbl->it_base) + index;
-
- while (npages--) {
- *tp = cpu_to_be64(0);
- flush_tce(tp);
- tp++;
- }
-}
-
-static inline unsigned int table_size_to_number_of_entries(unsigned char size)
-{
- /*
- * size is the order of the table, 0-7
- * smallest table is 8K entries, so shift result by 13 to
- * multiply by 8K
- */
- return (1 << size) << 13;
-}
-
-static int tce_table_setparms(struct pci_dev *dev, struct iommu_table *tbl)
-{
- unsigned int bitmapsz;
- unsigned long bmppages;
- int ret;
-
- tbl->it_busno = dev->bus->number;
-
- /* set the tce table size - measured in entries */
- tbl->it_size = table_size_to_number_of_entries(specified_table_size);
-
- /*
- * number of bytes needed for the bitmap size in number of
- * entries; we need one bit per entry
- */
- bitmapsz = tbl->it_size / BITS_PER_BYTE;
- bmppages = __get_free_pages(GFP_KERNEL, get_order(bitmapsz));
- if (!bmppages) {
- printk(KERN_ERR "Calgary: cannot allocate bitmap\n");
- ret = -ENOMEM;
- goto done;
- }
-
- tbl->it_map = (unsigned long*)bmppages;
-
- memset(tbl->it_map, 0, bitmapsz);
-
- tbl->it_hint = 0;
-
- spin_lock_init(&tbl->it_lock);
-
- return 0;
-
-done:
- return ret;
-}
-
-int __init build_tce_table(struct pci_dev *dev, void __iomem *bbar)
-{
- struct iommu_table *tbl;
- int ret;
-
- if (pci_iommu(dev->bus)) {
- printk(KERN_ERR "Calgary: dev %p has sysdata->iommu %p\n",
- dev, pci_iommu(dev->bus));
- BUG();
- }
-
- tbl = kzalloc(sizeof(struct iommu_table), GFP_KERNEL);
- if (!tbl) {
- printk(KERN_ERR "Calgary: error allocating iommu_table\n");
- ret = -ENOMEM;
- goto done;
- }
-
- ret = tce_table_setparms(dev, tbl);
- if (ret)
- goto free_tbl;
-
- tbl->bbar = bbar;
-
- set_pci_iommu(dev->bus, tbl);
-
- return 0;
-
-free_tbl:
- kfree(tbl);
-done:
- return ret;
-}
-
-void * __init alloc_tce_table(void)
-{
- unsigned int size;
-
- size = table_size_to_number_of_entries(specified_table_size);
- size *= TCE_ENTRY_SIZE;
-
- return memblock_alloc_low(size, size);
-}
-
-void __init free_tce_table(void *tbl)
-{
- unsigned int size;
-
- if (!tbl)
- return;
-
- size = table_size_to_number_of_entries(specified_table_size);
- size *= TCE_ENTRY_SIZE;
-
- memblock_free(__pa(tbl), size);
-}
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 4bb0f8447112..c90312146da0 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -37,11 +37,6 @@
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/io.h>
-
-#if defined(CONFIG_EDAC)
-#include <linux/edac.h>
-#endif
-
#include <asm/stacktrace.h>
#include <asm/processor.h>
#include <asm/debugreg.h>
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index ec534f978867..b8acf639abd1 100644
--- a/arch/x86/kernel/tsc_sync.c
+++ b/arch/x86/kernel/tsc_sync.c
@@ -364,12 +364,12 @@ retry:
/* Force it to 0 if random warps brought us here */
atomic_set(&test_runs, 0);
- pr_warning("TSC synchronization [CPU#%d -> CPU#%d]:\n",
+ pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n",
smp_processor_id(), cpu);
- pr_warning("Measured %Ld cycles TSC warp between CPUs, "
- "turning off TSC clock.\n", max_warp);
+ pr_warn("Measured %Ld cycles TSC warp between CPUs, "
+ "turning off TSC clock.\n", max_warp);
if (random_warps)
- pr_warning("TSC warped randomly between CPUs\n");
+ pr_warn("TSC warped randomly between CPUs\n");
mark_tsc_unstable("check_tsc_sync_source failed");
}
diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c
index 548fefed71ee..4d732a444711 100644
--- a/arch/x86/kernel/umip.c
+++ b/arch/x86/kernel/umip.c
@@ -1,6 +1,6 @@
/*
- * umip.c Emulation for instruction protected by the Intel User-Mode
- * Instruction Prevention feature
+ * umip.c Emulation for instruction protected by the User-Mode Instruction
+ * Prevention feature
*
* Copyright (c) 2017, Intel Corporation.
* Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
@@ -18,10 +18,10 @@
/** DOC: Emulation for User-Mode Instruction Prevention (UMIP)
*
- * The feature User-Mode Instruction Prevention present in recent Intel
- * processor prevents a group of instructions (SGDT, SIDT, SLDT, SMSW and STR)
- * from being executed with CPL > 0. Otherwise, a general protection fault is
- * issued.
+ * User-Mode Instruction Prevention is a security feature present in recent
+ * x86 processors that, when enabled, prevents a group of instructions (SGDT,
+ * SIDT, SLDT, SMSW and STR) from being run in user mode by issuing a general
+ * protection fault if the instruction is executed with CPL > 0.
*
* Rather than relaying to the user space the general protection fault caused by
* the UMIP-protected instructions (in the form of a SIGSEGV signal), it can be
@@ -91,7 +91,7 @@ const char * const umip_insns[5] = {
#define umip_pr_err(regs, fmt, ...) \
umip_printk(regs, KERN_ERR, fmt, ##__VA_ARGS__)
-#define umip_pr_warning(regs, fmt, ...) \
+#define umip_pr_warn(regs, fmt, ...) \
umip_printk(regs, KERN_WARNING, fmt, ##__VA_ARGS__)
/**
@@ -380,14 +380,14 @@ bool fixup_umip_exception(struct pt_regs *regs)
if (umip_inst < 0)
return false;
- umip_pr_warning(regs, "%s instruction cannot be used by applications.\n",
+ umip_pr_warn(regs, "%s instruction cannot be used by applications.\n",
umip_insns[umip_inst]);
/* Do not emulate (spoof) SLDT or STR. */
if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT)
return false;
- umip_pr_warning(regs, "For now, expensive software emulation returns the result.\n");
+ umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n");
if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
user_64bit_mode(regs)))
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 8cd745ef8c7b..15e5aad8ac2c 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -842,8 +842,8 @@ static int push_setup_xol_ops(struct arch_uprobe *auprobe, struct insn *insn)
/**
* arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
+ * @auprobe: the probepoint information.
* @mm: the probed address space.
- * @arch_uprobe: the probepoint information.
* @addr: virtual address at which to install the probepoint
* Return 0 on success or a -ve number on error.
*/
diff --git a/arch/x86/kernel/verify_cpu.S b/arch/x86/kernel/verify_cpu.S
index a024c4f7ba56..641f0fe1e5b4 100644
--- a/arch/x86/kernel/verify_cpu.S
+++ b/arch/x86/kernel/verify_cpu.S
@@ -31,7 +31,7 @@
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
-ENTRY(verify_cpu)
+SYM_FUNC_START_LOCAL(verify_cpu)
pushf # Save caller passed flags
push $0 # Kill any dangerous flags
popf
@@ -137,4 +137,4 @@ ENTRY(verify_cpu)
popf # Restore caller passed flags
xorl %eax, %eax
ret
-ENDPROC(verify_cpu)
+SYM_FUNC_END(verify_cpu)
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index e2feacf921a0..3a1a819da137 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -21,6 +21,9 @@
#define LOAD_OFFSET __START_KERNEL_map
#endif
+#define EMITS_PT_NOTE
+#define RO_EXCEPTION_TABLE_ALIGN 16
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
@@ -141,17 +144,12 @@ SECTIONS
*(.text.__x86.indirect_thunk)
__indirect_thunk_end = .;
#endif
+ } :text =0xcccc
- /* End of text section */
- _etext = .;
- } :text = 0x9090
-
- NOTES :text :note
-
- EXCEPTION_TABLE(16) :text = 0x9090
-
- /* .text should occupy whole number of pages */
+ /* End of text section, which should occupy whole number of pages */
+ _etext = .;
. = ALIGN(PAGE_SIZE);
+
X86_ALIGN_RODATA_BEGIN
RO_DATA(PAGE_SIZE)
X86_ALIGN_RODATA_END
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 18a799c8fa28..ce89430a7f80 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -31,6 +31,28 @@ static int __init iommu_init_noop(void) { return 0; }
static void iommu_shutdown_noop(void) { }
bool __init bool_x86_init_noop(void) { return false; }
void x86_op_int_noop(int cpu) { }
+static __init int set_rtc_noop(const struct timespec64 *now) { return -EINVAL; }
+static __init void get_rtc_noop(struct timespec64 *now) { }
+
+static __initconst const struct of_device_id of_cmos_match[] = {
+ { .compatible = "motorola,mc146818" },
+ {}
+};
+
+/*
+ * Allow devicetree configured systems to disable the RTC by setting the
+ * corresponding DT node's status property to disabled. Code is optimized
+ * out for CONFIG_OF=n builds.
+ */
+static __init void x86_wallclock_init(void)
+{
+ struct device_node *node = of_find_matching_node(NULL, of_cmos_match);
+
+ if (node && !of_device_is_available(node)) {
+ x86_platform.get_wallclock = get_rtc_noop;
+ x86_platform.set_wallclock = set_rtc_noop;
+ }
+}
/*
* The platform setup functions are preset with the default functions
@@ -73,7 +95,7 @@ struct x86_init_ops x86_init __initdata = {
.timers = {
.setup_percpu_clockev = setup_boot_APIC_clock,
.timer_init = hpet_time_init,
- .wallclock_init = x86_init_noop,
+ .wallclock_init = x86_wallclock_init,
},
.iommu = {
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index d5e6d5b3f06f..bcc6a73d6628 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -508,8 +508,8 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
*filter = tmp;
mutex_lock(&kvm->lock);
- rcu_swap_protected(kvm->arch.pmu_event_filter, filter,
- mutex_is_locked(&kvm->lock));
+ filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
+ mutex_is_locked(&kvm->lock));
mutex_unlock(&kvm->lock);
synchronize_srcu_expedited(&kvm->srcu);
diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 751a384c2eb0..81ada2ce99e7 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -43,7 +43,7 @@
* they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
* to vmx_vmexit.
*/
-ENTRY(vmx_vmenter)
+SYM_FUNC_START(vmx_vmenter)
/* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
je 2f
@@ -65,7 +65,7 @@ ENTRY(vmx_vmenter)
_ASM_EXTABLE(1b, 5b)
_ASM_EXTABLE(2b, 5b)
-ENDPROC(vmx_vmenter)
+SYM_FUNC_END(vmx_vmenter)
/**
* vmx_vmexit - Handle a VMX VM-Exit
@@ -77,7 +77,7 @@ ENDPROC(vmx_vmenter)
* here after hardware loads the host's state, i.e. this is the destination
* referred to by VMCS.HOST_RIP.
*/
-ENTRY(vmx_vmexit)
+SYM_FUNC_START(vmx_vmexit)
#ifdef CONFIG_RETPOLINE
ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
/* Preserve guest's RAX, it's used to stuff the RSB. */
@@ -90,7 +90,7 @@ ENTRY(vmx_vmexit)
.Lvmexit_skip_rsb:
#endif
ret
-ENDPROC(vmx_vmexit)
+SYM_FUNC_END(vmx_vmexit)
/**
* __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
@@ -101,7 +101,7 @@ ENDPROC(vmx_vmexit)
* Returns:
* 0 on VM-Exit, 1 on VM-Fail
*/
-ENTRY(__vmx_vcpu_run)
+SYM_FUNC_START(__vmx_vcpu_run)
push %_ASM_BP
mov %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
@@ -233,4 +233,4 @@ ENTRY(__vmx_vcpu_run)
/* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */
2: mov $1, %eax
jmp 1b
-ENDPROC(__vmx_vcpu_run)
+SYM_FUNC_END(__vmx_vcpu_run)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d175429c91b0..1b9ab4166397 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1368,14 +1368,6 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
(unsigned long)&get_cpu_entry_area(cpu)->tss.x86_tss);
vmcs_writel(HOST_GDTR_BASE, (unsigned long)gdt); /* 22.2.4 */
- /*
- * VM exits change the host TR limit to 0x67 after a VM
- * exit. This is okay, since 0x67 covers everything except
- * the IO bitmap and have have code to handle the IO bitmap
- * being lost after a VM exit.
- */
- BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67);
-
rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp);
vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3ed167e039e5..cf917139de6b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -68,6 +68,7 @@
#include <asm/mshyperv.h>
#include <asm/hypervisor.h>
#include <asm/intel_pt.h>
+#include <asm/emulate_prefix.h>
#include <clocksource/hyperv_timer.h>
#define CREATE_TRACE_POINTS
@@ -5492,6 +5493,7 @@ EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system);
int handle_ud(struct kvm_vcpu *vcpu)
{
+ static const char kvm_emulate_prefix[] = { __KVM_EMULATE_PREFIX };
int emul_type = EMULTYPE_TRAP_UD;
char sig[5]; /* ud2; .ascii "kvm" */
struct x86_exception e;
@@ -5499,7 +5501,7 @@ int handle_ud(struct kvm_vcpu *vcpu)
if (force_emulation_prefix &&
kvm_read_guest_virt(vcpu, kvm_get_linear_rip(vcpu),
sig, sizeof(sig), &e) == 0 &&
- memcmp(sig, "\xf\xbkvm", sizeof(sig)) == 0) {
+ memcmp(sig, kvm_emulate_prefix, sizeof(sig)) == 0) {
kvm_rip_write(vcpu, kvm_rip_read(vcpu) + sizeof(sig));
emul_type = EMULTYPE_TRAP_UD_FORCED;
}
diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S
index e0788bade5ab..3b6544111ac9 100644
--- a/arch/x86/lib/atomic64_386_32.S
+++ b/arch/x86/lib/atomic64_386_32.S
@@ -20,10 +20,10 @@
#define BEGIN(op) \
.macro endp; \
-ENDPROC(atomic64_##op##_386); \
+SYM_FUNC_END(atomic64_##op##_386); \
.purgem endp; \
.endm; \
-ENTRY(atomic64_##op##_386); \
+SYM_FUNC_START(atomic64_##op##_386); \
LOCK v;
#define ENDP endp
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index 843d978ee341..1c5c81c16b06 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -16,12 +16,12 @@
cmpxchg8b (\reg)
.endm
-ENTRY(atomic64_read_cx8)
+SYM_FUNC_START(atomic64_read_cx8)
read64 %ecx
ret
-ENDPROC(atomic64_read_cx8)
+SYM_FUNC_END(atomic64_read_cx8)
-ENTRY(atomic64_set_cx8)
+SYM_FUNC_START(atomic64_set_cx8)
1:
/* we don't need LOCK_PREFIX since aligned 64-bit writes
* are atomic on 586 and newer */
@@ -29,19 +29,19 @@ ENTRY(atomic64_set_cx8)
jne 1b
ret
-ENDPROC(atomic64_set_cx8)
+SYM_FUNC_END(atomic64_set_cx8)
-ENTRY(atomic64_xchg_cx8)
+SYM_FUNC_START(atomic64_xchg_cx8)
1:
LOCK_PREFIX
cmpxchg8b (%esi)
jne 1b
ret
-ENDPROC(atomic64_xchg_cx8)
+SYM_FUNC_END(atomic64_xchg_cx8)
.macro addsub_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
+SYM_FUNC_START(atomic64_\func\()_return_cx8)
pushl %ebp
pushl %ebx
pushl %esi
@@ -69,14 +69,14 @@ ENTRY(atomic64_\func\()_return_cx8)
popl %ebx
popl %ebp
ret
-ENDPROC(atomic64_\func\()_return_cx8)
+SYM_FUNC_END(atomic64_\func\()_return_cx8)
.endm
addsub_return add add adc
addsub_return sub sub sbb
.macro incdec_return func ins insc
-ENTRY(atomic64_\func\()_return_cx8)
+SYM_FUNC_START(atomic64_\func\()_return_cx8)
pushl %ebx
read64 %esi
@@ -94,13 +94,13 @@ ENTRY(atomic64_\func\()_return_cx8)
movl %ecx, %edx
popl %ebx
ret
-ENDPROC(atomic64_\func\()_return_cx8)
+SYM_FUNC_END(atomic64_\func\()_return_cx8)
.endm
incdec_return inc add adc
incdec_return dec sub sbb
-ENTRY(atomic64_dec_if_positive_cx8)
+SYM_FUNC_START(atomic64_dec_if_positive_cx8)
pushl %ebx
read64 %esi
@@ -119,9 +119,9 @@ ENTRY(atomic64_dec_if_positive_cx8)
movl %ecx, %edx
popl %ebx
ret
-ENDPROC(atomic64_dec_if_positive_cx8)
+SYM_FUNC_END(atomic64_dec_if_positive_cx8)
-ENTRY(atomic64_add_unless_cx8)
+SYM_FUNC_START(atomic64_add_unless_cx8)
pushl %ebp
pushl %ebx
/* these just push these two parameters on the stack */
@@ -155,9 +155,9 @@ ENTRY(atomic64_add_unless_cx8)
jne 2b
xorl %eax, %eax
jmp 3b
-ENDPROC(atomic64_add_unless_cx8)
+SYM_FUNC_END(atomic64_add_unless_cx8)
-ENTRY(atomic64_inc_not_zero_cx8)
+SYM_FUNC_START(atomic64_inc_not_zero_cx8)
pushl %ebx
read64 %esi
@@ -177,4 +177,4 @@ ENTRY(atomic64_inc_not_zero_cx8)
3:
popl %ebx
ret
-ENDPROC(atomic64_inc_not_zero_cx8)
+SYM_FUNC_END(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index 4df90c9ea383..4742e8fa7ee7 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -46,7 +46,7 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
* Fortunately, it is easy to convert 2-byte alignment to 4-byte
* alignment for the unrolled loop.
*/
-ENTRY(csum_partial)
+SYM_FUNC_START(csum_partial)
pushl %esi
pushl %ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
@@ -128,13 +128,13 @@ ENTRY(csum_partial)
popl %ebx
popl %esi
ret
-ENDPROC(csum_partial)
+SYM_FUNC_END(csum_partial)
#else
/* Version for PentiumII/PPro */
-ENTRY(csum_partial)
+SYM_FUNC_START(csum_partial)
pushl %esi
pushl %ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
@@ -246,7 +246,7 @@ ENTRY(csum_partial)
popl %ebx
popl %esi
ret
-ENDPROC(csum_partial)
+SYM_FUNC_END(csum_partial)
#endif
EXPORT_SYMBOL(csum_partial)
@@ -280,7 +280,7 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst,
#define ARGBASE 16
#define FP 12
-ENTRY(csum_partial_copy_generic)
+SYM_FUNC_START(csum_partial_copy_generic)
subl $4,%esp
pushl %edi
pushl %esi
@@ -398,7 +398,7 @@ DST( movb %cl, (%edi) )
popl %edi
popl %ecx # equivalent to addl $4,%esp
ret
-ENDPROC(csum_partial_copy_generic)
+SYM_FUNC_END(csum_partial_copy_generic)
#else
@@ -416,7 +416,7 @@ ENDPROC(csum_partial_copy_generic)
#define ARGBASE 12
-ENTRY(csum_partial_copy_generic)
+SYM_FUNC_START(csum_partial_copy_generic)
pushl %ebx
pushl %edi
pushl %esi
@@ -483,7 +483,7 @@ DST( movb %dl, (%edi) )
popl %edi
popl %ebx
ret
-ENDPROC(csum_partial_copy_generic)
+SYM_FUNC_END(csum_partial_copy_generic)
#undef ROUND
#undef ROUND1
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index 75a5a4515fa7..c4c7dd115953 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -13,15 +13,15 @@
* Zero a page.
* %rdi - page
*/
-ENTRY(clear_page_rep)
+SYM_FUNC_START(clear_page_rep)
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
ret
-ENDPROC(clear_page_rep)
+SYM_FUNC_END(clear_page_rep)
EXPORT_SYMBOL_GPL(clear_page_rep)
-ENTRY(clear_page_orig)
+SYM_FUNC_START(clear_page_orig)
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -40,13 +40,13 @@ ENTRY(clear_page_orig)
jnz .Lloop
nop
ret
-ENDPROC(clear_page_orig)
+SYM_FUNC_END(clear_page_orig)
EXPORT_SYMBOL_GPL(clear_page_orig)
-ENTRY(clear_page_erms)
+SYM_FUNC_START(clear_page_erms)
movl $4096,%ecx
xorl %eax,%eax
rep stosb
ret
-ENDPROC(clear_page_erms)
+SYM_FUNC_END(clear_page_erms)
EXPORT_SYMBOL_GPL(clear_page_erms)
diff --git a/arch/x86/lib/cmpxchg16b_emu.S b/arch/x86/lib/cmpxchg16b_emu.S
index d63185698a23..3542502faa3b 100644
--- a/arch/x86/lib/cmpxchg16b_emu.S
+++ b/arch/x86/lib/cmpxchg16b_emu.S
@@ -13,7 +13,7 @@
* %rcx : high 64 bits of new value
* %al : Operation successful
*/
-ENTRY(this_cpu_cmpxchg16b_emu)
+SYM_FUNC_START(this_cpu_cmpxchg16b_emu)
#
# Emulate 'cmpxchg16b %gs:(%rsi)' except we return the result in %al not
@@ -44,4 +44,4 @@ ENTRY(this_cpu_cmpxchg16b_emu)
xor %al,%al
ret
-ENDPROC(this_cpu_cmpxchg16b_emu)
+SYM_FUNC_END(this_cpu_cmpxchg16b_emu)
diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S
index 691d80e97488..ca01ed6029f4 100644
--- a/arch/x86/lib/cmpxchg8b_emu.S
+++ b/arch/x86/lib/cmpxchg8b_emu.S
@@ -13,7 +13,7 @@
* %ebx : low 32 bits of new value
* %ecx : high 32 bits of new value
*/
-ENTRY(cmpxchg8b_emu)
+SYM_FUNC_START(cmpxchg8b_emu)
#
# Emulate 'cmpxchg8b (%esi)' on UP except we don't
@@ -42,5 +42,5 @@ ENTRY(cmpxchg8b_emu)
popfl
ret
-ENDPROC(cmpxchg8b_emu)
+SYM_FUNC_END(cmpxchg8b_emu)
EXPORT_SYMBOL(cmpxchg8b_emu)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index fd2d09afa097..2402d4c489d2 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -13,15 +13,15 @@
* prefetch distance based on SMP/UP.
*/
ALIGN
-ENTRY(copy_page)
+SYM_FUNC_START(copy_page)
ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
ret
-ENDPROC(copy_page)
+SYM_FUNC_END(copy_page)
EXPORT_SYMBOL(copy_page)
-ENTRY(copy_page_regs)
+SYM_FUNC_START_LOCAL(copy_page_regs)
subq $2*8, %rsp
movq %rbx, (%rsp)
movq %r12, 1*8(%rsp)
@@ -86,4 +86,4 @@ ENTRY(copy_page_regs)
movq 1*8(%rsp), %r12
addq $2*8, %rsp
ret
-ENDPROC(copy_page_regs)
+SYM_FUNC_END(copy_page_regs)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index 86976b55ae74..816f128a6d52 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -53,7 +53,7 @@
* Output:
* eax uncopied bytes or 0 if successful.
*/
-ENTRY(copy_user_generic_unrolled)
+SYM_FUNC_START(copy_user_generic_unrolled)
ASM_STAC
cmpl $8,%edx
jb 20f /* less then 8 bytes, go to byte copy loop */
@@ -136,7 +136,7 @@ ENTRY(copy_user_generic_unrolled)
_ASM_EXTABLE_UA(19b, 40b)
_ASM_EXTABLE_UA(21b, 50b)
_ASM_EXTABLE_UA(22b, 50b)
-ENDPROC(copy_user_generic_unrolled)
+SYM_FUNC_END(copy_user_generic_unrolled)
EXPORT_SYMBOL(copy_user_generic_unrolled)
/* Some CPUs run faster using the string copy instructions.
@@ -157,7 +157,7 @@ EXPORT_SYMBOL(copy_user_generic_unrolled)
* Output:
* eax uncopied bytes or 0 if successful.
*/
-ENTRY(copy_user_generic_string)
+SYM_FUNC_START(copy_user_generic_string)
ASM_STAC
cmpl $8,%edx
jb 2f /* less than 8 bytes, go to byte copy loop */
@@ -182,7 +182,7 @@ ENTRY(copy_user_generic_string)
_ASM_EXTABLE_UA(1b, 11b)
_ASM_EXTABLE_UA(3b, 12b)
-ENDPROC(copy_user_generic_string)
+SYM_FUNC_END(copy_user_generic_string)
EXPORT_SYMBOL(copy_user_generic_string)
/*
@@ -197,7 +197,7 @@ EXPORT_SYMBOL(copy_user_generic_string)
* Output:
* eax uncopied bytes or 0 if successful.
*/
-ENTRY(copy_user_enhanced_fast_string)
+SYM_FUNC_START(copy_user_enhanced_fast_string)
ASM_STAC
cmpl $64,%edx
jb .L_copy_short_string /* less then 64 bytes, avoid the costly 'rep' */
@@ -214,7 +214,7 @@ ENTRY(copy_user_enhanced_fast_string)
.previous
_ASM_EXTABLE_UA(1b, 12b)
-ENDPROC(copy_user_enhanced_fast_string)
+SYM_FUNC_END(copy_user_enhanced_fast_string)
EXPORT_SYMBOL(copy_user_enhanced_fast_string)
/*
@@ -230,8 +230,7 @@ EXPORT_SYMBOL(copy_user_enhanced_fast_string)
* Output:
* eax uncopied bytes or 0 if successful.
*/
-ALIGN;
-.Lcopy_user_handle_tail:
+SYM_CODE_START_LOCAL(.Lcopy_user_handle_tail)
movl %edx,%ecx
1: rep movsb
2: mov %ecx,%eax
@@ -239,7 +238,7 @@ ALIGN;
ret
_ASM_EXTABLE_UA(1b, 2b)
-END(.Lcopy_user_handle_tail)
+SYM_CODE_END(.Lcopy_user_handle_tail)
/*
* copy_user_nocache - Uncached memory copy with exception handling
@@ -250,7 +249,7 @@ END(.Lcopy_user_handle_tail)
* - Require 8-byte alignment when size is 8 bytes or larger.
* - Require 4-byte alignment when size is 4 bytes.
*/
-ENTRY(__copy_user_nocache)
+SYM_FUNC_START(__copy_user_nocache)
ASM_STAC
/* If size is less than 8 bytes, go to 4-byte copy */
@@ -389,5 +388,5 @@ ENTRY(__copy_user_nocache)
_ASM_EXTABLE_UA(31b, .L_fixup_4b_copy)
_ASM_EXTABLE_UA(40b, .L_fixup_1b_copy)
_ASM_EXTABLE_UA(41b, .L_fixup_1b_copy)
-ENDPROC(__copy_user_nocache)
+SYM_FUNC_END(__copy_user_nocache)
EXPORT_SYMBOL(__copy_user_nocache)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index a4a379e79259..3394a8ff7fd0 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -49,7 +49,7 @@
.endm
-ENTRY(csum_partial_copy_generic)
+SYM_FUNC_START(csum_partial_copy_generic)
cmpl $3*64, %edx
jle .Lignore
@@ -225,4 +225,4 @@ ENTRY(csum_partial_copy_generic)
jz .Lende
movl $-EFAULT, (%rax)
jmp .Lende
-ENDPROC(csum_partial_copy_generic)
+SYM_FUNC_END(csum_partial_copy_generic)
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 9578eb88fc87..c8a85b512796 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -36,7 +36,7 @@
#include <asm/export.h>
.text
-ENTRY(__get_user_1)
+SYM_FUNC_START(__get_user_1)
mov PER_CPU_VAR(current_task), %_ASM_DX
cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
jae bad_get_user
@@ -47,10 +47,10 @@ ENTRY(__get_user_1)
xor %eax,%eax
ASM_CLAC
ret
-ENDPROC(__get_user_1)
+SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1)
-ENTRY(__get_user_2)
+SYM_FUNC_START(__get_user_2)
add $1,%_ASM_AX
jc bad_get_user
mov PER_CPU_VAR(current_task), %_ASM_DX
@@ -63,10 +63,10 @@ ENTRY(__get_user_2)
xor %eax,%eax
ASM_CLAC
ret
-ENDPROC(__get_user_2)
+SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2)
-ENTRY(__get_user_4)
+SYM_FUNC_START(__get_user_4)
add $3,%_ASM_AX
jc bad_get_user
mov PER_CPU_VAR(current_task), %_ASM_DX
@@ -79,10 +79,10 @@ ENTRY(__get_user_4)
xor %eax,%eax
ASM_CLAC
ret
-ENDPROC(__get_user_4)
+SYM_FUNC_END(__get_user_4)
EXPORT_SYMBOL(__get_user_4)
-ENTRY(__get_user_8)
+SYM_FUNC_START(__get_user_8)
#ifdef CONFIG_X86_64
add $7,%_ASM_AX
jc bad_get_user
@@ -111,25 +111,27 @@ ENTRY(__get_user_8)
ASM_CLAC
ret
#endif
-ENDPROC(__get_user_8)
+SYM_FUNC_END(__get_user_8)
EXPORT_SYMBOL(__get_user_8)
-.Lbad_get_user_clac:
+SYM_CODE_START_LOCAL(.Lbad_get_user_clac)
ASM_CLAC
bad_get_user:
xor %edx,%edx
mov $(-EFAULT),%_ASM_AX
ret
+SYM_CODE_END(.Lbad_get_user_clac)
#ifdef CONFIG_X86_32
-.Lbad_get_user_8_clac:
+SYM_CODE_START_LOCAL(.Lbad_get_user_8_clac)
ASM_CLAC
bad_get_user_8:
xor %edx,%edx
xor %ecx,%ecx
mov $(-EFAULT),%_ASM_AX
ret
+SYM_CODE_END(.Lbad_get_user_8_clac)
#endif
_ASM_EXTABLE_UA(1b, .Lbad_get_user_clac)
diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S
index a14f9939c365..dbf8cc97b7f5 100644
--- a/arch/x86/lib/hweight.S
+++ b/arch/x86/lib/hweight.S
@@ -8,7 +8,7 @@
* unsigned int __sw_hweight32(unsigned int w)
* %rdi: w
*/
-ENTRY(__sw_hweight32)
+SYM_FUNC_START(__sw_hweight32)
#ifdef CONFIG_X86_64
movl %edi, %eax # w
@@ -33,10 +33,10 @@ ENTRY(__sw_hweight32)
shrl $24, %eax # w = w_tmp >> 24
__ASM_SIZE(pop,) %__ASM_REG(dx)
ret
-ENDPROC(__sw_hweight32)
+SYM_FUNC_END(__sw_hweight32)
EXPORT_SYMBOL(__sw_hweight32)
-ENTRY(__sw_hweight64)
+SYM_FUNC_START(__sw_hweight64)
#ifdef CONFIG_X86_64
pushq %rdi
pushq %rdx
@@ -79,5 +79,5 @@ ENTRY(__sw_hweight64)
popl %ecx
ret
#endif
-ENDPROC(__sw_hweight64)
+SYM_FUNC_END(__sw_hweight64)
EXPORT_SYMBOL(__sw_hweight64)
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 0b5862ba6a75..404279563891 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -13,6 +13,8 @@
#include <asm/inat.h>
#include <asm/insn.h>
+#include <asm/emulate_prefix.h>
+
/* Verify next sizeof(t) bytes can be on the same instruction */
#define validate_next(t, insn, n) \
((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
@@ -58,6 +60,36 @@ void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
insn->addr_bytes = 4;
}
+static const insn_byte_t xen_prefix[] = { __XEN_EMULATE_PREFIX };
+static const insn_byte_t kvm_prefix[] = { __KVM_EMULATE_PREFIX };
+
+static int __insn_get_emulate_prefix(struct insn *insn,
+ const insn_byte_t *prefix, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (peek_nbyte_next(insn_byte_t, insn, i) != prefix[i])
+ goto err_out;
+ }
+
+ insn->emulate_prefix_size = len;
+ insn->next_byte += len;
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+static void insn_get_emulate_prefix(struct insn *insn)
+{
+ if (__insn_get_emulate_prefix(insn, xen_prefix, sizeof(xen_prefix)))
+ return;
+
+ __insn_get_emulate_prefix(insn, kvm_prefix, sizeof(kvm_prefix));
+}
+
/**
* insn_get_prefixes - scan x86 instruction prefix bytes
* @insn: &struct insn containing instruction
@@ -76,6 +108,8 @@ void insn_get_prefixes(struct insn *insn)
if (prefixes->got)
return;
+ insn_get_emulate_prefix(insn);
+
nb = 0;
lb = 0;
b = peek_next(insn_byte_t, insn);
diff --git a/arch/x86/lib/iomap_copy_64.S b/arch/x86/lib/iomap_copy_64.S
index a9bdf0805be0..cb5a1964506b 100644
--- a/arch/x86/lib/iomap_copy_64.S
+++ b/arch/x86/lib/iomap_copy_64.S
@@ -8,8 +8,8 @@
/*
* override generic version in lib/iomap_copy.c
*/
-ENTRY(__iowrite32_copy)
+SYM_FUNC_START(__iowrite32_copy)
movl %edx,%ecx
rep movsd
ret
-ENDPROC(__iowrite32_copy)
+SYM_FUNC_END(__iowrite32_copy)
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 92748660ba51..56b243b14c3a 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -28,8 +28,8 @@
* Output:
* rax original destination
*/
-ENTRY(__memcpy)
-ENTRY(memcpy)
+SYM_FUNC_START_ALIAS(__memcpy)
+SYM_FUNC_START_LOCAL(memcpy)
ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
"jmp memcpy_erms", X86_FEATURE_ERMS
@@ -41,8 +41,8 @@ ENTRY(memcpy)
movl %edx, %ecx
rep movsb
ret
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
+SYM_FUNC_END(memcpy)
+SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)
@@ -50,14 +50,14 @@ EXPORT_SYMBOL(__memcpy)
* memcpy_erms() - enhanced fast string memcpy. This is faster and
* simpler than memcpy. Use memcpy_erms when possible.
*/
-ENTRY(memcpy_erms)
+SYM_FUNC_START_LOCAL(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
-ENDPROC(memcpy_erms)
+SYM_FUNC_END(memcpy_erms)
-ENTRY(memcpy_orig)
+SYM_FUNC_START_LOCAL(memcpy_orig)
movq %rdi, %rax
cmpq $0x20, %rdx
@@ -182,7 +182,7 @@ ENTRY(memcpy_orig)
.Lend:
retq
-ENDPROC(memcpy_orig)
+SYM_FUNC_END(memcpy_orig)
#ifndef CONFIG_UML
@@ -193,7 +193,7 @@ MCSAFE_TEST_CTL
* Note that we only catch machine checks when reading the source addresses.
* Writes to target are posted and don't generate machine checks.
*/
-ENTRY(__memcpy_mcsafe)
+SYM_FUNC_START(__memcpy_mcsafe)
cmpl $8, %edx
/* Less than 8 bytes? Go to byte copy loop */
jb .L_no_whole_words
@@ -260,7 +260,7 @@ ENTRY(__memcpy_mcsafe)
xorl %eax, %eax
.L_done:
ret
-ENDPROC(__memcpy_mcsafe)
+SYM_FUNC_END(__memcpy_mcsafe)
EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
.section .fixup, "ax"
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index bbec69d8223b..337830d7a59c 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -26,8 +26,8 @@
*/
.weak memmove
-ENTRY(memmove)
-ENTRY(__memmove)
+SYM_FUNC_START_ALIAS(memmove)
+SYM_FUNC_START(__memmove)
/* Handle more 32 bytes in loop */
mov %rdi, %rax
@@ -207,7 +207,7 @@ ENTRY(__memmove)
movb %r11b, (%rdi)
13:
retq
-ENDPROC(__memmove)
-ENDPROC(memmove)
+SYM_FUNC_END(__memmove)
+SYM_FUNC_END_ALIAS(memmove)
EXPORT_SYMBOL(__memmove)
EXPORT_SYMBOL(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 9bc861c71e75..9ff15ee404a4 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -19,8 +19,8 @@
*
* rax original destination
*/
-ENTRY(memset)
-ENTRY(__memset)
+SYM_FUNC_START_ALIAS(memset)
+SYM_FUNC_START(__memset)
/*
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
* to use it when possible. If not available, use fast string instructions.
@@ -43,8 +43,8 @@ ENTRY(__memset)
rep stosb
movq %r9,%rax
ret
-ENDPROC(memset)
-ENDPROC(__memset)
+SYM_FUNC_END(__memset)
+SYM_FUNC_END_ALIAS(memset)
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)
@@ -59,16 +59,16 @@ EXPORT_SYMBOL(__memset)
*
* rax original destination
*/
-ENTRY(memset_erms)
+SYM_FUNC_START_LOCAL(memset_erms)
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
-ENDPROC(memset_erms)
+SYM_FUNC_END(memset_erms)
-ENTRY(memset_orig)
+SYM_FUNC_START_LOCAL(memset_orig)
movq %rdi,%r10
/* expand byte value */
@@ -139,4 +139,4 @@ ENTRY(memset_orig)
subq %r8,%rdx
jmp .Lafter_bad_alignment
.Lfinal:
-ENDPROC(memset_orig)
+SYM_FUNC_END(memset_orig)
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index ed33cbab3958..a2b9caa5274c 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -12,7 +12,7 @@
*
*/
.macro op_safe_regs op
-ENTRY(\op\()_safe_regs)
+SYM_FUNC_START(\op\()_safe_regs)
pushq %rbx
pushq %r12
movq %rdi, %r10 /* Save pointer */
@@ -41,13 +41,13 @@ ENTRY(\op\()_safe_regs)
jmp 2b
_ASM_EXTABLE(1b, 3b)
-ENDPROC(\op\()_safe_regs)
+SYM_FUNC_END(\op\()_safe_regs)
.endm
#else /* X86_32 */
.macro op_safe_regs op
-ENTRY(\op\()_safe_regs)
+SYM_FUNC_START(\op\()_safe_regs)
pushl %ebx
pushl %ebp
pushl %esi
@@ -83,7 +83,7 @@ ENTRY(\op\()_safe_regs)
jmp 2b
_ASM_EXTABLE(1b, 3b)
-ENDPROC(\op\()_safe_regs)
+SYM_FUNC_END(\op\()_safe_regs)
.endm
#endif
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 126dd6a9ec9b..7c7c92db8497 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -34,7 +34,7 @@
#define ENTER mov PER_CPU_VAR(current_task), %_ASM_BX
.text
-ENTRY(__put_user_1)
+SYM_FUNC_START(__put_user_1)
ENTER
cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
jae .Lbad_put_user
@@ -43,10 +43,10 @@ ENTRY(__put_user_1)
xor %eax,%eax
ASM_CLAC
ret
-ENDPROC(__put_user_1)
+SYM_FUNC_END(__put_user_1)
EXPORT_SYMBOL(__put_user_1)
-ENTRY(__put_user_2)
+SYM_FUNC_START(__put_user_2)
ENTER
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $1,%_ASM_BX
@@ -57,10 +57,10 @@ ENTRY(__put_user_2)
xor %eax,%eax
ASM_CLAC
ret
-ENDPROC(__put_user_2)
+SYM_FUNC_END(__put_user_2)
EXPORT_SYMBOL(__put_user_2)
-ENTRY(__put_user_4)
+SYM_FUNC_START(__put_user_4)
ENTER
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $3,%_ASM_BX
@@ -71,10 +71,10 @@ ENTRY(__put_user_4)
xor %eax,%eax
ASM_CLAC
ret
-ENDPROC(__put_user_4)
+SYM_FUNC_END(__put_user_4)
EXPORT_SYMBOL(__put_user_4)
-ENTRY(__put_user_8)
+SYM_FUNC_START(__put_user_8)
ENTER
mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
sub $7,%_ASM_BX
@@ -88,14 +88,15 @@ ENTRY(__put_user_8)
xor %eax,%eax
ASM_CLAC
RET
-ENDPROC(__put_user_8)
+SYM_FUNC_END(__put_user_8)
EXPORT_SYMBOL(__put_user_8)
-.Lbad_put_user_clac:
+SYM_CODE_START_LOCAL(.Lbad_put_user_clac)
ASM_CLAC
.Lbad_put_user:
movl $-EFAULT,%eax
RET
+SYM_CODE_END(.Lbad_put_user_clac)
_ASM_EXTABLE_UA(1b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(2b, .Lbad_put_user_clac)
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index c909961e678a..363ec132df7e 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -11,11 +11,11 @@
.macro THUNK reg
.section .text.__x86.indirect_thunk
-ENTRY(__x86_indirect_thunk_\reg)
+SYM_FUNC_START(__x86_indirect_thunk_\reg)
CFI_STARTPROC
JMP_NOSPEC %\reg
CFI_ENDPROC
-ENDPROC(__x86_indirect_thunk_\reg)
+SYM_FUNC_END(__x86_indirect_thunk_\reg)
.endm
/*
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index e0b85930dd77..0a0e9112f284 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -333,7 +333,7 @@ AVXcode: 1
06: CLTS
07: SYSRET (o64)
08: INVD
-09: WBINVD
+09: WBINVD | WBNOINVD (F3)
0a:
0b: UD2 (1B)
0c:
@@ -364,7 +364,7 @@ AVXcode: 1
# a ModR/M byte.
1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
-1c:
+1c: Grp20 (1A),(1C)
1d:
1e:
1f: NOP Ev
@@ -792,6 +792,8 @@ f3: Grp17 (1A)
f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
+f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
+f9: MOVDIRI My,Gy
EndTable
Table: 3-byte opcode 2 (0x0f 0x3a)
@@ -943,9 +945,9 @@ GrpTable: Grp6
EndTable
GrpTable: Grp7
-0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
-1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
-2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B)
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B) | PCONFIG (101),(11B) | ENCLV (000),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B) | ENCLS (111),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
5: rdpkru (110),(11B) | wrpkru (111),(11B)
@@ -1020,7 +1022,7 @@ GrpTable: Grp15
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
4: XSAVE | ptwrite Ey (F3),(11B)
5: XRSTOR | lfence (11B)
-6: XSAVEOPT | clwb (66) | mfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B)
7: clflush | clflushopt (66) | sfence (11B)
EndTable
@@ -1051,6 +1053,10 @@ GrpTable: Grp19
6: vscatterpf1qps/d Wx (66),(ev)
EndTable
+GrpTable: Grp20
+0: cldemote Mb
+EndTable
+
# AMD's Prefetch Group
GrpTable: GrpP
0: PREFETCH
diff --git a/arch/x86/math-emu/div_Xsig.S b/arch/x86/math-emu/div_Xsig.S
index ee08449d20fd..951da2ad54bb 100644
--- a/arch/x86/math-emu/div_Xsig.S
+++ b/arch/x86/math-emu/div_Xsig.S
@@ -75,7 +75,7 @@ FPU_result_1:
.text
-ENTRY(div_Xsig)
+SYM_FUNC_START(div_Xsig)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
@@ -364,4 +364,4 @@ L_bugged_2:
pop %ebx
jmp L_exit
#endif /* PARANOID */
-ENDPROC(div_Xsig)
+SYM_FUNC_END(div_Xsig)
diff --git a/arch/x86/math-emu/div_small.S b/arch/x86/math-emu/div_small.S
index 8f5025c80ee0..d047d1816abe 100644
--- a/arch/x86/math-emu/div_small.S
+++ b/arch/x86/math-emu/div_small.S
@@ -19,7 +19,7 @@
#include "fpu_emu.h"
.text
-ENTRY(FPU_div_small)
+SYM_FUNC_START(FPU_div_small)
pushl %ebp
movl %esp,%ebp
@@ -45,4 +45,4 @@ ENTRY(FPU_div_small)
leave
ret
-ENDPROC(FPU_div_small)
+SYM_FUNC_END(FPU_div_small)
diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h
index f98a0c956764..9b41391867dc 100644
--- a/arch/x86/math-emu/fpu_system.h
+++ b/arch/x86/math-emu/fpu_system.h
@@ -107,6 +107,8 @@ static inline bool seg_writable(struct desc_struct *d)
#define FPU_access_ok(y,z) if ( !access_ok(y,z) ) \
math_abort(FPU_info,SIGSEGV)
#define FPU_abort math_abort(FPU_info, SIGSEGV)
+#define FPU_copy_from_user(to, from, n) \
+ do { if (copy_from_user(to, from, n)) FPU_abort; } while (0)
#undef FPU_IGNORE_CODE_SEGV
#ifdef FPU_IGNORE_CODE_SEGV
@@ -122,7 +124,7 @@ static inline bool seg_writable(struct desc_struct *d)
#define FPU_code_access_ok(z) FPU_access_ok((void __user *)FPU_EIP,z)
#endif
-#define FPU_get_user(x,y) get_user((x),(y))
-#define FPU_put_user(x,y) put_user((x),(y))
+#define FPU_get_user(x,y) do { if (get_user((x),(y))) FPU_abort; } while (0)
+#define FPU_put_user(x,y) do { if (put_user((x),(y))) FPU_abort; } while (0)
#endif
diff --git a/arch/x86/math-emu/mul_Xsig.S b/arch/x86/math-emu/mul_Xsig.S
index 3e489122a2b0..4afc7b1fa6e9 100644
--- a/arch/x86/math-emu/mul_Xsig.S
+++ b/arch/x86/math-emu/mul_Xsig.S
@@ -25,7 +25,7 @@
#include "fpu_emu.h"
.text
-ENTRY(mul32_Xsig)
+SYM_FUNC_START(mul32_Xsig)
pushl %ebp
movl %esp,%ebp
subl $16,%esp
@@ -63,10 +63,10 @@ ENTRY(mul32_Xsig)
popl %esi
leave
ret
-ENDPROC(mul32_Xsig)
+SYM_FUNC_END(mul32_Xsig)
-ENTRY(mul64_Xsig)
+SYM_FUNC_START(mul64_Xsig)
pushl %ebp
movl %esp,%ebp
subl $16,%esp
@@ -116,11 +116,11 @@ ENTRY(mul64_Xsig)
popl %esi
leave
ret
-ENDPROC(mul64_Xsig)
+SYM_FUNC_END(mul64_Xsig)
-ENTRY(mul_Xsig_Xsig)
+SYM_FUNC_START(mul_Xsig_Xsig)
pushl %ebp
movl %esp,%ebp
subl $16,%esp
@@ -176,4 +176,4 @@ ENTRY(mul_Xsig_Xsig)
popl %esi
leave
ret
-ENDPROC(mul_Xsig_Xsig)
+SYM_FUNC_END(mul_Xsig_Xsig)
diff --git a/arch/x86/math-emu/polynom_Xsig.S b/arch/x86/math-emu/polynom_Xsig.S
index 604f0b2d17e8..702315eecb86 100644
--- a/arch/x86/math-emu/polynom_Xsig.S
+++ b/arch/x86/math-emu/polynom_Xsig.S
@@ -37,7 +37,7 @@
#define OVERFLOWED -16(%ebp) /* addition overflow flag */
.text
-ENTRY(polynomial_Xsig)
+SYM_FUNC_START(polynomial_Xsig)
pushl %ebp
movl %esp,%ebp
subl $32,%esp
@@ -134,4 +134,4 @@ L_accum_done:
popl %esi
leave
ret
-ENDPROC(polynomial_Xsig)
+SYM_FUNC_END(polynomial_Xsig)
diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c
index f3779743d15e..fe6246ff9887 100644
--- a/arch/x86/math-emu/reg_ld_str.c
+++ b/arch/x86/math-emu/reg_ld_str.c
@@ -85,7 +85,7 @@ int FPU_load_extended(long double __user *s, int stnr)
RE_ENTRANT_CHECK_OFF;
FPU_access_ok(s, 10);
- __copy_from_user(sti_ptr, s, 10);
+ FPU_copy_from_user(sti_ptr, s, 10);
RE_ENTRANT_CHECK_ON;
return FPU_tagof(sti_ptr);
@@ -1126,9 +1126,9 @@ void frstor(fpu_addr_modes addr_modes, u_char __user *data_address)
/* Copy all registers in stack order. */
RE_ENTRANT_CHECK_OFF;
FPU_access_ok(s, 80);
- __copy_from_user(register_base + offset, s, other);
+ FPU_copy_from_user(register_base + offset, s, other);
if (offset)
- __copy_from_user(register_base, s + other, offset);
+ FPU_copy_from_user(register_base, s + other, offset);
RE_ENTRANT_CHECK_ON;
for (i = 0; i < 8; i++) {
diff --git a/arch/x86/math-emu/reg_norm.S b/arch/x86/math-emu/reg_norm.S
index 7f6b4392a15d..cad1d60b1e84 100644
--- a/arch/x86/math-emu/reg_norm.S
+++ b/arch/x86/math-emu/reg_norm.S
@@ -22,7 +22,7 @@
.text
-ENTRY(FPU_normalize)
+SYM_FUNC_START(FPU_normalize)
pushl %ebp
movl %esp,%ebp
pushl %ebx
@@ -95,12 +95,12 @@ L_overflow:
call arith_overflow
pop %ebx
jmp L_exit
-ENDPROC(FPU_normalize)
+SYM_FUNC_END(FPU_normalize)
/* Normalise without reporting underflow or overflow */
-ENTRY(FPU_normalize_nuo)
+SYM_FUNC_START(FPU_normalize_nuo)
pushl %ebp
movl %esp,%ebp
pushl %ebx
@@ -147,4 +147,4 @@ L_exit_nuo_zero:
popl %ebx
leave
ret
-ENDPROC(FPU_normalize_nuo)
+SYM_FUNC_END(FPU_normalize_nuo)
diff --git a/arch/x86/math-emu/reg_round.S b/arch/x86/math-emu/reg_round.S
index 04563421ee7d..11a1f798451b 100644
--- a/arch/x86/math-emu/reg_round.S
+++ b/arch/x86/math-emu/reg_round.S
@@ -109,7 +109,7 @@ FPU_denormal:
.globl fpu_Arith_exit
/* Entry point when called from C */
-ENTRY(FPU_round)
+SYM_FUNC_START(FPU_round)
pushl %ebp
movl %esp,%ebp
pushl %esi
@@ -708,4 +708,4 @@ L_exception_exit:
jmp fpu_reg_round_special_exit
#endif /* PARANOID */
-ENDPROC(FPU_round)
+SYM_FUNC_END(FPU_round)
diff --git a/arch/x86/math-emu/reg_u_add.S b/arch/x86/math-emu/reg_u_add.S
index 50fe9f8c893c..9c9e2c810afe 100644
--- a/arch/x86/math-emu/reg_u_add.S
+++ b/arch/x86/math-emu/reg_u_add.S
@@ -32,7 +32,7 @@
#include "control_w.h"
.text
-ENTRY(FPU_u_add)
+SYM_FUNC_START(FPU_u_add)
pushl %ebp
movl %esp,%ebp
pushl %esi
@@ -166,4 +166,4 @@ L_exit:
leave
ret
#endif /* PARANOID */
-ENDPROC(FPU_u_add)
+SYM_FUNC_END(FPU_u_add)
diff --git a/arch/x86/math-emu/reg_u_div.S b/arch/x86/math-emu/reg_u_div.S
index 94d545e118e4..e2fb5c2644c5 100644
--- a/arch/x86/math-emu/reg_u_div.S
+++ b/arch/x86/math-emu/reg_u_div.S
@@ -75,7 +75,7 @@ FPU_ovfl_flag:
#define DEST PARAM3
.text
-ENTRY(FPU_u_div)
+SYM_FUNC_START(FPU_u_div)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
@@ -471,4 +471,4 @@ L_exit:
ret
#endif /* PARANOID */
-ENDPROC(FPU_u_div)
+SYM_FUNC_END(FPU_u_div)
diff --git a/arch/x86/math-emu/reg_u_mul.S b/arch/x86/math-emu/reg_u_mul.S
index 21cde47fb3e5..0c779c87ac5b 100644
--- a/arch/x86/math-emu/reg_u_mul.S
+++ b/arch/x86/math-emu/reg_u_mul.S
@@ -45,7 +45,7 @@ FPU_accum_1:
.text
-ENTRY(FPU_u_mul)
+SYM_FUNC_START(FPU_u_mul)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
@@ -147,4 +147,4 @@ L_exit:
ret
#endif /* PARANOID */
-ENDPROC(FPU_u_mul)
+SYM_FUNC_END(FPU_u_mul)
diff --git a/arch/x86/math-emu/reg_u_sub.S b/arch/x86/math-emu/reg_u_sub.S
index f05dea7dec38..e9bb7c248649 100644
--- a/arch/x86/math-emu/reg_u_sub.S
+++ b/arch/x86/math-emu/reg_u_sub.S
@@ -33,7 +33,7 @@
#include "control_w.h"
.text
-ENTRY(FPU_u_sub)
+SYM_FUNC_START(FPU_u_sub)
pushl %ebp
movl %esp,%ebp
pushl %esi
@@ -271,4 +271,4 @@ L_exit:
popl %esi
leave
ret
-ENDPROC(FPU_u_sub)
+SYM_FUNC_END(FPU_u_sub)
diff --git a/arch/x86/math-emu/round_Xsig.S b/arch/x86/math-emu/round_Xsig.S
index 226a51e991f1..d9d7de8dbd7b 100644
--- a/arch/x86/math-emu/round_Xsig.S
+++ b/arch/x86/math-emu/round_Xsig.S
@@ -23,7 +23,7 @@
.text
-ENTRY(round_Xsig)
+SYM_FUNC_START(round_Xsig)
pushl %ebp
movl %esp,%ebp
pushl %ebx /* Reserve some space */
@@ -79,11 +79,11 @@ L_exit:
popl %ebx
leave
ret
-ENDPROC(round_Xsig)
+SYM_FUNC_END(round_Xsig)
-ENTRY(norm_Xsig)
+SYM_FUNC_START(norm_Xsig)
pushl %ebp
movl %esp,%ebp
pushl %ebx /* Reserve some space */
@@ -139,4 +139,4 @@ L_n_exit:
popl %ebx
leave
ret
-ENDPROC(norm_Xsig)
+SYM_FUNC_END(norm_Xsig)
diff --git a/arch/x86/math-emu/shr_Xsig.S b/arch/x86/math-emu/shr_Xsig.S
index 96f4779aa9c1..726af985f758 100644
--- a/arch/x86/math-emu/shr_Xsig.S
+++ b/arch/x86/math-emu/shr_Xsig.S
@@ -22,7 +22,7 @@
#include "fpu_emu.h"
.text
-ENTRY(shr_Xsig)
+SYM_FUNC_START(shr_Xsig)
push %ebp
movl %esp,%ebp
pushl %esi
@@ -86,4 +86,4 @@ L_more_than_95:
popl %esi
leave
ret
-ENDPROC(shr_Xsig)
+SYM_FUNC_END(shr_Xsig)
diff --git a/arch/x86/math-emu/wm_shrx.S b/arch/x86/math-emu/wm_shrx.S
index d588874eb6fb..4fc89174caf0 100644
--- a/arch/x86/math-emu/wm_shrx.S
+++ b/arch/x86/math-emu/wm_shrx.S
@@ -33,7 +33,7 @@
| Results returned in the 64 bit arg and eax. |
+---------------------------------------------------------------------------*/
-ENTRY(FPU_shrx)
+SYM_FUNC_START(FPU_shrx)
push %ebp
movl %esp,%ebp
pushl %esi
@@ -93,7 +93,7 @@ L_more_than_95:
popl %esi
leave
ret
-ENDPROC(FPU_shrx)
+SYM_FUNC_END(FPU_shrx)
/*---------------------------------------------------------------------------+
@@ -112,7 +112,7 @@ ENDPROC(FPU_shrx)
| part which has been shifted out of the arg. |
| Results returned in the 64 bit arg and eax. |
+---------------------------------------------------------------------------*/
-ENTRY(FPU_shrxs)
+SYM_FUNC_START(FPU_shrxs)
push %ebp
movl %esp,%ebp
pushl %esi
@@ -204,4 +204,4 @@ Ls_more_than_95:
popl %esi
leave
ret
-ENDPROC(FPU_shrxs)
+SYM_FUNC_END(FPU_shrxs)
diff --git a/arch/x86/math-emu/wm_sqrt.S b/arch/x86/math-emu/wm_sqrt.S
index f031c0e19356..3b2b58164ec1 100644
--- a/arch/x86/math-emu/wm_sqrt.S
+++ b/arch/x86/math-emu/wm_sqrt.S
@@ -75,7 +75,7 @@ FPU_fsqrt_arg_0:
.text
-ENTRY(wm_sqrt)
+SYM_FUNC_START(wm_sqrt)
pushl %ebp
movl %esp,%ebp
#ifndef NON_REENTRANT_FPU
@@ -469,4 +469,4 @@ sqrt_more_prec_large:
/* Our estimate is too large */
movl $0x7fffff00,%eax
jmp sqrt_round_result
-ENDPROC(wm_sqrt)
+SYM_FUNC_END(wm_sqrt)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 84373dc9b341..3b89c201ac26 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg
endif
obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
- pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
+ pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o
# Make sure __phys_addr has no stackprotector
nostackp := $(call cc-option, -fno-stack-protector)
@@ -23,7 +23,7 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp)
CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace
-obj-$(CONFIG_X86_PAT) += pat_rbtree.o
+obj-$(CONFIG_X86_PAT) += pat_interval.o
obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 752ad11d6868..82ead8e27888 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -161,6 +161,14 @@ static void __init setup_cpu_entry_area(unsigned int cpu)
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+ /*
+ * VMX changes the host TR limit to 0x67 after a VM exit. This is
+ * okay, since 0x67 covers the size of struct x86_hw_tss. Make sure
+ * that this is correct.
+ */
+ BUILD_BUG_ON(offsetof(struct tss_struct, x86_tss) != 0);
+ BUILD_BUG_ON(sizeof(struct x86_hw_tss) != 0x68);
+
cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
@@ -178,7 +186,9 @@ static __init void setup_cpu_entry_area_ptes(void)
#ifdef CONFIG_X86_32
unsigned long start, end;
- BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
+ /* The +1 is for the readonly IDT: */
+ BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
+ BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
start = CPU_ENTRY_AREA_BASE;
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 4d75bc656f97..30bb0bd3b1b8 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -45,55 +45,6 @@ __visible bool ex_handler_fault(const struct exception_table_entry *fixup,
EXPORT_SYMBOL_GPL(ex_handler_fault);
/*
- * Handler for UD0 exception following a failed test against the
- * result of a refcount inc/dec/add/sub.
- */
-__visible bool ex_handler_refcount(const struct exception_table_entry *fixup,
- struct pt_regs *regs, int trapnr,
- unsigned long error_code,
- unsigned long fault_addr)
-{
- /* First unconditionally saturate the refcount. */
- *(int *)regs->cx = INT_MIN / 2;
-
- /*
- * Strictly speaking, this reports the fixup destination, not
- * the fault location, and not the actually overflowing
- * instruction, which is the instruction before the "js", but
- * since that instruction could be a variety of lengths, just
- * report the location after the overflow, which should be close
- * enough for finding the overflow, as it's at least back in
- * the function, having returned from .text.unlikely.
- */
- regs->ip = ex_fixup_addr(fixup);
-
- /*
- * This function has been called because either a negative refcount
- * value was seen by any of the refcount functions, or a zero
- * refcount value was seen by refcount_dec().
- *
- * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result
- * wrapped around) will be set. Additionally, seeing the refcount
- * reach 0 will set ZF (Zero Flag: result was zero). In each of
- * these cases we want a report, since it's a boundary condition.
- * The SF case is not reported since it indicates post-boundary
- * manipulations below zero or above INT_MAX. And if none of the
- * flags are set, something has gone very wrong, so report it.
- */
- if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) {
- bool zero = regs->flags & X86_EFLAGS_ZF;
-
- refcount_error_report(regs, zero ? "hit zero" : "overflow");
- } else if ((regs->flags & X86_EFLAGS_SF) == 0) {
- /* Report if none of OF, ZF, nor SF are set. */
- refcount_error_report(regs, "unexpected saturation");
- }
-
- return true;
-}
-EXPORT_SYMBOL(ex_handler_refcount);
-
-/*
* Handler for when we fail to restore a task's FPU state. We should never get
* here because the FPU state of a task using the FPU (task->thread.fpu.state)
* should always be valid. However, past bugs have allowed userspace to set
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index fd10d91a6115..e7bb483557c9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -829,14 +829,13 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)
* used for the kernel image only. free_init_pages() will do the
* right thing for either kind of address.
*/
-void free_kernel_image_pages(void *begin, void *end)
+void free_kernel_image_pages(const char *what, void *begin, void *end)
{
unsigned long begin_ul = (unsigned long)begin;
unsigned long end_ul = (unsigned long)end;
unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT;
-
- free_init_pages("unused kernel image", begin_ul, end_ul);
+ free_init_pages(what, begin_ul, end_ul);
/*
* PTI maps some of the kernel into userspace. For performance,
@@ -865,7 +864,8 @@ void __ref free_initmem(void)
mem_encrypt_free_decrypted_mem();
- free_kernel_image_pages(&__init_begin, &__init_end);
+ free_kernel_image_pages("unused kernel image (initmem)",
+ &__init_begin, &__init_end);
}
#ifdef CONFIG_BLK_DEV_INITRD
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a6b5c653727b..dcb9bc961b39 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1263,7 +1263,7 @@ int kernel_set_to_readonly;
void set_kernel_text_rw(void)
{
unsigned long start = PFN_ALIGN(_text);
- unsigned long end = PFN_ALIGN(__stop___ex_table);
+ unsigned long end = PFN_ALIGN(_etext);
if (!kernel_set_to_readonly)
return;
@@ -1282,7 +1282,7 @@ void set_kernel_text_rw(void)
void set_kernel_text_ro(void)
{
unsigned long start = PFN_ALIGN(_text);
- unsigned long end = PFN_ALIGN(__stop___ex_table);
+ unsigned long end = PFN_ALIGN(_etext);
if (!kernel_set_to_readonly)
return;
@@ -1300,9 +1300,9 @@ void mark_rodata_ro(void)
{
unsigned long start = PFN_ALIGN(_text);
unsigned long rodata_start = PFN_ALIGN(__start_rodata);
- unsigned long end = (unsigned long) &__end_rodata_hpage_align;
- unsigned long text_end = PFN_ALIGN(&__stop___ex_table);
- unsigned long rodata_end = PFN_ALIGN(&__end_rodata);
+ unsigned long end = (unsigned long)__end_rodata_hpage_align;
+ unsigned long text_end = PFN_ALIGN(_etext);
+ unsigned long rodata_end = PFN_ALIGN(__end_rodata);
unsigned long all_end;
printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n",
@@ -1334,8 +1334,10 @@ void mark_rodata_ro(void)
set_memory_ro(start, (end-start) >> PAGE_SHIFT);
#endif
- free_kernel_image_pages((void *)text_end, (void *)rodata_start);
- free_kernel_image_pages((void *)rodata_end, (void *)_sdata);
+ free_kernel_image_pages("unused kernel image (text/rodata gap)",
+ (void *)text_end, (void *)rodata_start);
+ free_kernel_image_pages("unused kernel image (rodata/data gap)",
+ (void *)rodata_end, (void *)_sdata);
debug_checkwx();
}
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index a39dcdb5ae34..1ff9c2030b4f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -626,6 +626,17 @@ static bool memremap_is_setup_data(resource_size_t phys_addr,
paddr_next = data->next;
len = data->len;
+ if ((phys_addr > paddr) && (phys_addr < (paddr + len))) {
+ memunmap(data);
+ return true;
+ }
+
+ if (data->type == SETUP_INDIRECT &&
+ ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) {
+ paddr = ((struct setup_indirect *)data->data)->addr;
+ len = ((struct setup_indirect *)data->data)->len;
+ }
+
memunmap(data);
if ((phys_addr > paddr) && (phys_addr < (paddr + len)))
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 79eb55ce69a9..49d7814b59a9 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -193,8 +193,8 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
int ret;
WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
if (f->armed) {
- pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n",
- f->addr, f->count, !!f->old_presence);
+ pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n",
+ f->addr, f->count, !!f->old_presence);
}
ret = clear_page_presence(f, true);
WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
@@ -341,8 +341,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
* something external causing them (f.e. using a debugger while
* mmio tracing enabled), or erroneous behaviour
*/
- pr_warning("unexpected debug trap on CPU %d.\n",
- smp_processor_id());
+ pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id());
goto out;
}
diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c
new file mode 100644
index 000000000000..f5b85bdc0535
--- /dev/null
+++ b/arch/x86/mm/maccess.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/uaccess.h>
+#include <linux/kernel.h>
+
+#ifdef CONFIG_X86_64
+static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits)
+{
+ return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits);
+}
+
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ /*
+ * Range covering the highest possible canonical userspace address
+ * as well as non-canonical address range. For the canonical range
+ * we also need to include the userspace guard page.
+ */
+ return vaddr < TASK_SIZE_MAX + PAGE_SIZE ||
+ canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr;
+}
+#else
+static __always_inline bool invalid_probe_range(u64 vaddr)
+{
+ return vaddr < TASK_SIZE_MAX;
+}
+#endif
+
+long probe_kernel_read_strict(void *dst, const void *src, size_t size)
+{
+ if (unlikely(invalid_probe_range((unsigned long)src)))
+ return -EFAULT;
+
+ return __probe_kernel_read(dst, src, size);
+}
+
+long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count)
+{
+ if (unlikely(invalid_probe_range((unsigned long)unsafe_addr)))
+ return -EFAULT;
+
+ return __strncpy_from_unsafe(dst, unsafe_addr, count);
+}
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 6d71481a1e70..106ead05bbe3 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -16,7 +16,7 @@
.text
.code64
-ENTRY(sme_encrypt_execute)
+SYM_FUNC_START(sme_encrypt_execute)
/*
* Entry parameters:
@@ -66,9 +66,9 @@ ENTRY(sme_encrypt_execute)
pop %rbp
ret
-ENDPROC(sme_encrypt_execute)
+SYM_FUNC_END(sme_encrypt_execute)
-ENTRY(__enc_copy)
+SYM_FUNC_START(__enc_copy)
/*
* Routine used to encrypt memory in place.
* This routine must be run outside of the kernel proper since
@@ -153,4 +153,4 @@ ENTRY(__enc_copy)
ret
.L__enc_copy_end:
-ENDPROC(__enc_copy)
+SYM_FUNC_END(__enc_copy)
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c
index b8ef8557d4b3..673de6063345 100644
--- a/arch/x86/mm/mmio-mod.c
+++ b/arch/x86/mm/mmio-mod.c
@@ -394,7 +394,7 @@ static void enter_uniprocessor(void)
}
out:
if (num_online_cpus() > 1)
- pr_warning("multiple CPUs still online, may miss events.\n");
+ pr_warn("multiple CPUs still online, may miss events.\n");
}
static void leave_uniprocessor(void)
@@ -418,8 +418,8 @@ static void leave_uniprocessor(void)
static void enter_uniprocessor(void)
{
if (num_online_cpus() > 1)
- pr_warning("multiple CPUs are online, may miss events. "
- "Suggest booting with maxcpus=1 kernel argument.\n");
+ pr_warn("multiple CPUs are online, may miss events. "
+ "Suggest booting with maxcpus=1 kernel argument.\n");
}
static void leave_uniprocessor(void)
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 4123100e0eaf..99f7a68738f0 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -699,7 +699,7 @@ static int __init dummy_numa_init(void)
* x86_numa_init - Initialize NUMA
*
* Try each configured NUMA initialization method until one succeeds. The
- * last fallback is dummy single node config encomapssing whole memory and
+ * last fallback is dummy single node config encompassing whole memory and
* never fails.
*/
void __init x86_numa_init(void)
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index abffa0be80da..7f1d2034df1e 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -438,7 +438,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
goto no_emu;
if (numa_cleanup_meminfo(&ei) < 0) {
- pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
+ pr_warn("NUMA: Warning: constructed meminfo invalid, disabling emulation\n");
goto no_emu;
}
@@ -449,7 +449,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt)
phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
phys_size, PAGE_SIZE);
if (!phys) {
- pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
+ pr_warn("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n");
goto no_emu;
}
memblock_reserve(phys, phys_size);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index d9fbd4f69920..2d758e19ef22 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -603,7 +603,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type,
spin_lock(&memtype_lock);
- err = rbt_memtype_check_insert(new, new_type);
+ err = memtype_check_insert(new, new_type);
if (err) {
pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n",
start, end - 1,
@@ -650,7 +650,7 @@ int free_memtype(u64 start, u64 end)
}
spin_lock(&memtype_lock);
- entry = rbt_memtype_erase(start, end);
+ entry = memtype_erase(start, end);
spin_unlock(&memtype_lock);
if (IS_ERR(entry)) {
@@ -693,7 +693,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr)
spin_lock(&memtype_lock);
- entry = rbt_memtype_lookup(paddr);
+ entry = memtype_lookup(paddr);
if (entry != NULL)
rettype = entry->type;
else
@@ -1109,7 +1109,7 @@ static struct memtype *memtype_get_idx(loff_t pos)
return NULL;
spin_lock(&memtype_lock);
- ret = rbt_memtype_copy_nth_element(print_entry, pos);
+ ret = memtype_copy_nth_element(print_entry, pos);
spin_unlock(&memtype_lock);
if (!ret) {
diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h
index eeb5caeb089b..79a06684349e 100644
--- a/arch/x86/mm/pat_internal.h
+++ b/arch/x86/mm/pat_internal.h
@@ -29,20 +29,20 @@ static inline char *cattr_name(enum page_cache_mode pcm)
}
#ifdef CONFIG_X86_PAT
-extern int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *new_type);
-extern struct memtype *rbt_memtype_erase(u64 start, u64 end);
-extern struct memtype *rbt_memtype_lookup(u64 addr);
-extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos);
+extern int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *new_type);
+extern struct memtype *memtype_erase(u64 start, u64 end);
+extern struct memtype *memtype_lookup(u64 addr);
+extern int memtype_copy_nth_element(struct memtype *out, loff_t pos);
#else
-static inline int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *new_type)
+static inline int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *new_type)
{ return 0; }
-static inline struct memtype *rbt_memtype_erase(u64 start, u64 end)
+static inline struct memtype *memtype_erase(u64 start, u64 end)
{ return NULL; }
-static inline struct memtype *rbt_memtype_lookup(u64 addr)
+static inline struct memtype *memtype_lookup(u64 addr)
{ return NULL; }
-static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
+static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos)
{ return 0; }
#endif
diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c
new file mode 100644
index 000000000000..47a1bf30748f
--- /dev/null
+++ b/arch/x86/mm/pat_interval.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handle caching attributes in page tables (PAT)
+ *
+ * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
+ * Suresh B Siddha <suresh.b.siddha@intel.com>
+ *
+ * Interval tree used to store the PAT memory type reservations.
+ */
+
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/kernel.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+
+#include <asm/pgtable.h>
+#include <asm/pat.h>
+
+#include "pat_internal.h"
+
+/*
+ * The memtype tree keeps track of memory type for specific
+ * physical memory areas. Without proper tracking, conflicting memory
+ * types in different mappings can cause CPU cache corruption.
+ *
+ * The tree is an interval tree (augmented rbtree) with tree ordered
+ * on starting address. Tree can contain multiple entries for
+ * different regions which overlap. All the aliases have the same
+ * cache attributes of course.
+ *
+ * memtype_lock protects the rbtree.
+ */
+static inline u64 memtype_interval_start(struct memtype *memtype)
+{
+ return memtype->start;
+}
+
+static inline u64 memtype_interval_end(struct memtype *memtype)
+{
+ return memtype->end - 1;
+}
+INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end,
+ memtype_interval_start, memtype_interval_end,
+ static, memtype_interval)
+
+static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED;
+
+enum {
+ MEMTYPE_EXACT_MATCH = 0,
+ MEMTYPE_END_MATCH = 1
+};
+
+static struct memtype *memtype_match(u64 start, u64 end, int match_type)
+{
+ struct memtype *match;
+
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
+ while (match != NULL && match->start < end) {
+ if ((match_type == MEMTYPE_EXACT_MATCH) &&
+ (match->start == start) && (match->end == end))
+ return match;
+
+ if ((match_type == MEMTYPE_END_MATCH) &&
+ (match->start < start) && (match->end == end))
+ return match;
+
+ match = memtype_interval_iter_next(match, start, end);
+ }
+
+ return NULL; /* Returns NULL if there is no match */
+}
+
+static int memtype_check_conflict(u64 start, u64 end,
+ enum page_cache_mode reqtype,
+ enum page_cache_mode *newtype)
+{
+ struct memtype *match;
+ enum page_cache_mode found_type = reqtype;
+
+ match = memtype_interval_iter_first(&memtype_rbroot, start, end);
+ if (match == NULL)
+ goto success;
+
+ if (match->type != found_type && newtype == NULL)
+ goto failure;
+
+ dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
+ found_type = match->type;
+
+ match = memtype_interval_iter_next(match, start, end);
+ while (match) {
+ if (match->type != found_type)
+ goto failure;
+
+ match = memtype_interval_iter_next(match, start, end);
+ }
+success:
+ if (newtype)
+ *newtype = found_type;
+
+ return 0;
+
+failure:
+ pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
+ current->comm, current->pid, start, end,
+ cattr_name(found_type), cattr_name(match->type));
+ return -EBUSY;
+}
+
+int memtype_check_insert(struct memtype *new,
+ enum page_cache_mode *ret_type)
+{
+ int err = 0;
+
+ err = memtype_check_conflict(new->start, new->end, new->type, ret_type);
+ if (err)
+ return err;
+
+ if (ret_type)
+ new->type = *ret_type;
+
+ memtype_interval_insert(new, &memtype_rbroot);
+ return 0;
+}
+
+struct memtype *memtype_erase(u64 start, u64 end)
+{
+ struct memtype *data;
+
+ /*
+ * Since the memtype_rbroot tree allows overlapping ranges,
+ * memtype_erase() checks with EXACT_MATCH first, i.e. free
+ * a whole node for the munmap case. If no such entry is found,
+ * it then checks with END_MATCH, i.e. shrink the size of a node
+ * from the end for the mremap case.
+ */
+ data = memtype_match(start, end, MEMTYPE_EXACT_MATCH);
+ if (!data) {
+ data = memtype_match(start, end, MEMTYPE_END_MATCH);
+ if (!data)
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (data->start == start) {
+ /* munmap: erase this node */
+ memtype_interval_remove(data, &memtype_rbroot);
+ } else {
+ /* mremap: update the end value of this node */
+ memtype_interval_remove(data, &memtype_rbroot);
+ data->end = start;
+ memtype_interval_insert(data, &memtype_rbroot);
+ return NULL;
+ }
+
+ return data;
+}
+
+struct memtype *memtype_lookup(u64 addr)
+{
+ return memtype_interval_iter_first(&memtype_rbroot, addr,
+ addr + PAGE_SIZE);
+}
+
+#if defined(CONFIG_DEBUG_FS)
+int memtype_copy_nth_element(struct memtype *out, loff_t pos)
+{
+ struct memtype *match;
+ int i = 1;
+
+ match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX);
+ while (match && pos != i) {
+ match = memtype_interval_iter_next(match, 0, ULONG_MAX);
+ i++;
+ }
+
+ if (match) { /* pos == i */
+ *out = *match;
+ return 0;
+ } else {
+ return 1;
+ }
+}
+#endif
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
deleted file mode 100644
index 65ebe4b88f7c..000000000000
--- a/arch/x86/mm/pat_rbtree.c
+++ /dev/null
@@ -1,268 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Handle caching attributes in page tables (PAT)
- *
- * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
- * Suresh B Siddha <suresh.b.siddha@intel.com>
- *
- * Interval tree (augmented rbtree) used to store the PAT memory type
- * reservations.
- */
-
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/kernel.h>
-#include <linux/rbtree_augmented.h>
-#include <linux/sched.h>
-#include <linux/gfp.h>
-
-#include <asm/pgtable.h>
-#include <asm/pat.h>
-
-#include "pat_internal.h"
-
-/*
- * The memtype tree keeps track of memory type for specific
- * physical memory areas. Without proper tracking, conflicting memory
- * types in different mappings can cause CPU cache corruption.
- *
- * The tree is an interval tree (augmented rbtree) with tree ordered
- * on starting address. Tree can contain multiple entries for
- * different regions which overlap. All the aliases have the same
- * cache attributes of course.
- *
- * memtype_lock protects the rbtree.
- */
-
-static struct rb_root memtype_rbroot = RB_ROOT;
-
-static int is_node_overlap(struct memtype *node, u64 start, u64 end)
-{
- if (node->start >= end || node->end <= start)
- return 0;
-
- return 1;
-}
-
-static u64 get_subtree_max_end(struct rb_node *node)
-{
- u64 ret = 0;
- if (node) {
- struct memtype *data = rb_entry(node, struct memtype, rb);
- ret = data->subtree_max_end;
- }
- return ret;
-}
-
-#define NODE_END(node) ((node)->end)
-
-RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb,
- struct memtype, rb, u64, subtree_max_end, NODE_END)
-
-/* Find the first (lowest start addr) overlapping range from rb tree */
-static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
- u64 start, u64 end)
-{
- struct rb_node *node = root->rb_node;
- struct memtype *last_lower = NULL;
-
- while (node) {
- struct memtype *data = rb_entry(node, struct memtype, rb);
-
- if (get_subtree_max_end(node->rb_left) > start) {
- /* Lowest overlap if any must be on left side */
- node = node->rb_left;
- } else if (is_node_overlap(data, start, end)) {
- last_lower = data;
- break;
- } else if (start >= data->start) {
- /* Lowest overlap if any must be on right side */
- node = node->rb_right;
- } else {
- break;
- }
- }
- return last_lower; /* Returns NULL if there is no overlap */
-}
-
-enum {
- MEMTYPE_EXACT_MATCH = 0,
- MEMTYPE_END_MATCH = 1
-};
-
-static struct memtype *memtype_rb_match(struct rb_root *root,
- u64 start, u64 end, int match_type)
-{
- struct memtype *match;
-
- match = memtype_rb_lowest_match(root, start, end);
- while (match != NULL && match->start < end) {
- struct rb_node *node;
-
- if ((match_type == MEMTYPE_EXACT_MATCH) &&
- (match->start == start) && (match->end == end))
- return match;
-
- if ((match_type == MEMTYPE_END_MATCH) &&
- (match->start < start) && (match->end == end))
- return match;
-
- node = rb_next(&match->rb);
- if (node)
- match = rb_entry(node, struct memtype, rb);
- else
- match = NULL;
- }
-
- return NULL; /* Returns NULL if there is no match */
-}
-
-static int memtype_rb_check_conflict(struct rb_root *root,
- u64 start, u64 end,
- enum page_cache_mode reqtype,
- enum page_cache_mode *newtype)
-{
- struct rb_node *node;
- struct memtype *match;
- enum page_cache_mode found_type = reqtype;
-
- match = memtype_rb_lowest_match(&memtype_rbroot, start, end);
- if (match == NULL)
- goto success;
-
- if (match->type != found_type && newtype == NULL)
- goto failure;
-
- dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end);
- found_type = match->type;
-
- node = rb_next(&match->rb);
- while (node) {
- match = rb_entry(node, struct memtype, rb);
-
- if (match->start >= end) /* Checked all possible matches */
- goto success;
-
- if (is_node_overlap(match, start, end) &&
- match->type != found_type) {
- goto failure;
- }
-
- node = rb_next(&match->rb);
- }
-success:
- if (newtype)
- *newtype = found_type;
-
- return 0;
-
-failure:
- pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n",
- current->comm, current->pid, start, end,
- cattr_name(found_type), cattr_name(match->type));
- return -EBUSY;
-}
-
-static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
-{
- struct rb_node **node = &(root->rb_node);
- struct rb_node *parent = NULL;
-
- while (*node) {
- struct memtype *data = rb_entry(*node, struct memtype, rb);
-
- parent = *node;
- if (data->subtree_max_end < newdata->end)
- data->subtree_max_end = newdata->end;
- if (newdata->start <= data->start)
- node = &((*node)->rb_left);
- else if (newdata->start > data->start)
- node = &((*node)->rb_right);
- }
-
- newdata->subtree_max_end = newdata->end;
- rb_link_node(&newdata->rb, parent, node);
- rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb);
-}
-
-int rbt_memtype_check_insert(struct memtype *new,
- enum page_cache_mode *ret_type)
-{
- int err = 0;
-
- err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end,
- new->type, ret_type);
-
- if (!err) {
- if (ret_type)
- new->type = *ret_type;
-
- new->subtree_max_end = new->end;
- memtype_rb_insert(&memtype_rbroot, new);
- }
- return err;
-}
-
-struct memtype *rbt_memtype_erase(u64 start, u64 end)
-{
- struct memtype *data;
-
- /*
- * Since the memtype_rbroot tree allows overlapping ranges,
- * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free
- * a whole node for the munmap case. If no such entry is found,
- * it then checks with END_MATCH, i.e. shrink the size of a node
- * from the end for the mremap case.
- */
- data = memtype_rb_match(&memtype_rbroot, start, end,
- MEMTYPE_EXACT_MATCH);
- if (!data) {
- data = memtype_rb_match(&memtype_rbroot, start, end,
- MEMTYPE_END_MATCH);
- if (!data)
- return ERR_PTR(-EINVAL);
- }
-
- if (data->start == start) {
- /* munmap: erase this node */
- rb_erase_augmented(&data->rb, &memtype_rbroot,
- &memtype_rb_augment_cb);
- } else {
- /* mremap: update the end value of this node */
- rb_erase_augmented(&data->rb, &memtype_rbroot,
- &memtype_rb_augment_cb);
- data->end = start;
- data->subtree_max_end = data->end;
- memtype_rb_insert(&memtype_rbroot, data);
- return NULL;
- }
-
- return data;
-}
-
-struct memtype *rbt_memtype_lookup(u64 addr)
-{
- return memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE);
-}
-
-#if defined(CONFIG_DEBUG_FS)
-int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos)
-{
- struct rb_node *node;
- int i = 1;
-
- node = rb_first(&memtype_rbroot);
- while (node && pos != i) {
- node = rb_next(node);
- i++;
- }
-
- if (node) { /* pos == i */
- struct memtype *this = rb_entry(node, struct memtype, rb);
- *out = *this;
- return 0;
- } else {
- return 1;
- }
-}
-#endif
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 3e4b9035bb9a..7bd2c3a52297 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -643,8 +643,8 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte)
fixmaps_set++;
}
-void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys,
- pgprot_t flags)
+void native_set_fixmap(unsigned /* enum fixed_addresses */ idx,
+ phys_addr_t phys, pgprot_t flags)
{
/* Sanitize 'prot' against any unsupported bits: */
pgprot_val(flags) &= __default_kernel_pte_mask;
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 7f2140414440..44a9f068eee0 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -574,7 +574,7 @@ static void pti_clone_kernel_text(void)
*/
unsigned long start = PFN_ALIGN(_text);
unsigned long end_clone = (unsigned long)__end_rodata_aligned;
- unsigned long end_global = PFN_ALIGN((unsigned long)__stop___ex_table);
+ unsigned long end_global = PFN_ALIGN((unsigned long)_etext);
if (!pti_kernel_image_global_ok())
return;
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c
index a8bd952e136d..92153d054d6c 100644
--- a/arch/x86/mm/testmmiotrace.c
+++ b/arch/x86/mm/testmmiotrace.c
@@ -127,9 +127,9 @@ static int __init init(void)
return -ENXIO;
}
- pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, "
- "and writing 16 kB of rubbish in there.\n",
- size >> 10, mmio_address);
+ pr_warn("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, "
+ "and writing 16 kB of rubbish in there.\n",
+ size >> 10, mmio_address);
do_test(size);
do_test_bulk_ioremapping();
pr_info("All done.\n");
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 991549a1c5f3..b8be18427277 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -9,9 +9,11 @@
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <linux/bpf.h>
-
+#include <linux/memory.h>
+#include <asm/extable.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
+#include <asm/text-patching.h>
static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
@@ -96,6 +98,7 @@ static int bpf_size_to_x86_bytes(int bpf_size)
/* Pick a register outside of BPF range for JIT internal work */
#define AUX_REG (MAX_BPF_JIT_REG + 1)
+#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)
/*
* The following table maps BPF registers to x86-64 registers.
@@ -104,8 +107,8 @@ static int bpf_size_to_x86_bytes(int bpf_size)
* register in load/store instructions, it always needs an
* extra byte of encoding and is callee saved.
*
- * Also x86-64 register R9 is unused. x86-64 register R10 is
- * used for blinding (if enabled).
+ * x86-64 register R9 is not used by BPF programs, but can be used by BPF
+ * trampoline. x86-64 register R10 is used for blinding (if enabled).
*/
static const int reg2hex[] = {
[BPF_REG_0] = 0, /* RAX */
@@ -121,6 +124,20 @@ static const int reg2hex[] = {
[BPF_REG_FP] = 5, /* RBP readonly */
[BPF_REG_AX] = 2, /* R10 temp register */
[AUX_REG] = 3, /* R11 temp register */
+ [X86_REG_R9] = 1, /* R9 register, 6th function argument */
+};
+
+static const int reg2pt_regs[] = {
+ [BPF_REG_0] = offsetof(struct pt_regs, ax),
+ [BPF_REG_1] = offsetof(struct pt_regs, di),
+ [BPF_REG_2] = offsetof(struct pt_regs, si),
+ [BPF_REG_3] = offsetof(struct pt_regs, dx),
+ [BPF_REG_4] = offsetof(struct pt_regs, cx),
+ [BPF_REG_5] = offsetof(struct pt_regs, r8),
+ [BPF_REG_6] = offsetof(struct pt_regs, bx),
+ [BPF_REG_7] = offsetof(struct pt_regs, r13),
+ [BPF_REG_8] = offsetof(struct pt_regs, r14),
+ [BPF_REG_9] = offsetof(struct pt_regs, r15),
};
/*
@@ -135,6 +152,7 @@ static bool is_ereg(u32 reg)
BIT(BPF_REG_7) |
BIT(BPF_REG_8) |
BIT(BPF_REG_9) |
+ BIT(X86_REG_R9) |
BIT(BPF_REG_AX));
}
@@ -186,7 +204,10 @@ struct jit_context {
#define BPF_MAX_INSN_SIZE 128
#define BPF_INSN_SAFETY 64
-#define PROLOGUE_SIZE 20
+/* Number of bytes emit_patch() needs to generate instructions */
+#define X86_PATCH_SIZE 5
+
+#define PROLOGUE_SIZE 25
/*
* Emit x86-64 prologue code for BPF program and check its size.
@@ -195,8 +216,13 @@ struct jit_context {
static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
{
u8 *prog = *pprog;
- int cnt = 0;
+ int cnt = X86_PATCH_SIZE;
+ /* BPF trampoline can be made to work without these nops,
+ * but let's waste 5 bytes for now and optimize later
+ */
+ memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
+ prog += cnt;
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
/* sub rsp, rounded_stack_depth */
@@ -213,6 +239,89 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
*pprog = prog;
}
+static int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+ s64 offset;
+
+ offset = func - (ip + X86_PATCH_SIZE);
+ if (!is_simm32(offset)) {
+ pr_err("Target call %p is out of range\n", func);
+ return -ERANGE;
+ }
+ EMIT1_off32(opcode, offset);
+ *pprog = prog;
+ return 0;
+}
+
+static int emit_call(u8 **pprog, void *func, void *ip)
+{
+ return emit_patch(pprog, func, ip, 0xE8);
+}
+
+static int emit_jump(u8 **pprog, void *func, void *ip)
+{
+ return emit_patch(pprog, func, ip, 0xE9);
+}
+
+static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr,
+ const bool text_live)
+{
+ const u8 *nop_insn = ideal_nops[NOP_ATOMIC5];
+ u8 old_insn[X86_PATCH_SIZE];
+ u8 new_insn[X86_PATCH_SIZE];
+ u8 *prog;
+ int ret;
+
+ memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
+ if (old_addr) {
+ prog = old_insn;
+ ret = t == BPF_MOD_CALL ?
+ emit_call(&prog, old_addr, ip) :
+ emit_jump(&prog, old_addr, ip);
+ if (ret)
+ return ret;
+ }
+
+ memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
+ if (new_addr) {
+ prog = new_insn;
+ ret = t == BPF_MOD_CALL ?
+ emit_call(&prog, new_addr, ip) :
+ emit_jump(&prog, new_addr, ip);
+ if (ret)
+ return ret;
+ }
+
+ ret = -EBUSY;
+ mutex_lock(&text_mutex);
+ if (memcmp(ip, old_insn, X86_PATCH_SIZE))
+ goto out;
+ if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
+ if (text_live)
+ text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
+ else
+ memcpy(ip, new_insn, X86_PATCH_SIZE);
+ }
+ ret = 0;
+out:
+ mutex_unlock(&text_mutex);
+ return ret;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ void *old_addr, void *new_addr)
+{
+ if (!is_kernel_text((long)ip) &&
+ !is_bpf_text_address((long)ip))
+ /* BPF poking in modules is not supported */
+ return -EINVAL;
+
+ return __bpf_arch_text_poke(ip, t, old_addr, new_addr, true);
+}
+
/*
* Generate the following code:
*
@@ -227,7 +336,7 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
* goto *(prog->bpf_func + prologue_size);
* out:
*/
-static void emit_bpf_tail_call(u8 **pprog)
+static void emit_bpf_tail_call_indirect(u8 **pprog)
{
u8 *prog = *pprog;
int label1, label2, label3;
@@ -294,6 +403,68 @@ static void emit_bpf_tail_call(u8 **pprog)
*pprog = prog;
}
+static void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke,
+ u8 **pprog, int addr, u8 *image)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ /*
+ * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
+ * goto out;
+ */
+ EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */
+ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */
+ EMIT2(X86_JA, 14); /* ja out */
+ EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */
+ EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */
+
+ poke->ip = image + (addr - X86_PATCH_SIZE);
+ poke->adj_off = PROLOGUE_SIZE;
+
+ memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
+ prog += X86_PATCH_SIZE;
+ /* out: */
+
+ *pprog = prog;
+}
+
+static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
+{
+ struct bpf_jit_poke_descriptor *poke;
+ struct bpf_array *array;
+ struct bpf_prog *target;
+ int i, ret;
+
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ poke = &prog->aux->poke_tab[i];
+ WARN_ON_ONCE(READ_ONCE(poke->ip_stable));
+
+ if (poke->reason != BPF_POKE_REASON_TAIL_CALL)
+ continue;
+
+ array = container_of(poke->tail_call.map, struct bpf_array, map);
+ mutex_lock(&array->aux->poke_mutex);
+ target = array->ptrs[poke->tail_call.key];
+ if (target) {
+ /* Plain memcpy is used when image is not live yet
+ * and still not locked as read-only. Once poke
+ * location is active (poke->ip_stable), any parallel
+ * bpf_arch_text_poke() might occur still on the
+ * read-write image until we finally locked it as
+ * read-only. Both modifications on the given image
+ * are under text_mutex to avoid interference.
+ */
+ ret = __bpf_arch_text_poke(poke->ip, BPF_MOD_JUMP, NULL,
+ (u8 *)target->bpf_func +
+ poke->adj_off, false);
+ BUG_ON(ret < 0);
+ }
+ WRITE_ONCE(poke->ip_stable, true);
+ mutex_unlock(&array->aux->poke_mutex);
+ }
+}
+
static void emit_mov_imm32(u8 **pprog, bool sign_propagate,
u32 dst_reg, const u32 imm32)
{
@@ -377,6 +548,96 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg)
*pprog = prog;
}
+/* LDX: dst_reg = *(u8*)(src_reg + off) */
+static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ switch (size) {
+ case BPF_B:
+ /* Emit 'movzx rax, byte ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
+ break;
+ case BPF_H:
+ /* Emit 'movzx rax, word ptr [rax + off]' */
+ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
+ break;
+ case BPF_W:
+ /* Emit 'mov eax, dword ptr [rax+0x14]' */
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
+ else
+ EMIT1(0x8B);
+ break;
+ case BPF_DW:
+ /* Emit 'mov rax, qword ptr [rax+0x14]' */
+ EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
+ break;
+ }
+ /*
+ * If insn->off == 0 we can save one extra byte, but
+ * special case of x86 R13 which always needs an offset
+ * is not worth the hassle
+ */
+ if (is_imm8(off))
+ EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
+ else
+ EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
+ *pprog = prog;
+}
+
+/* STX: *(u8*)(dst_reg + off) = src_reg */
+static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ u8 *prog = *pprog;
+ int cnt = 0;
+
+ switch (size) {
+ case BPF_B:
+ /* Emit 'mov byte ptr [rax + off], al' */
+ if (is_ereg(dst_reg) || is_ereg(src_reg) ||
+ /* We have to add extra byte for x86 SIL, DIL regs */
+ src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
+ else
+ EMIT1(0x88);
+ break;
+ case BPF_H:
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
+ else
+ EMIT2(0x66, 0x89);
+ break;
+ case BPF_W:
+ if (is_ereg(dst_reg) || is_ereg(src_reg))
+ EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
+ else
+ EMIT1(0x89);
+ break;
+ case BPF_DW:
+ EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
+ break;
+ }
+ if (is_imm8(off))
+ EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
+ else
+ EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
+ *pprog = prog;
+}
+
+static bool ex_handler_bpf(const struct exception_table_entry *x,
+ struct pt_regs *regs, int trapnr,
+ unsigned long error_code, unsigned long fault_addr)
+{
+ u32 reg = x->fixup >> 8;
+
+ /* jump over faulting load and clear dest register */
+ *(unsigned long *)((void *)regs + reg) = 0;
+ regs->ip += x->fixup & 0xff;
+ return true;
+}
+
static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int oldproglen, struct jit_context *ctx)
{
@@ -384,7 +645,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
int insn_cnt = bpf_prog->len;
bool seen_exit = false;
u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
- int i, cnt = 0;
+ int i, cnt = 0, excnt = 0;
int proglen = 0;
u8 *prog = temp;
@@ -747,64 +1008,64 @@ st: if (is_imm8(insn->off))
/* STX: *(u8*)(dst_reg + off) = src_reg */
case BPF_STX | BPF_MEM | BPF_B:
- /* Emit 'mov byte ptr [rax + off], al' */
- if (is_ereg(dst_reg) || is_ereg(src_reg) ||
- /* We have to add extra byte for x86 SIL, DIL regs */
- src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
- EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
- else
- EMIT1(0x88);
- goto stx;
case BPF_STX | BPF_MEM | BPF_H:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
- else
- EMIT2(0x66, 0x89);
- goto stx;
case BPF_STX | BPF_MEM | BPF_W:
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
- else
- EMIT1(0x89);
- goto stx;
case BPF_STX | BPF_MEM | BPF_DW:
- EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
-stx: if (is_imm8(insn->off))
- EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
- else
- EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
- insn->off);
+ emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
break;
/* LDX: dst_reg = *(u8*)(src_reg + off) */
case BPF_LDX | BPF_MEM | BPF_B:
- /* Emit 'movzx rax, byte ptr [rax + off]' */
- EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
- goto ldx;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_B:
case BPF_LDX | BPF_MEM | BPF_H:
- /* Emit 'movzx rax, word ptr [rax + off]' */
- EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
- goto ldx;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_H:
case BPF_LDX | BPF_MEM | BPF_W:
- /* Emit 'mov eax, dword ptr [rax+0x14]' */
- if (is_ereg(dst_reg) || is_ereg(src_reg))
- EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
- else
- EMIT1(0x8B);
- goto ldx;
+ case BPF_LDX | BPF_PROBE_MEM | BPF_W:
case BPF_LDX | BPF_MEM | BPF_DW:
- /* Emit 'mov rax, qword ptr [rax+0x14]' */
- EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
-ldx: /*
- * If insn->off == 0 we can save one extra byte, but
- * special case of x86 R13 which always needs an offset
- * is not worth the hassle
- */
- if (is_imm8(insn->off))
- EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
- else
- EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
- insn->off);
+ case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+ emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
+ struct exception_table_entry *ex;
+ u8 *_insn = image + proglen;
+ s64 delta;
+
+ if (!bpf_prog->aux->extable)
+ break;
+
+ if (excnt >= bpf_prog->aux->num_exentries) {
+ pr_err("ex gen bug\n");
+ return -EFAULT;
+ }
+ ex = &bpf_prog->aux->extable[excnt++];
+
+ delta = _insn - (u8 *)&ex->insn;
+ if (!is_simm32(delta)) {
+ pr_err("extable->insn doesn't fit into 32-bit\n");
+ return -EFAULT;
+ }
+ ex->insn = delta;
+
+ delta = (u8 *)ex_handler_bpf - (u8 *)&ex->handler;
+ if (!is_simm32(delta)) {
+ pr_err("extable->handler doesn't fit into 32-bit\n");
+ return -EFAULT;
+ }
+ ex->handler = delta;
+
+ if (dst_reg > BPF_REG_9) {
+ pr_err("verifier error\n");
+ return -EFAULT;
+ }
+ /*
+ * Compute size of x86 insn and its target dest x86 register.
+ * ex_handler_bpf() will use lower 8 bits to adjust
+ * pt_regs->ip to jump over this x86 instruction
+ * and upper bits to figure out which pt_regs to zero out.
+ * End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
+ * of 4 bytes will be ignored and rbx will be zero inited.
+ */
+ ex->fixup = (prog - temp) | (reg2pt_regs[dst_reg] << 8);
+ }
break;
/* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */
@@ -827,17 +1088,16 @@ xadd: if (is_imm8(insn->off))
/* call */
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
- jmp_offset = func - (image + addrs[i]);
- if (!imm32 || !is_simm32(jmp_offset)) {
- pr_err("unsupported BPF func %d addr %p image %p\n",
- imm32, func, image);
+ if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
return -EINVAL;
- }
- EMIT1_off32(0xE8, jmp_offset);
break;
case BPF_JMP | BPF_TAIL_CALL:
- emit_bpf_tail_call(&prog);
+ if (imm32)
+ emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1],
+ &prog, addrs[i], image);
+ else
+ emit_bpf_tail_call_indirect(&prog);
break;
/* cond jump */
@@ -909,6 +1169,16 @@ xadd: if (is_imm8(insn->off))
case BPF_JMP32 | BPF_JSLT | BPF_K:
case BPF_JMP32 | BPF_JSGE | BPF_K:
case BPF_JMP32 | BPF_JSLE | BPF_K:
+ /* test dst_reg, dst_reg to save one extra byte */
+ if (imm32 == 0) {
+ if (BPF_CLASS(insn->code) == BPF_JMP)
+ EMIT1(add_2mod(0x48, dst_reg, dst_reg));
+ else if (is_ereg(dst_reg))
+ EMIT1(add_2mod(0x40, dst_reg, dst_reg));
+ EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg));
+ goto emit_cond_jmp;
+ }
+
/* cmp dst_reg, imm8/32 */
if (BPF_CLASS(insn->code) == BPF_JMP)
EMIT1(add_1mod(0x48, dst_reg));
@@ -1048,9 +1318,218 @@ emit_jmp:
addrs[i] = proglen;
prog = temp;
}
+
+ if (image && excnt != bpf_prog->aux->num_exentries) {
+ pr_err("extable is not populated\n");
+ return -EFAULT;
+ }
return proglen;
}
+static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+ int stack_size)
+{
+ int i;
+ /* Store function arguments to stack.
+ * For a function that accepts two pointers the sequence will be:
+ * mov QWORD PTR [rbp-0x10],rdi
+ * mov QWORD PTR [rbp-0x8],rsi
+ */
+ for (i = 0; i < min(nr_args, 6); i++)
+ emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]),
+ BPF_REG_FP,
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ -(stack_size - i * 8));
+}
+
+static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+ int stack_size)
+{
+ int i;
+
+ /* Restore function arguments from stack.
+ * For a function that accepts two pointers the sequence will be:
+ * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
+ * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
+ */
+ for (i = 0; i < min(nr_args, 6); i++)
+ emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]),
+ i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+ BPF_REG_FP,
+ -(stack_size - i * 8));
+}
+
+static int invoke_bpf(struct btf_func_model *m, u8 **pprog,
+ struct bpf_prog **progs, int prog_cnt, int stack_size)
+{
+ u8 *prog = *pprog;
+ int cnt = 0, i;
+
+ for (i = 0; i < prog_cnt; i++) {
+ if (emit_call(&prog, __bpf_prog_enter, prog))
+ return -EINVAL;
+ /* remember prog start time returned by __bpf_prog_enter */
+ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+
+ /* arg1: lea rdi, [rbp - stack_size] */
+ EMIT4(0x48, 0x8D, 0x7D, -stack_size);
+ /* arg2: progs[i]->insnsi for interpreter */
+ if (!progs[i]->jited)
+ emit_mov_imm64(&prog, BPF_REG_2,
+ (long) progs[i]->insnsi >> 32,
+ (u32) (long) progs[i]->insnsi);
+ /* call JITed bpf program or interpreter */
+ if (emit_call(&prog, progs[i]->bpf_func, prog))
+ return -EINVAL;
+
+ /* arg1: mov rdi, progs[i] */
+ emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32,
+ (u32) (long) progs[i]);
+ /* arg2: mov rsi, rbx <- start time in nsec */
+ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+ if (emit_call(&prog, __bpf_prog_exit, prog))
+ return -EINVAL;
+ }
+ *pprog = prog;
+ return 0;
+}
+
+/* Example:
+ * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+ * its 'struct btf_func_model' will be nr_args=2
+ * The assembly code when eth_type_trans is executing after trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 16 // space for skb and dev
+ * push rbx // temp regs to pass start time
+ * mov qword ptr [rbp - 16], rdi // save skb pointer to stack
+ * mov qword ptr [rbp - 8], rsi // save dev pointer to stack
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time in bpf stats are enabled
+ * lea rdi, [rbp - 16] // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 16] // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 8] // restore dev pointer from stack
+ * pop rbx
+ * leave
+ * ret
+ *
+ * eth_type_trans has 5 byte nop at the beginning. These 5 bytes will be
+ * replaced with 'call generated_bpf_trampoline'. When it returns
+ * eth_type_trans will continue executing with original skb and dev pointers.
+ *
+ * The assembly code when eth_type_trans is called from trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 24 // space for skb, dev, return value
+ * push rbx // temp regs to pass start time
+ * mov qword ptr [rbp - 24], rdi // save skb pointer to stack
+ * mov qword ptr [rbp - 16], rsi // save dev pointer to stack
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 24] // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog // bpf prog can access skb and dev
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 24] // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 16] // restore dev pointer from stack
+ * call eth_type_trans+5 // execute body of eth_type_trans
+ * mov qword ptr [rbp - 8], rax // save return value
+ * call __bpf_prog_enter // rcu_read_lock and preempt_disable
+ * mov rbx, rax // remember start time in bpf stats are enabled
+ * lea rdi, [rbp - 24] // R1==ctx of bpf prog
+ * call addr_of_jited_FEXIT_prog // bpf prog can access skb, dev, return value
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off
+ * mov rsi, rbx // prog start time
+ * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math
+ * mov rax, qword ptr [rbp - 8] // restore eth_type_trans's return value
+ * pop rbx
+ * leave
+ * add rsp, 8 // skip eth_type_trans's frame
+ * ret // return to its caller
+ */
+int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ struct bpf_prog **fentry_progs, int fentry_cnt,
+ struct bpf_prog **fexit_progs, int fexit_cnt,
+ void *orig_call)
+{
+ int cnt = 0, nr_args = m->nr_args;
+ int stack_size = nr_args * 8;
+ u8 *prog;
+
+ /* x86-64 supports up to 6 arguments. 7+ can be added in the future */
+ if (nr_args > 6)
+ return -ENOTSUPP;
+
+ if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+ (flags & BPF_TRAMP_F_SKIP_FRAME))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ stack_size += 8; /* room for return value of orig_call */
+
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip patched call instruction and point orig_call to actual
+ * body of the kernel function.
+ */
+ orig_call += X86_PATCH_SIZE;
+
+ prog = image;
+
+ EMIT1(0x55); /* push rbp */
+ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+ EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
+ EMIT1(0x53); /* push rbx */
+
+ save_regs(m, &prog, nr_args, stack_size);
+
+ if (fentry_cnt)
+ if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
+ if (fentry_cnt)
+ restore_regs(m, &prog, nr_args, stack_size);
+
+ /* call original function */
+ if (emit_call(&prog, orig_call, prog))
+ return -EINVAL;
+ /* remember return value in a stack for bpf prog to access */
+ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
+ }
+
+ if (fexit_cnt)
+ if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size))
+ return -EINVAL;
+
+ if (flags & BPF_TRAMP_F_RESTORE_REGS)
+ restore_regs(m, &prog, nr_args, stack_size);
+
+ if (flags & BPF_TRAMP_F_CALL_ORIG)
+ /* restore original return value back into RAX */
+ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
+
+ EMIT1(0x5B); /* pop rbx */
+ EMIT1(0xC9); /* leave */
+ if (flags & BPF_TRAMP_F_SKIP_FRAME)
+ /* skip our return address and return to parent */
+ EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+ EMIT1(0xC3); /* ret */
+ /* One half of the page has active running trampoline.
+ * Another half is an area for next trampoline.
+ * Make sure the trampoline generation logic doesn't overflow.
+ */
+ if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY))
+ return -EFAULT;
+ return 0;
+}
+
struct x64_jit_data {
struct bpf_binary_header *header;
int *addrs;
@@ -1148,12 +1627,24 @@ out_image:
break;
}
if (proglen == oldproglen) {
- header = bpf_jit_binary_alloc(proglen, &image,
- 1, jit_fill_hole);
+ /*
+ * The number of entries in extable is the number of BPF_LDX
+ * insns that access kernel memory via "pointer to BTF type".
+ * The verifier changed their opcode from LDX|MEM|size
+ * to LDX|PROBE_MEM|size to make JITing easier.
+ */
+ u32 align = __alignof__(struct exception_table_entry);
+ u32 extable_size = prog->aux->num_exentries *
+ sizeof(struct exception_table_entry);
+
+ /* allocate module memory for x86 insns and extable */
+ header = bpf_jit_binary_alloc(roundup(proglen, align) + extable_size,
+ &image, align, jit_fill_hole);
if (!header) {
prog = orig_prog;
goto out_addrs;
}
+ prog->aux->extable = (void *) image + roundup(proglen, align);
}
oldproglen = proglen;
cond_resched();
@@ -1164,6 +1655,7 @@ out_image:
if (image) {
if (!prog->is_func || extra_pass) {
+ bpf_tail_call_direct_fixup(prog);
bpf_jit_binary_lock_ro(header);
} else {
jit_data->addrs = addrs;
diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h
index 71e8a67337e2..276cf79b5d24 100644
--- a/arch/x86/oprofile/op_x86_model.h
+++ b/arch/x86/oprofile/op_x86_model.h
@@ -67,13 +67,13 @@ static inline void op_x86_warn_in_use(int counter)
* cannot be monitored by any other counter, contact your
* hardware or BIOS vendor.
*/
- pr_warning("oprofile: counter #%d on cpu #%d may already be used\n",
- counter, smp_processor_id());
+ pr_warn("oprofile: counter #%d on cpu #%d may already be used\n",
+ counter, smp_processor_id());
}
static inline void op_x86_warn_reserved(int counter)
{
- pr_warning("oprofile: counter #%d is already reserved\n", counter);
+ pr_warn("oprofile: counter #%d is already reserved\n", counter);
}
extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model,
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 425e025341db..38d44f36d5ed 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -128,6 +128,9 @@ void __init efi_find_mirror(void)
efi_memory_desc_t *md;
u64 mirror_size = 0, total_size = 0;
+ if (!efi_enabled(EFI_MEMMAP))
+ return;
+
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -145,14 +148,18 @@ void __init efi_find_mirror(void)
/*
* Tell the kernel about the EFI memory map. This might include
- * more than the max 128 entries that can fit in the e820 legacy
- * (zeropage) memory map.
+ * more than the max 128 entries that can fit in the passed in e820
+ * legacy (zeropage) memory map, but the kernel's e820 table can hold
+ * E820_MAX_ENTRIES.
*/
static void __init do_add_efi_memmap(void)
{
efi_memory_desc_t *md;
+ if (!efi_enabled(EFI_MEMMAP))
+ return;
+
for_each_efi_memory_desc(md) {
unsigned long long start = md->phys_addr;
unsigned long long size = md->num_pages << EFI_PAGE_SHIFT;
@@ -164,7 +171,10 @@ static void __init do_add_efi_memmap(void)
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
case EFI_CONVENTIONAL_MEMORY:
- if (md->attribute & EFI_MEMORY_WB)
+ if (efi_soft_reserve_enabled()
+ && (md->attribute & EFI_MEMORY_SP))
+ e820_type = E820_TYPE_SOFT_RESERVED;
+ else if (md->attribute & EFI_MEMORY_WB)
e820_type = E820_TYPE_RAM;
else
e820_type = E820_TYPE_RESERVED;
@@ -190,11 +200,36 @@ static void __init do_add_efi_memmap(void)
e820_type = E820_TYPE_RESERVED;
break;
}
+
e820__range_add(start, size, e820_type);
}
e820__update_table(e820_table);
}
+/*
+ * Given add_efi_memmap defaults to 0 and there there is no alternative
+ * e820 mechanism for soft-reserved memory, import the full EFI memory
+ * map if soft reservations are present and enabled. Otherwise, the
+ * mechanism to disable the kernel's consideration of EFI_MEMORY_SP is
+ * the efi=nosoftreserve option.
+ */
+static bool do_efi_soft_reserve(void)
+{
+ efi_memory_desc_t *md;
+
+ if (!efi_enabled(EFI_MEMMAP))
+ return false;
+
+ if (!efi_soft_reserve_enabled())
+ return false;
+
+ for_each_efi_memory_desc(md)
+ if (md->type == EFI_CONVENTIONAL_MEMORY &&
+ (md->attribute & EFI_MEMORY_SP))
+ return true;
+ return false;
+}
+
int __init efi_memblock_x86_reserve_range(void)
{
struct efi_info *e = &boot_params.efi_info;
@@ -224,9 +259,11 @@ int __init efi_memblock_x86_reserve_range(void)
if (rv)
return rv;
- if (add_efi_memmap)
+ if (add_efi_memmap || do_efi_soft_reserve())
do_add_efi_memmap();
+ efi_fake_memmap_early();
+
WARN(efi.memmap.desc_version != 1,
"Unexpected EFI_MEMORY_DESCRIPTOR version %ld",
efi.memmap.desc_version);
@@ -779,6 +816,15 @@ static bool should_map_region(efi_memory_desc_t *md)
return false;
/*
+ * EFI specific purpose memory may be reserved by default
+ * depending on kernel config and boot options.
+ */
+ if (md->type == EFI_CONVENTIONAL_MEMORY &&
+ efi_soft_reserve_enabled() &&
+ (md->attribute & EFI_MEMORY_SP))
+ return false;
+
+ /*
* Map all of RAM so that we can access arguments in the 1:1
* mapping when making EFI runtime calls.
*/
diff --git a/arch/x86/platform/efi/efi_stub_32.S b/arch/x86/platform/efi/efi_stub_32.S
index ab2e91e76894..eed8b5b441f8 100644
--- a/arch/x86/platform/efi/efi_stub_32.S
+++ b/arch/x86/platform/efi/efi_stub_32.S
@@ -22,7 +22,7 @@
*/
.text
-ENTRY(efi_call_phys)
+SYM_FUNC_START(efi_call_phys)
/*
* 0. The function can only be called in Linux kernel. So CS has been
* set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
@@ -114,7 +114,7 @@ ENTRY(efi_call_phys)
movl (%edx), %ecx
pushl %ecx
ret
-ENDPROC(efi_call_phys)
+SYM_FUNC_END(efi_call_phys)
.previous
.data
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S
index 74628ec78f29..b1d2313fe3bf 100644
--- a/arch/x86/platform/efi/efi_stub_64.S
+++ b/arch/x86/platform/efi/efi_stub_64.S
@@ -39,7 +39,7 @@
mov %rsi, %cr0; \
mov (%rsp), %rsp
-ENTRY(efi_call)
+SYM_FUNC_START(efi_call)
pushq %rbp
movq %rsp, %rbp
SAVE_XMM
@@ -55,4 +55,4 @@ ENTRY(efi_call)
RESTORE_XMM
popq %rbp
ret
-ENDPROC(efi_call)
+SYM_FUNC_END(efi_call)
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S
index 46c58b08739c..3189f1394701 100644
--- a/arch/x86/platform/efi/efi_thunk_64.S
+++ b/arch/x86/platform/efi/efi_thunk_64.S
@@ -25,7 +25,7 @@
.text
.code64
-ENTRY(efi64_thunk)
+SYM_FUNC_START(efi64_thunk)
push %rbp
push %rbx
@@ -60,14 +60,14 @@ ENTRY(efi64_thunk)
pop %rbx
pop %rbp
retq
-ENDPROC(efi64_thunk)
+SYM_FUNC_END(efi64_thunk)
/*
* We run this function from the 1:1 mapping.
*
* This function must be invoked with a 1:1 mapped stack.
*/
-ENTRY(__efi64_thunk)
+SYM_FUNC_START_LOCAL(__efi64_thunk)
movl %ds, %eax
push %rax
movl %es, %eax
@@ -114,14 +114,14 @@ ENTRY(__efi64_thunk)
or %rcx, %rax
1:
ret
-ENDPROC(__efi64_thunk)
+SYM_FUNC_END(__efi64_thunk)
-ENTRY(efi_exit32)
+SYM_FUNC_START_LOCAL(efi_exit32)
movq func_rt_ptr(%rip), %rax
push %rax
mov %rdi, %rax
ret
-ENDPROC(efi_exit32)
+SYM_FUNC_END(efi_exit32)
.code32
/*
@@ -129,7 +129,7 @@ ENDPROC(efi_exit32)
*
* The stack should represent the 32-bit calling convention.
*/
-ENTRY(efi_enter32)
+SYM_FUNC_START_LOCAL(efi_enter32)
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %es
@@ -145,7 +145,7 @@ ENTRY(efi_enter32)
pushl %eax
lret
-ENDPROC(efi_enter32)
+SYM_FUNC_END(efi_enter32)
.data
.balign 8
diff --git a/arch/x86/platform/efi/quirks.c b/arch/x86/platform/efi/quirks.c
index 3b9fd679cea9..7675cf754d90 100644
--- a/arch/x86/platform/efi/quirks.c
+++ b/arch/x86/platform/efi/quirks.c
@@ -320,6 +320,9 @@ void __init efi_reserve_boot_services(void)
{
efi_memory_desc_t *md;
+ if (!efi_enabled(EFI_MEMMAP))
+ return;
+
for_each_efi_memory_desc(md) {
u64 start = md->phys_addr;
u64 size = md->num_pages << EFI_PAGE_SHIFT;
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c
index 6d193bb36021..089413cd944e 100644
--- a/arch/x86/platform/olpc/olpc-xo15-sci.c
+++ b/arch/x86/platform/olpc/olpc-xo15-sci.c
@@ -39,7 +39,7 @@ static int set_lid_wake_behavior(bool wake_on_close)
status = acpi_execute_simple_method(NULL, "\\_SB.PCI0.LID.LIDW", wake_on_close);
if (ACPI_FAILURE(status)) {
- pr_warning(PFX "failed to set lid behavior\n");
+ pr_warn(PFX "failed to set lid behavior\n");
return 1;
}
diff --git a/arch/x86/platform/olpc/xo1-wakeup.S b/arch/x86/platform/olpc/xo1-wakeup.S
index 5fee3a2c2fd4..75f4faff8468 100644
--- a/arch/x86/platform/olpc/xo1-wakeup.S
+++ b/arch/x86/platform/olpc/xo1-wakeup.S
@@ -90,7 +90,7 @@ restore_registers:
ret
-ENTRY(do_olpc_suspend_lowlevel)
+SYM_CODE_START(do_olpc_suspend_lowlevel)
call save_processor_state
call save_registers
@@ -110,6 +110,7 @@ ret_point:
call restore_registers
call restore_processor_state
ret
+SYM_CODE_END(do_olpc_suspend_lowlevel)
.data
saved_gdt: .long 0,0
diff --git a/arch/x86/platform/pvh/head.S b/arch/x86/platform/pvh/head.S
index 1f8825bbaffb..43b4d864817e 100644
--- a/arch/x86/platform/pvh/head.S
+++ b/arch/x86/platform/pvh/head.S
@@ -50,7 +50,7 @@
#define PVH_DS_SEL (PVH_GDT_ENTRY_DS * 8)
#define PVH_CANARY_SEL (PVH_GDT_ENTRY_CANARY * 8)
-ENTRY(pvh_start_xen)
+SYM_CODE_START_LOCAL(pvh_start_xen)
cld
lgdt (_pa(gdt))
@@ -146,15 +146,16 @@ ENTRY(pvh_start_xen)
ljmp $PVH_CS_SEL, $_pa(startup_32)
#endif
-END(pvh_start_xen)
+SYM_CODE_END(pvh_start_xen)
.section ".init.data","aw"
.balign 8
-gdt:
+SYM_DATA_START_LOCAL(gdt)
.word gdt_end - gdt_start
.long _pa(gdt_start)
.word 0
-gdt_start:
+SYM_DATA_END(gdt)
+SYM_DATA_START_LOCAL(gdt_start)
.quad 0x0000000000000000 /* NULL descriptor */
#ifdef CONFIG_X86_64
.quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* PVH_CS_SEL */
@@ -163,15 +164,14 @@ gdt_start:
#endif
.quad GDT_ENTRY(0xc092, 0, 0xfffff) /* PVH_DS_SEL */
.quad GDT_ENTRY(0x4090, 0, 0x18) /* PVH_CANARY_SEL */
-gdt_end:
+SYM_DATA_END_LABEL(gdt_start, SYM_L_LOCAL, gdt_end)
.balign 16
-canary:
- .fill 48, 1, 0
+SYM_DATA_LOCAL(canary, .fill 48, 1, 0)
-early_stack:
+SYM_DATA_START_LOCAL(early_stack)
.fill BOOT_STACK_SIZE, 1, 0
-early_stack_end:
+SYM_DATA_END_LABEL(early_stack, SYM_L_LOCAL, early_stack_end)
ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY,
_ASM_PTR (pvh_start_xen - __START_KERNEL_map))
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
index bf6016f8db4e..6259563760f9 100644
--- a/arch/x86/platform/sfi/sfi.c
+++ b/arch/x86/platform/sfi/sfi.c
@@ -26,8 +26,7 @@ static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
static void __init mp_sfi_register_lapic(u8 id)
{
if (MAX_LOCAL_APIC - id <= 0) {
- pr_warning("Processor #%d invalid (max %d)\n",
- id, MAX_LOCAL_APIC);
+ pr_warn("Processor #%d invalid (max %d)\n", id, MAX_LOCAL_APIC);
return;
}
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index c2ee31953372..ece9cb9c1189 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -184,20 +184,20 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus)
}
EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target);
-void uv_bios_init(void)
+int uv_bios_init(void)
{
uv_systab = NULL;
if ((uv_systab_phys == EFI_INVALID_TABLE_ADDR) ||
!uv_systab_phys || efi_runtime_disabled()) {
pr_crit("UV: UVsystab: missing\n");
- return;
+ return -EEXIST;
}
uv_systab = ioremap(uv_systab_phys, sizeof(struct uv_systab));
if (!uv_systab || strncmp(uv_systab->signature, UV_SYSTAB_SIG, 4)) {
pr_err("UV: UVsystab: bad signature!\n");
iounmap(uv_systab);
- return;
+ return -EINVAL;
}
/* Starting with UV4 the UV systab size is variable */
@@ -208,8 +208,9 @@ void uv_bios_init(void)
uv_systab = ioremap(uv_systab_phys, size);
if (!uv_systab) {
pr_err("UV: UVsystab: ioremap(%d) failed!\n", size);
- return;
+ return -EFAULT;
}
}
pr_info("UV: UVsystab: Revision:%x\n", uv_systab->revision);
+ return 0;
}
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 6fe383002125..8786653ad3c0 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -16,7 +16,7 @@
.text
-ENTRY(swsusp_arch_suspend)
+SYM_FUNC_START(swsusp_arch_suspend)
movl %esp, saved_context_esp
movl %ebx, saved_context_ebx
movl %ebp, saved_context_ebp
@@ -33,9 +33,9 @@ ENTRY(swsusp_arch_suspend)
call swsusp_save
FRAME_END
ret
-ENDPROC(swsusp_arch_suspend)
+SYM_FUNC_END(swsusp_arch_suspend)
-ENTRY(restore_image)
+SYM_CODE_START(restore_image)
/* prepare to jump to the image kernel */
movl restore_jump_address, %ebx
movl restore_cr3, %ebp
@@ -45,9 +45,10 @@ ENTRY(restore_image)
/* jump to relocated restore code */
movl relocated_restore_code, %eax
jmpl *%eax
+SYM_CODE_END(restore_image)
/* code below has been relocated to a safe page */
-ENTRY(core_restore_code)
+SYM_CODE_START(core_restore_code)
movl temp_pgt, %eax
movl %eax, %cr3
@@ -77,10 +78,11 @@ copy_loop:
done:
jmpl *%ebx
+SYM_CODE_END(core_restore_code)
/* code below belongs to the image kernel */
.align PAGE_SIZE
-ENTRY(restore_registers)
+SYM_FUNC_START(restore_registers)
/* go back to the original page tables */
movl %ebp, %cr3
movl mmu_cr4_features, %ecx
@@ -107,4 +109,4 @@ ENTRY(restore_registers)
movl %eax, in_suspend
ret
-ENDPROC(restore_registers)
+SYM_FUNC_END(restore_registers)
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index a4d5eb0a7ece..7918b8415f13 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -22,7 +22,7 @@
#include <asm/processor-flags.h>
#include <asm/frame.h>
-ENTRY(swsusp_arch_suspend)
+SYM_FUNC_START(swsusp_arch_suspend)
movq $saved_context, %rax
movq %rsp, pt_regs_sp(%rax)
movq %rbp, pt_regs_bp(%rax)
@@ -50,9 +50,9 @@ ENTRY(swsusp_arch_suspend)
call swsusp_save
FRAME_END
ret
-ENDPROC(swsusp_arch_suspend)
+SYM_FUNC_END(swsusp_arch_suspend)
-ENTRY(restore_image)
+SYM_CODE_START(restore_image)
/* prepare to jump to the image kernel */
movq restore_jump_address(%rip), %r8
movq restore_cr3(%rip), %r9
@@ -67,9 +67,10 @@ ENTRY(restore_image)
/* jump to relocated restore code */
movq relocated_restore_code(%rip), %rcx
jmpq *%rcx
+SYM_CODE_END(restore_image)
/* code below has been relocated to a safe page */
-ENTRY(core_restore_code)
+SYM_CODE_START(core_restore_code)
/* switch to temporary page tables */
movq %rax, %cr3
/* flush TLB */
@@ -97,10 +98,11 @@ ENTRY(core_restore_code)
.Ldone:
/* jump to the restore_registers address from the image header */
jmpq *%r8
+SYM_CODE_END(core_restore_code)
/* code below belongs to the image kernel */
.align PAGE_SIZE
-ENTRY(restore_registers)
+SYM_FUNC_START(restore_registers)
/* go back to the original page tables */
movq %r9, %cr3
@@ -142,4 +144,4 @@ ENTRY(restore_registers)
movq %rax, in_suspend(%rip)
ret
-ENDPROC(restore_registers)
+SYM_FUNC_END(restore_registers)
diff --git a/arch/x86/purgatory/entry64.S b/arch/x86/purgatory/entry64.S
index 275a646d1048..0b4390ce586b 100644
--- a/arch/x86/purgatory/entry64.S
+++ b/arch/x86/purgatory/entry64.S
@@ -8,13 +8,13 @@
* This code has been taken from kexec-tools.
*/
+#include <linux/linkage.h>
+
.text
.balign 16
.code64
- .globl entry64, entry64_regs
-
-entry64:
+SYM_CODE_START(entry64)
/* Setup a gdt that should be preserved */
lgdt gdt(%rip)
@@ -54,10 +54,11 @@ new_cs_exit:
/* Jump to the new code... */
jmpq *rip(%rip)
+SYM_CODE_END(entry64)
.section ".rodata"
.balign 4
-entry64_regs:
+SYM_DATA_START(entry64_regs)
rax: .quad 0x0
rcx: .quad 0x0
rdx: .quad 0x0
@@ -75,13 +76,14 @@ r13: .quad 0x0
r14: .quad 0x0
r15: .quad 0x0
rip: .quad 0x0
- .size entry64_regs, . - entry64_regs
+SYM_DATA_END(entry64_regs)
/* GDT */
.section ".rodata"
.balign 16
-gdt:
- /* 0x00 unusable segment
+SYM_DATA_START_LOCAL(gdt)
+ /*
+ * 0x00 unusable segment
* 0x08 unused
* so use them as gdt ptr
*/
@@ -94,6 +96,8 @@ gdt:
/* 0x18 4GB flat data segment */
.word 0xFFFF, 0x0000, 0x9200, 0x00CF
-gdt_end:
-stack: .quad 0, 0
-stack_init:
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
+
+SYM_DATA_START_LOCAL(stack)
+ .quad 0, 0
+SYM_DATA_END_LABEL(stack, SYM_L_LOCAL, stack_init)
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index 3b95410ff0f8..2961234d0795 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -14,28 +14,10 @@
#include "../boot/string.h"
-unsigned long purgatory_backup_dest __section(.kexec-purgatory);
-unsigned long purgatory_backup_src __section(.kexec-purgatory);
-unsigned long purgatory_backup_sz __section(.kexec-purgatory);
-
u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory);
struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory);
-/*
- * On x86, second kernel requries first 640K of memory to boot. Copy
- * first 640K to a backup region in reserved memory range so that second
- * kernel can use first 640K.
- */
-static int copy_backup_region(void)
-{
- if (purgatory_backup_dest) {
- memcpy((void *)purgatory_backup_dest,
- (void *)purgatory_backup_src, purgatory_backup_sz);
- }
- return 0;
-}
-
static int verify_sha256_digest(void)
{
struct kexec_sha_region *ptr, *end;
@@ -66,7 +48,6 @@ void purgatory(void)
for (;;)
;
}
- copy_backup_region();
}
/*
diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S
index 321146be741d..89d9e9e53fcd 100644
--- a/arch/x86/purgatory/setup-x86_64.S
+++ b/arch/x86/purgatory/setup-x86_64.S
@@ -7,14 +7,14 @@
*
* This code has been taken from kexec-tools.
*/
+#include <linux/linkage.h>
#include <asm/purgatory.h>
.text
- .globl purgatory_start
.balign 16
-purgatory_start:
.code64
+SYM_CODE_START(purgatory_start)
/* Load a gdt so I know what the segment registers are */
lgdt gdt(%rip)
@@ -32,10 +32,12 @@ purgatory_start:
/* Call the C code */
call purgatory
jmp entry64
+SYM_CODE_END(purgatory_start)
.section ".rodata"
.balign 16
-gdt: /* 0x00 unusable segment
+SYM_DATA_START_LOCAL(gdt)
+ /* 0x00 unusable segment
* 0x08 unused
* so use them as the gdt ptr
*/
@@ -48,10 +50,10 @@ gdt: /* 0x00 unusable segment
/* 0x18 4GB flat data segment */
.word 0xFFFF, 0x0000, 0x9200, 0x00CF
-gdt_end:
+SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
.bss
.balign 4096
-lstack:
+SYM_DATA_START_LOCAL(lstack)
.skip 4096
-lstack_end:
+SYM_DATA_END_LABEL(lstack, SYM_L_LOCAL, lstack_end)
diff --git a/arch/x86/purgatory/stack.S b/arch/x86/purgatory/stack.S
index 8b1427422dfc..1ef507ca50a5 100644
--- a/arch/x86/purgatory/stack.S
+++ b/arch/x86/purgatory/stack.S
@@ -5,13 +5,14 @@
* Copyright (C) 2014 Red Hat Inc.
*/
+#include <linux/linkage.h>
+
/* A stack for the loaded kernel.
* Separate and in the data section so it can be prepopulated.
*/
.data
.balign 4096
- .globl stack, stack_end
-stack:
+SYM_DATA_START(stack)
.skip 4096
-stack_end:
+SYM_DATA_END_LABEL(stack, SYM_L_GLOBAL, stack_end)
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c
index 7dce39c8c034..262f83cad355 100644
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -8,6 +8,7 @@
#include <asm/pgtable.h>
#include <asm/realmode.h>
#include <asm/tlbflush.h>
+#include <asm/crash.h>
struct real_mode_header *real_mode_header;
u32 *trampoline_cr4_features;
@@ -34,6 +35,7 @@ void __init reserve_real_mode(void)
memblock_reserve(mem, size);
set_real_mode_mem(mem);
+ crash_reserve_low_1M();
}
static void __init setup_real_mode(void)
diff --git a/arch/x86/realmode/rm/header.S b/arch/x86/realmode/rm/header.S
index 6363761cc74c..af04512c02d9 100644
--- a/arch/x86/realmode/rm/header.S
+++ b/arch/x86/realmode/rm/header.S
@@ -14,7 +14,7 @@
.section ".header", "a"
.balign 16
-GLOBAL(real_mode_header)
+SYM_DATA_START(real_mode_header)
.long pa_text_start
.long pa_ro_end
/* SMP trampoline */
@@ -33,11 +33,9 @@ GLOBAL(real_mode_header)
#ifdef CONFIG_X86_64
.long __KERNEL32_CS
#endif
-END(real_mode_header)
+SYM_DATA_END(real_mode_header)
/* End signature, used to verify integrity */
.section ".signature","a"
.balign 4
-GLOBAL(end_signature)
- .long REALMODE_END_SIGNATURE
-END(end_signature)
+SYM_DATA(end_signature, .long REALMODE_END_SIGNATURE)
diff --git a/arch/x86/realmode/rm/realmode.lds.S b/arch/x86/realmode/rm/realmode.lds.S
index 3bb980800c58..64d135d1ee63 100644
--- a/arch/x86/realmode/rm/realmode.lds.S
+++ b/arch/x86/realmode/rm/realmode.lds.S
@@ -11,6 +11,7 @@
OUTPUT_FORMAT("elf32-i386")
OUTPUT_ARCH(i386)
+ENTRY(pa_text_start)
SECTIONS
{
diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S
index cd2f97b9623b..f10515b10e0a 100644
--- a/arch/x86/realmode/rm/reboot.S
+++ b/arch/x86/realmode/rm/reboot.S
@@ -19,7 +19,7 @@
*/
.section ".text32", "ax"
.code32
-ENTRY(machine_real_restart_asm)
+SYM_CODE_START(machine_real_restart_asm)
#ifdef CONFIG_X86_64
/* Switch to trampoline GDT as it is guaranteed < 4 GiB */
@@ -33,7 +33,7 @@ ENTRY(machine_real_restart_asm)
movl %eax, %cr0
ljmpl $__KERNEL32_CS, $pa_machine_real_restart_paging_off
-GLOBAL(machine_real_restart_paging_off)
+SYM_INNER_LABEL(machine_real_restart_paging_off, SYM_L_GLOBAL)
xorl %eax, %eax
xorl %edx, %edx
movl $MSR_EFER, %ecx
@@ -63,6 +63,7 @@ GLOBAL(machine_real_restart_paging_off)
movl %ecx, %gs
movl %ecx, %ss
ljmpw $8, $1f
+SYM_CODE_END(machine_real_restart_asm)
/*
* This is 16-bit protected mode code to disable paging and the cache,
@@ -127,13 +128,13 @@ bios:
.section ".rodata", "a"
.balign 16
-GLOBAL(machine_real_restart_idt)
+SYM_DATA_START(machine_real_restart_idt)
.word 0xffff /* Length - real mode default value */
.long 0 /* Base - real mode default value */
-END(machine_real_restart_idt)
+SYM_DATA_END(machine_real_restart_idt)
.balign 16
-GLOBAL(machine_real_restart_gdt)
+SYM_DATA_START(machine_real_restart_gdt)
/* Self-pointer */
.word 0xffff /* Length - real mode default value */
.long pa_machine_real_restart_gdt
@@ -153,4 +154,4 @@ GLOBAL(machine_real_restart_gdt)
* semantics we don't have to reload the segments once CR0.PE = 0.
*/
.quad GDT_ENTRY(0x0093, 0x100, 0xffff)
-END(machine_real_restart_gdt)
+SYM_DATA_END(machine_real_restart_gdt)
diff --git a/arch/x86/realmode/rm/stack.S b/arch/x86/realmode/rm/stack.S
index 8d4cb64799ea..0fca64061ad2 100644
--- a/arch/x86/realmode/rm/stack.S
+++ b/arch/x86/realmode/rm/stack.S
@@ -6,15 +6,13 @@
#include <linux/linkage.h>
.data
-GLOBAL(HEAP)
- .long rm_heap
-GLOBAL(heap_end)
- .long rm_stack
+SYM_DATA(HEAP, .long rm_heap)
+SYM_DATA(heap_end, .long rm_stack)
.bss
.balign 16
-GLOBAL(rm_heap)
- .space 2048
-GLOBAL(rm_stack)
+SYM_DATA(rm_heap, .space 2048)
+
+SYM_DATA_START(rm_stack)
.space 2048
-GLOBAL(rm_stack_end)
+SYM_DATA_END_LABEL(rm_stack, SYM_L_GLOBAL, rm_stack_end)
diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S
index 1868b158480d..3fad907a179f 100644
--- a/arch/x86/realmode/rm/trampoline_32.S
+++ b/arch/x86/realmode/rm/trampoline_32.S
@@ -29,7 +29,7 @@
.code16
.balign PAGE_SIZE
-ENTRY(trampoline_start)
+SYM_CODE_START(trampoline_start)
wbinvd # Needed for NUMA-Q should be harmless for others
LJMPW_RM(1f)
@@ -54,18 +54,20 @@ ENTRY(trampoline_start)
lmsw %dx # into protected mode
ljmpl $__BOOT_CS, $pa_startup_32
+SYM_CODE_END(trampoline_start)
.section ".text32","ax"
.code32
-ENTRY(startup_32) # note: also used from wakeup_asm.S
+SYM_CODE_START(startup_32) # note: also used from wakeup_asm.S
jmp *%eax
+SYM_CODE_END(startup_32)
.bss
.balign 8
-GLOBAL(trampoline_header)
- tr_start: .space 4
- tr_gdt_pad: .space 2
- tr_gdt: .space 6
-END(trampoline_header)
+SYM_DATA_START(trampoline_header)
+ SYM_DATA_LOCAL(tr_start, .space 4)
+ SYM_DATA_LOCAL(tr_gdt_pad, .space 2)
+ SYM_DATA_LOCAL(tr_gdt, .space 6)
+SYM_DATA_END(trampoline_header)
#include "trampoline_common.S"
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S
index aee2b45d83b8..251758ed7443 100644
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -38,7 +38,7 @@
.code16
.balign PAGE_SIZE
-ENTRY(trampoline_start)
+SYM_CODE_START(trampoline_start)
cli # We should be safe anyway
wbinvd
@@ -78,12 +78,14 @@ ENTRY(trampoline_start)
no_longmode:
hlt
jmp no_longmode
+SYM_CODE_END(trampoline_start)
+
#include "../kernel/verify_cpu.S"
.section ".text32","ax"
.code32
.balign 4
-ENTRY(startup_32)
+SYM_CODE_START(startup_32)
movl %edx, %ss
addl $pa_real_mode_base, %esp
movl %edx, %ds
@@ -137,38 +139,39 @@ ENTRY(startup_32)
* the new gdt/idt that has __KERNEL_CS with CS.L = 1.
*/
ljmpl $__KERNEL_CS, $pa_startup_64
+SYM_CODE_END(startup_32)
.section ".text64","ax"
.code64
.balign 4
-ENTRY(startup_64)
+SYM_CODE_START(startup_64)
# Now jump into the kernel using virtual addresses
jmpq *tr_start(%rip)
+SYM_CODE_END(startup_64)
.section ".rodata","a"
# Duplicate the global descriptor table
# so the kernel can live anywhere
.balign 16
- .globl tr_gdt
-tr_gdt:
+SYM_DATA_START(tr_gdt)
.short tr_gdt_end - tr_gdt - 1 # gdt limit
.long pa_tr_gdt
.short 0
.quad 0x00cf9b000000ffff # __KERNEL32_CS
.quad 0x00af9b000000ffff # __KERNEL_CS
.quad 0x00cf93000000ffff # __KERNEL_DS
-tr_gdt_end:
+SYM_DATA_END_LABEL(tr_gdt, SYM_L_LOCAL, tr_gdt_end)
.bss
.balign PAGE_SIZE
-GLOBAL(trampoline_pgd) .space PAGE_SIZE
+SYM_DATA(trampoline_pgd, .space PAGE_SIZE)
.balign 8
-GLOBAL(trampoline_header)
- tr_start: .space 8
- GLOBAL(tr_efer) .space 8
- GLOBAL(tr_cr4) .space 4
- GLOBAL(tr_flags) .space 4
-END(trampoline_header)
+SYM_DATA_START(trampoline_header)
+ SYM_DATA_LOCAL(tr_start, .space 8)
+ SYM_DATA(tr_efer, .space 8)
+ SYM_DATA(tr_cr4, .space 4)
+ SYM_DATA(tr_flags, .space 4)
+SYM_DATA_END(trampoline_header)
#include "trampoline_common.S"
diff --git a/arch/x86/realmode/rm/trampoline_common.S b/arch/x86/realmode/rm/trampoline_common.S
index 8d8208dcca24..5033e640f957 100644
--- a/arch/x86/realmode/rm/trampoline_common.S
+++ b/arch/x86/realmode/rm/trampoline_common.S
@@ -1,4 +1,4 @@
/* SPDX-License-Identifier: GPL-2.0 */
.section ".rodata","a"
.balign 16
-tr_idt: .fill 1, 6, 0
+SYM_DATA_LOCAL(tr_idt, .fill 1, 6, 0)
diff --git a/arch/x86/realmode/rm/wakeup_asm.S b/arch/x86/realmode/rm/wakeup_asm.S
index 05ac9c17c811..02d0ba16ae33 100644
--- a/arch/x86/realmode/rm/wakeup_asm.S
+++ b/arch/x86/realmode/rm/wakeup_asm.S
@@ -17,7 +17,7 @@
.section ".data", "aw"
.balign 16
-GLOBAL(wakeup_header)
+SYM_DATA_START(wakeup_header)
video_mode: .short 0 /* Video mode number */
pmode_entry: .long 0
pmode_cs: .short __KERNEL_CS
@@ -31,13 +31,13 @@ GLOBAL(wakeup_header)
realmode_flags: .long 0
real_magic: .long 0
signature: .long WAKEUP_HEADER_SIGNATURE
-END(wakeup_header)
+SYM_DATA_END(wakeup_header)
.text
.code16
.balign 16
-ENTRY(wakeup_start)
+SYM_CODE_START(wakeup_start)
cli
cld
@@ -73,7 +73,7 @@ ENTRY(wakeup_start)
movw %ax, %fs
movw %ax, %gs
- lidtl wakeup_idt
+ lidtl .Lwakeup_idt
/* Clear the EFLAGS */
pushl $0
@@ -135,6 +135,7 @@ ENTRY(wakeup_start)
#else
jmp trampoline_start
#endif
+SYM_CODE_END(wakeup_start)
bogus_real_magic:
1:
@@ -152,7 +153,7 @@ bogus_real_magic:
*/
.balign 16
-GLOBAL(wakeup_gdt)
+SYM_DATA_START(wakeup_gdt)
.word 3*8-1 /* Self-descriptor */
.long pa_wakeup_gdt
.word 0
@@ -164,15 +165,15 @@ GLOBAL(wakeup_gdt)
.word 0xffff /* 16-bit data segment @ real_mode_base */
.long 0x93000000 + pa_real_mode_base
.word 0x008f /* big real mode */
-END(wakeup_gdt)
+SYM_DATA_END(wakeup_gdt)
.section ".rodata","a"
.balign 8
/* This is the standard real-mode IDT */
.balign 16
-GLOBAL(wakeup_idt)
+SYM_DATA_START_LOCAL(.Lwakeup_idt)
.word 0xffff /* limit */
.long 0 /* address */
.word 0
-END(wakeup_idt)
+SYM_DATA_END(.Lwakeup_idt)
diff --git a/arch/x86/realmode/rmpiggy.S b/arch/x86/realmode/rmpiggy.S
index c078dba40cef..c8fef76743f6 100644
--- a/arch/x86/realmode/rmpiggy.S
+++ b/arch/x86/realmode/rmpiggy.S
@@ -10,12 +10,10 @@
.balign PAGE_SIZE
-GLOBAL(real_mode_blob)
+SYM_DATA_START(real_mode_blob)
.incbin "arch/x86/realmode/rm/realmode.bin"
-END(real_mode_blob)
+SYM_DATA_END_LABEL(real_mode_blob, SYM_L_GLOBAL, real_mode_blob_end)
-GLOBAL(real_mode_blob_end);
-
-GLOBAL(real_mode_relocs)
+SYM_DATA_START(real_mode_relocs)
.incbin "arch/x86/realmode/rm/realmode.relocs"
-END(real_mode_relocs)
+SYM_DATA_END(real_mode_relocs)
diff --git a/arch/x86/tools/gen-insn-attr-x86.awk b/arch/x86/tools/gen-insn-attr-x86.awk
index b02a36b2c14f..a42015b305f4 100644
--- a/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/arch/x86/tools/gen-insn-attr-x86.awk
@@ -69,7 +69,7 @@ BEGIN {
lprefix1_expr = "\\((66|!F3)\\)"
lprefix2_expr = "\\(F3\\)"
- lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)"
+ lprefix3_expr = "\\((F2|!F3|66&F2)\\)"
lprefix_expr = "\\((66|F2|F3)\\)"
max_lprefix = 4
@@ -257,7 +257,7 @@ function convert_operands(count,opnd, i,j,imm,mod)
return add_flags(imm, mod)
}
-/^[0-9a-f]+\:/ {
+/^[0-9a-f]+:/ {
if (NR == 1)
next
# get index
diff --git a/arch/x86/um/vdso/vdso.S b/arch/x86/um/vdso/vdso.S
index a4a3870dc059..a6eaf293a73b 100644
--- a/arch/x86/um/vdso/vdso.S
+++ b/arch/x86/um/vdso/vdso.S
@@ -1,11 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/init.h>
+#include <linux/linkage.h>
__INITDATA
- .globl vdso_start, vdso_end
-vdso_start:
+SYM_DATA_START(vdso_start)
.incbin "arch/x86/um/vdso/vdso.so"
-vdso_end:
+SYM_DATA_END_LABEL(vdso_start, SYM_L_GLOBAL, vdso_end)
__FINIT
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 5bfea374a160..ae4a41ca19f6 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -837,15 +837,6 @@ static void xen_load_sp0(unsigned long sp0)
this_cpu_write(cpu_tss_rw.x86_tss.sp0, sp0);
}
-void xen_set_iopl_mask(unsigned mask)
-{
- struct physdev_set_iopl set_iopl;
-
- /* Force the change at ring 0. */
- set_iopl.iopl = (mask == 0) ? 1 : (mask >> 12) & 3;
- HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
-}
-
static void xen_io_delay(void)
{
}
@@ -1055,7 +1046,6 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
.write_idt_entry = xen_write_idt_entry,
.load_sp0 = xen_load_sp0,
- .set_iopl_mask = xen_set_iopl_mask,
.io_delay = xen_io_delay,
/* Xen takes care of %gs when switching to usermode for us */
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 548d1e0a5ba1..33b0e20df7fc 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -412,7 +412,7 @@ static unsigned long __init xen_set_identity_and_remap_chunk(
remap_range_size = xen_find_pfn_range(&remap_pfn);
if (!remap_range_size) {
- pr_warning("Unable to find available pfn range, not remapping identity pages\n");
+ pr_warn("Unable to find available pfn range, not remapping identity pages\n");
xen_set_identity_and_release_chunk(cur_pfn,
cur_pfn + left, nr_pages);
break;
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S
index be104eef80be..508fe204520b 100644
--- a/arch/x86/xen/xen-asm.S
+++ b/arch/x86/xen/xen-asm.S
@@ -19,7 +19,7 @@
* event status with one and operation. If there are pending events,
* then enter the hypervisor to get them handled.
*/
-ENTRY(xen_irq_enable_direct)
+SYM_FUNC_START(xen_irq_enable_direct)
FRAME_BEGIN
/* Unmask events */
movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
@@ -38,17 +38,17 @@ ENTRY(xen_irq_enable_direct)
1:
FRAME_END
ret
- ENDPROC(xen_irq_enable_direct)
+SYM_FUNC_END(xen_irq_enable_direct)
/*
* Disabling events is simply a matter of making the event mask
* non-zero.
*/
-ENTRY(xen_irq_disable_direct)
+SYM_FUNC_START(xen_irq_disable_direct)
movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
ret
-ENDPROC(xen_irq_disable_direct)
+SYM_FUNC_END(xen_irq_disable_direct)
/*
* (xen_)save_fl is used to get the current interrupt enable status.
@@ -59,12 +59,12 @@ ENDPROC(xen_irq_disable_direct)
* undefined. We need to toggle the state of the bit, because Xen and
* x86 use opposite senses (mask vs enable).
*/
-ENTRY(xen_save_fl_direct)
+SYM_FUNC_START(xen_save_fl_direct)
testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
setz %ah
addb %ah, %ah
ret
- ENDPROC(xen_save_fl_direct)
+SYM_FUNC_END(xen_save_fl_direct)
/*
@@ -74,7 +74,7 @@ ENTRY(xen_save_fl_direct)
* interrupt mask state, it checks for unmasked pending events and
* enters the hypervisor to get them delivered if so.
*/
-ENTRY(xen_restore_fl_direct)
+SYM_FUNC_START(xen_restore_fl_direct)
FRAME_BEGIN
#ifdef CONFIG_X86_64
testw $X86_EFLAGS_IF, %di
@@ -95,14 +95,14 @@ ENTRY(xen_restore_fl_direct)
1:
FRAME_END
ret
- ENDPROC(xen_restore_fl_direct)
+SYM_FUNC_END(xen_restore_fl_direct)
/*
* Force an event check by making a hypercall, but preserve regs
* before making the call.
*/
-ENTRY(check_events)
+SYM_FUNC_START(check_events)
FRAME_BEGIN
#ifdef CONFIG_X86_32
push %eax
@@ -135,19 +135,19 @@ ENTRY(check_events)
#endif
FRAME_END
ret
-ENDPROC(check_events)
+SYM_FUNC_END(check_events)
-ENTRY(xen_read_cr2)
+SYM_FUNC_START(xen_read_cr2)
FRAME_BEGIN
_ASM_MOV PER_CPU_VAR(xen_vcpu), %_ASM_AX
_ASM_MOV XEN_vcpu_info_arch_cr2(%_ASM_AX), %_ASM_AX
FRAME_END
ret
- ENDPROC(xen_read_cr2);
+SYM_FUNC_END(xen_read_cr2);
-ENTRY(xen_read_cr2_direct)
+SYM_FUNC_START(xen_read_cr2_direct)
FRAME_BEGIN
_ASM_MOV PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_arch_cr2, %_ASM_AX
FRAME_END
ret
- ENDPROC(xen_read_cr2_direct);
+SYM_FUNC_END(xen_read_cr2_direct);
diff --git a/arch/x86/xen/xen-asm_32.S b/arch/x86/xen/xen-asm_32.S
index c15db060a242..2712e9155306 100644
--- a/arch/x86/xen/xen-asm_32.S
+++ b/arch/x86/xen/xen-asm_32.S
@@ -56,7 +56,7 @@
_ASM_EXTABLE(1b,2b)
.endm
-ENTRY(xen_iret)
+SYM_CODE_START(xen_iret)
/* test eflags for special cases */
testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
jnz hyper_iret
@@ -122,14 +122,14 @@ xen_iret_end_crit:
hyper_iret:
/* put this out of line since its very rarely used */
jmp hypercall_page + __HYPERVISOR_iret * 32
+SYM_CODE_END(xen_iret)
.globl xen_iret_start_crit, xen_iret_end_crit
/*
- * This is called by xen_hypervisor_callback in entry.S when it sees
+ * This is called by xen_hypervisor_callback in entry_32.S when it sees
* that the EIP at the time of interrupt was between
- * xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in
- * %eax so we can do a more refined determination of what to do.
+ * xen_iret_start_crit and xen_iret_end_crit.
*
* The stack format at this point is:
* ----------------
@@ -138,70 +138,46 @@ hyper_iret:
* eflags } outer exception info
* cs }
* eip }
- * ---------------- <- edi (copy dest)
- * eax : outer eax if it hasn't been restored
* ----------------
- * eflags } nested exception info
- * cs } (no ss/esp because we're nested
- * eip } from the same ring)
- * orig_eax }<- esi (copy src)
- * - - - - - - - -
- * fs }
- * es }
- * ds } SAVE_ALL state
- * eax }
- * : :
- * ebx }<- esp
+ * eax : outer eax if it hasn't been restored
* ----------------
+ * eflags }
+ * cs } nested exception info
+ * eip }
+ * return address : (into xen_hypervisor_callback)
*
- * In order to deliver the nested exception properly, we need to shift
- * everything from the return addr up to the error code so it sits
- * just under the outer exception info. This means that when we
- * handle the exception, we do it in the context of the outer
- * exception rather than starting a new one.
+ * In order to deliver the nested exception properly, we need to discard the
+ * nested exception frame such that when we handle the exception, we do it
+ * in the context of the outer exception rather than starting a new one.
*
- * The only caveat is that if the outer eax hasn't been restored yet
- * (ie, it's still on stack), we need to insert its value into the
- * SAVE_ALL state before going on, since it's usermode state which we
- * eventually need to restore.
+ * The only caveat is that if the outer eax hasn't been restored yet (i.e.
+ * it's still on stack), we need to restore its value here.
*/
-ENTRY(xen_iret_crit_fixup)
+SYM_CODE_START(xen_iret_crit_fixup)
/*
* Paranoia: Make sure we're really coming from kernel space.
* One could imagine a case where userspace jumps into the
* critical range address, but just before the CPU delivers a
- * GP, it decides to deliver an interrupt instead. Unlikely?
- * Definitely. Easy to avoid? Yes. The Intel documents
- * explicitly say that the reported EIP for a bad jump is the
- * jump instruction itself, not the destination, but some
- * virtual environments get this wrong.
+ * PF, it decides to deliver an interrupt instead. Unlikely?
+ * Definitely. Easy to avoid? Yes.
*/
- movl PT_CS(%esp), %ecx
- andl $SEGMENT_RPL_MASK, %ecx
- cmpl $USER_RPL, %ecx
- je 2f
-
- lea PT_ORIG_EAX(%esp), %esi
- lea PT_EFLAGS(%esp), %edi
+ testb $2, 2*4(%esp) /* nested CS */
+ jnz 2f
/*
* If eip is before iret_restore_end then stack
* hasn't been restored yet.
*/
- cmp $iret_restore_end, %eax
+ cmpl $iret_restore_end, 1*4(%esp)
jae 1f
- movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */
- movl %eax, PT_EAX(%esp)
+ movl 4*4(%esp), %eax /* load outer EAX */
+ ret $4*4 /* discard nested EIP, CS, and EFLAGS as
+ * well as the just restored EAX */
- lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */
-
- /* set up the copy */
-1: std
- mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */
- rep movsl
- cld
-
- lea 4(%edi), %esp /* point esp to new frame */
-2: jmp xen_do_upcall
+1:
+ ret $3*4 /* discard nested EIP, CS, and EFLAGS */
+2:
+ ret
+SYM_CODE_END(xen_iret_crit_fixup)
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index ebf610b49c06..0a0fd168683a 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -20,11 +20,11 @@
#include <linux/linkage.h>
.macro xen_pv_trap name
-ENTRY(xen_\name)
+SYM_CODE_START(xen_\name)
pop %rcx
pop %r11
jmp \name
-END(xen_\name)
+SYM_CODE_END(xen_\name)
_ASM_NOKPROBE(xen_\name)
.endm
@@ -57,7 +57,7 @@ xen_pv_trap entry_INT80_compat
xen_pv_trap hypervisor_callback
__INIT
-ENTRY(xen_early_idt_handler_array)
+SYM_CODE_START(xen_early_idt_handler_array)
i = 0
.rept NUM_EXCEPTION_VECTORS
pop %rcx
@@ -66,7 +66,7 @@ ENTRY(xen_early_idt_handler_array)
i = i + 1
.fill xen_early_idt_handler_array + i*XEN_EARLY_IDT_HANDLER_SIZE - ., 1, 0xcc
.endr
-END(xen_early_idt_handler_array)
+SYM_CODE_END(xen_early_idt_handler_array)
__FINIT
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
@@ -85,11 +85,12 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
* r11 }<-- pushed by hypercall page
* rsp->rax }
*/
-ENTRY(xen_iret)
+SYM_CODE_START(xen_iret)
pushq $0
jmp hypercall_iret
+SYM_CODE_END(xen_iret)
-ENTRY(xen_sysret64)
+SYM_CODE_START(xen_sysret64)
/*
* We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back.
@@ -107,6 +108,7 @@ ENTRY(xen_sysret64)
pushq $VGCF_in_syscall
jmp hypercall_iret
+SYM_CODE_END(xen_sysret64)
/*
* Xen handles syscall callbacks much like ordinary exceptions, which
@@ -124,7 +126,7 @@ ENTRY(xen_sysret64)
*/
/* Normal 64-bit system call target */
-ENTRY(xen_syscall_target)
+SYM_FUNC_START(xen_syscall_target)
popq %rcx
popq %r11
@@ -137,12 +139,12 @@ ENTRY(xen_syscall_target)
movq $__USER_CS, 1*8(%rsp)
jmp entry_SYSCALL_64_after_hwframe
-ENDPROC(xen_syscall_target)
+SYM_FUNC_END(xen_syscall_target)
#ifdef CONFIG_IA32_EMULATION
/* 32-bit compat syscall target */
-ENTRY(xen_syscall32_target)
+SYM_FUNC_START(xen_syscall32_target)
popq %rcx
popq %r11
@@ -155,25 +157,25 @@ ENTRY(xen_syscall32_target)
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSCALL_compat_after_hwframe
-ENDPROC(xen_syscall32_target)
+SYM_FUNC_END(xen_syscall32_target)
/* 32-bit compat sysenter target */
-ENTRY(xen_sysenter_target)
+SYM_FUNC_START(xen_sysenter_target)
mov 0*8(%rsp), %rcx
mov 1*8(%rsp), %r11
mov 5*8(%rsp), %rsp
jmp entry_SYSENTER_compat
-ENDPROC(xen_sysenter_target)
+SYM_FUNC_END(xen_sysenter_target)
#else /* !CONFIG_IA32_EMULATION */
-ENTRY(xen_syscall32_target)
-ENTRY(xen_sysenter_target)
+SYM_FUNC_START_ALIAS(xen_syscall32_target)
+SYM_FUNC_START(xen_sysenter_target)
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
pushq $0
jmp hypercall_iret
-ENDPROC(xen_syscall32_target)
-ENDPROC(xen_sysenter_target)
+SYM_FUNC_END(xen_sysenter_target)
+SYM_FUNC_END_ALIAS(xen_syscall32_target)
#endif /* CONFIG_IA32_EMULATION */
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index c1d8b90aa4e2..1d0cee3163e4 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -22,7 +22,7 @@
#ifdef CONFIG_XEN_PV
__INIT
-ENTRY(startup_xen)
+SYM_CODE_START(startup_xen)
UNWIND_HINT_EMPTY
cld
@@ -52,13 +52,13 @@ ENTRY(startup_xen)
#endif
jmp xen_start_kernel
-END(startup_xen)
+SYM_CODE_END(startup_xen)
__FINIT
#endif
.pushsection .text
.balign PAGE_SIZE
-ENTRY(hypercall_page)
+SYM_CODE_START(hypercall_page)
.rept (PAGE_SIZE / 32)
UNWIND_HINT_EMPTY
.skip 32
@@ -69,7 +69,7 @@ ENTRY(hypercall_page)
.type xen_hypercall_##n, @function; .size xen_hypercall_##n, 32
#include <asm/xen-hypercalls.h>
#undef HYPERCALL
-END(hypercall_page)
+SYM_CODE_END(hypercall_page)
.popsection
ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux")
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S
index 943f10639a93..0043d5858f14 100644
--- a/arch/xtensa/kernel/vmlinux.lds.S
+++ b/arch/xtensa/kernel/vmlinux.lds.S
@@ -14,6 +14,8 @@
* Joe Taylor <joe@tensilica.com, joetylr@yahoo.com>
*/
+#define RO_EXCEPTION_TABLE_ALIGN 16
+
#include <asm-generic/vmlinux.lds.h>
#include <asm/page.h>
#include <asm/thread_info.h>
@@ -124,18 +126,16 @@ SECTIONS
. = ALIGN(16);
- RODATA
+ RO_DATA(4096)
/* Relocation table */
.fixup : { *(.fixup) }
- EXCEPTION_TABLE(16)
- NOTES
/* Data section */
_sdata = .;
- RW_DATA_SECTION(XCHAL_ICACHE_LINESIZE, PAGE_SIZE, THREAD_SIZE)
+ RW_DATA(XCHAL_ICACHE_LINESIZE, PAGE_SIZE, THREAD_SIZE)
_edata = .;
/* Initialization code and data: */