summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/kernel/rtc.c8
-rw-r--r--arch/arc/include/asm/processor.h14
-rw-r--r--arch/arc/include/asm/stacktrace.h37
-rw-r--r--arch/arc/kernel/process.c23
-rw-r--r--arch/arc/kernel/signal.c24
-rw-r--r--arch/arc/kernel/stacktrace.c21
-rw-r--r--arch/arc/kernel/unaligned.c2
-rw-r--r--arch/arc/mm/fault.c12
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/Makefile1
-rw-r--r--arch/arm/boot/dts/am335x-bone-common.dtsi8
-rw-r--r--arch/arm/boot/dts/am335x-bone.dts8
-rw-r--r--arch/arm/boot/dts/am335x-lxm.dts4
-rw-r--r--arch/arm/boot/dts/am33xx-clocks.dtsi6
-rw-r--r--arch/arm/boot/dts/am43xx-clocks.dtsi12
-rw-r--r--arch/arm/boot/dts/at91sam9260.dtsi7
-rw-r--r--arch/arm/boot/dts/at91sam9261.dtsi9
-rw-r--r--arch/arm/boot/dts/at91sam9263.dtsi5
-rw-r--r--arch/arm/boot/dts/at91sam9g45.dtsi3
-rw-r--r--arch/arm/boot/dts/at91sam9n12.dtsi1
-rw-r--r--arch/arm/boot/dts/at91sam9x5.dtsi5
-rw-r--r--arch/arm/boot/dts/dm8168-evm.dts19
-rw-r--r--arch/arm/boot/dts/dm816x.dtsi18
-rw-r--r--arch/arm/boot/dts/dra7-evm.dts10
-rw-r--r--arch/arm/boot/dts/dra7.dtsi2
-rw-r--r--arch/arm/boot/dts/dra72-evm.dts10
-rw-r--r--arch/arm/boot/dts/dra7xx-clocks.dtsi90
-rw-r--r--arch/arm/boot/dts/exynos3250.dtsi2
-rw-r--r--arch/arm/boot/dts/exynos4-cpu-thermal.dtsi52
-rw-r--r--arch/arm/boot/dts/exynos4.dtsi45
-rw-r--r--arch/arm/boot/dts/exynos4210-trats.dts19
-rw-r--r--arch/arm/boot/dts/exynos4210-universal_c210.dts57
-rw-r--r--arch/arm/boot/dts/exynos4210.dtsi38
-rw-r--r--arch/arm/boot/dts/exynos4212.dtsi5
-rw-r--r--arch/arm/boot/dts/exynos4412-odroid-common.dtsi64
-rw-r--r--arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi24
-rw-r--r--arch/arm/boot/dts/exynos4412-trats2.dts15
-rw-r--r--arch/arm/boot/dts/exynos4412.dtsi5
-rw-r--r--arch/arm/boot/dts/exynos4x12.dtsi12
-rw-r--r--arch/arm/boot/dts/exynos5250.dtsi44
-rw-r--r--arch/arm/boot/dts/exynos5420-trip-points.dtsi35
-rw-r--r--arch/arm/boot/dts/exynos5420.dtsi33
-rw-r--r--arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi24
-rw-r--r--arch/arm/boot/dts/exynos5440-trip-points.dtsi25
-rw-r--r--arch/arm/boot/dts/exynos5440.dtsi18
-rw-r--r--arch/arm/boot/dts/imx6qdl-sabresd.dtsi2
-rw-r--r--arch/arm/boot/dts/imx6sl-evk.dts2
-rw-r--r--arch/arm/boot/dts/omap3.dtsi4
-rw-r--r--arch/arm/boot/dts/omap5-core-thermal.dtsi2
-rw-r--r--arch/arm/boot/dts/omap5-gpu-thermal.dtsi2
-rw-r--r--arch/arm/boot/dts/omap5.dtsi4
-rw-r--r--arch/arm/boot/dts/omap54xx-clocks.dtsi41
-rw-r--r--arch/arm/boot/dts/rk3288.dtsi1
-rw-r--r--arch/arm/boot/dts/sama5d3.dtsi3
-rw-r--r--arch/arm/boot/dts/sama5d4.dtsi9
-rw-r--r--arch/arm/boot/dts/socfpga.dtsi8
-rw-r--r--arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts16
-rw-r--r--arch/arm/boot/dts/sun4i-a10.dtsi3
-rw-r--r--arch/arm/boot/dts/sun5i-a13.dtsi3
-rw-r--r--arch/arm/boot/dts/sun7i-a20.dtsi3
-rw-r--r--arch/arm/common/bL_switcher.c16
-rw-r--r--arch/arm/configs/at91_dt_defconfig1
-rw-r--r--arch/arm/configs/multi_v7_defconfig2
-rw-r--r--arch/arm/configs/omap2plus_defconfig1
-rw-r--r--arch/arm/configs/sama5_defconfig2
-rw-r--r--arch/arm/configs/sunxi_defconfig1
-rw-r--r--arch/arm/configs/vexpress_defconfig2
-rw-r--r--arch/arm/crypto/aesbs-core.S_shipped12
-rw-r--r--arch/arm/crypto/bsaes-armv7.pl12
-rw-r--r--arch/arm/include/asm/jump_label.h5
-rw-r--r--arch/arm/include/asm/kvm_arm.h1
-rw-r--r--arch/arm/include/asm/kvm_host.h15
-rw-r--r--arch/arm/include/asm/kvm_mmio.h22
-rw-r--r--arch/arm/include/asm/kvm_mmu.h15
-rw-r--r--arch/arm/include/asm/mach/time.h3
-rw-r--r--arch/arm/include/debug/at91.S5
-rw-r--r--arch/arm/include/uapi/asm/kvm.h3
-rw-r--r--arch/arm/kernel/asm-offsets.c4
-rw-r--r--arch/arm/kernel/setup.c5
-rw-r--r--arch/arm/kernel/time.c6
-rw-r--r--arch/arm/kvm/Kconfig30
-rw-r--r--arch/arm/kvm/Makefile12
-rw-r--r--arch/arm/kvm/arm.c47
-rw-r--r--arch/arm/kvm/guest.c18
-rw-r--r--arch/arm/kvm/interrupts_head.S8
-rw-r--r--arch/arm/kvm/mmio.c64
-rw-r--r--arch/arm/kvm/mmu.c209
-rw-r--r--arch/arm/kvm/trace.h58
-rw-r--r--arch/arm/mach-at91/pm.c22
-rw-r--r--arch/arm/mach-at91/pm.h2
-rw-r--r--arch/arm/mach-at91/pm_slowclock.S80
-rw-r--r--arch/arm/mach-exynos/platsmp.c3
-rw-r--r--arch/arm/mach-exynos/pm_domains.c28
-rw-r--r--arch/arm/mach-exynos/suspend.c4
-rw-r--r--arch/arm/mach-imx/mach-imx6q.c5
-rw-r--r--arch/arm/mach-msm/board-halibut.c8
-rw-r--r--arch/arm/mach-msm/board-qsd8x50.c8
-rw-r--r--arch/arm/mach-omap2/cpuidle44xx.c10
-rw-r--r--arch/arm/mach-omap2/id.c2
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.c10
-rw-r--r--arch/arm/mach-omap2/omap_hwmod.h1
-rw-r--r--arch/arm/mach-omap2/omap_hwmod_7xx_data.c103
-rw-r--r--arch/arm/mach-omap2/pdata-quirks.c1
-rw-r--r--arch/arm/mach-omap2/prm44xx.c4
-rw-r--r--arch/arm/mach-pxa/idp.c6
-rw-r--r--arch/arm/mach-pxa/irq.c111
-rw-r--r--arch/arm/mach-pxa/lpd270.c8
-rw-r--r--arch/arm/mach-pxa/zeus.c2
-rw-r--r--arch/arm/mach-realview/core.c7
-rw-r--r--arch/arm/mach-realview/realview_eb.c2
-rw-r--r--arch/arm/mach-sa1100/neponset.c6
-rw-r--r--arch/arm/mach-sa1100/pleb.c7
-rw-r--r--arch/arm/mach-socfpga/core.h2
-rw-r--r--arch/arm/mach-socfpga/socfpga.c5
-rw-r--r--arch/arm/mach-sti/board-dt.c1
-rw-r--r--arch/arm/mach-sunxi/Kconfig8
-rw-r--r--arch/arm/mach-tegra/cpuidle-tegra114.c6
-rw-r--r--arch/arm/mach-tegra/cpuidle-tegra20.c10
-rw-r--r--arch/arm/mach-tegra/cpuidle-tegra30.c10
-rw-r--r--arch/arm/mm/cache-l2x0.c33
-rw-r--r--arch/arm/mm/dma-mapping.c2
-rw-r--r--arch/arm/mm/fault.c1
-rw-r--r--arch/arm/mm/pageattr.c5
-rw-r--r--arch/arm/plat-omap/counter_32k.c20
-rw-r--r--arch/arm/plat-omap/dmtimer.c15
-rw-r--r--arch/arm64/boot/dts/apm/apm-storm.dtsi4
-rw-r--r--arch/arm64/boot/dts/arm/juno-clocks.dtsi2
-rw-r--r--arch/arm64/include/asm/cmpxchg.h32
-rw-r--r--arch/arm64/include/asm/esr.h1
-rw-r--r--arch/arm64/include/asm/jump_label.h8
-rw-r--r--arch/arm64/include/asm/kvm_arm.h6
-rw-r--r--arch/arm64/include/asm/kvm_host.h15
-rw-r--r--arch/arm64/include/asm/kvm_mmio.h22
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h48
-rw-r--r--arch/arm64/include/asm/mmu_context.h9
-rw-r--r--arch/arm64/include/asm/percpu.h44
-rw-r--r--arch/arm64/include/asm/proc-fns.h6
-rw-r--r--arch/arm64/include/asm/tlb.h3
-rw-r--r--arch/arm64/include/asm/tlbflush.h13
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h3
-rw-r--r--arch/arm64/kernel/efi.c15
-rw-r--r--arch/arm64/kernel/head.S2
-rw-r--r--arch/arm64/kernel/process.c8
-rw-r--r--arch/arm64/kernel/vdso.c10
-rw-r--r--arch/arm64/kvm/Kconfig18
-rw-r--r--arch/arm64/kvm/Makefile20
-rw-r--r--arch/arm64/mm/dma-mapping.c12
-rw-r--r--arch/arm64/mm/pageattr.c5
-rw-r--r--arch/c6x/include/asm/pgtable.h5
-rw-r--r--arch/metag/include/asm/io.h1
-rw-r--r--arch/metag/include/asm/pgtable-bits.h104
-rw-r--r--arch/metag/include/asm/pgtable.h95
-rw-r--r--arch/metag/include/asm/processor.h4
-rw-r--r--arch/microblaze/kernel/entry.S7
-rw-r--r--arch/mips/include/asm/asmmacro-32.h128
-rw-r--r--arch/mips/include/asm/asmmacro.h218
-rw-r--r--arch/mips/include/asm/fpu.h20
-rw-r--r--arch/mips/include/asm/jump_label.h7
-rw-r--r--arch/mips/include/asm/kdebug.h3
-rw-r--r--arch/mips/include/asm/kvm_host.h125
-rw-r--r--arch/mips/include/asm/processor.h2
-rw-r--r--arch/mips/include/uapi/asm/kvm.h164
-rw-r--r--arch/mips/kernel/asm-offsets.c105
-rw-r--r--arch/mips/kernel/genex.S15
-rw-r--r--arch/mips/kernel/ptrace.c30
-rw-r--r--arch/mips/kernel/r4k_fpu.S2
-rw-r--r--arch/mips/kernel/traps.c33
-rw-r--r--arch/mips/kvm/Makefile8
-rw-r--r--arch/mips/kvm/emulate.c332
-rw-r--r--arch/mips/kvm/fpu.S122
-rw-r--r--arch/mips/kvm/locore.S38
-rw-r--r--arch/mips/kvm/mips.c472
-rw-r--r--arch/mips/kvm/msa.S161
-rw-r--r--arch/mips/kvm/stats.c4
-rw-r--r--arch/mips/kvm/tlb.c7
-rw-r--r--arch/mips/kvm/trace.h6
-rw-r--r--arch/mips/kvm/trap_emul.c199
-rw-r--r--arch/mips/lasat/sysctl.c4
-rw-r--r--arch/nios2/include/asm/ptrace.h47
-rw-r--r--arch/nios2/include/asm/thread_info.h4
-rw-r--r--arch/nios2/include/asm/ucontext.h32
-rw-r--r--arch/nios2/include/uapi/asm/Kbuild3
-rw-r--r--arch/nios2/include/uapi/asm/elf.h4
-rw-r--r--arch/nios2/include/uapi/asm/ptrace.h59
-rw-r--r--arch/nios2/include/uapi/asm/sigcontext.h12
-rw-r--r--arch/nios2/kernel/entry.S2
-rw-r--r--arch/nios2/kernel/signal.c6
-rw-r--r--arch/nios2/mm/cacheflush.c3
-rw-r--r--arch/nios2/mm/fault.c6
-rw-r--r--arch/parisc/include/asm/pgalloc.h17
-rw-r--r--arch/parisc/kernel/syscall_table.S9
-rw-r--r--arch/powerpc/include/asm/cputhreads.h2
-rw-r--r--arch/powerpc/include/asm/iommu.h6
-rw-r--r--arch/powerpc/include/asm/irq_work.h9
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h3
-rw-r--r--arch/powerpc/include/asm/reg.h3
-rw-r--r--arch/powerpc/kernel/cputable.c20
-rw-r--r--arch/powerpc/kernel/dbell.c2
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S2
-rw-r--r--arch/powerpc/kernel/iommu.c26
-rw-r--r--arch/powerpc/kernel/smp.c4
-rw-r--r--arch/powerpc/kvm/book3s_hv.c8
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S1
-rw-r--r--arch/powerpc/kvm/mpic.c17
-rw-r--r--arch/powerpc/kvm/powerpc.c4
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S2
-rw-r--r--arch/powerpc/platforms/powernv/pci.c26
-rw-r--r--arch/powerpc/platforms/powernv/smp.c14
-rw-r--r--arch/powerpc/platforms/pseries/hvCall.S2
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c2
-rw-r--r--arch/powerpc/platforms/pseries/lpar.c2
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c44
-rw-r--r--arch/s390/include/asm/elf.h2
-rw-r--r--arch/s390/include/asm/jump_label.h3
-rw-r--r--arch/s390/include/asm/kvm_host.h58
-rw-r--r--arch/s390/include/asm/mmu_context.h2
-rw-r--r--arch/s390/include/asm/page.h11
-rw-r--r--arch/s390/include/uapi/asm/kvm.h4
-rw-r--r--arch/s390/include/uapi/asm/sie.h4
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/ftrace.c61
-rw-r--r--arch/s390/kernel/jump_label.c12
-rw-r--r--arch/s390/kernel/module.c1
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c7
-rw-r--r--arch/s390/kernel/processor.c2
-rw-r--r--arch/s390/kernel/swsusp_asm64.S11
-rw-r--r--arch/s390/kernel/time.c20
-rw-r--r--arch/s390/kvm/diag.c6
-rw-r--r--arch/s390/kvm/gaccess.c296
-rw-r--r--arch/s390/kvm/gaccess.h21
-rw-r--r--arch/s390/kvm/guestdbg.c8
-rw-r--r--arch/s390/kvm/intercept.c5
-rw-r--r--arch/s390/kvm/interrupt.c1101
-rw-r--r--arch/s390/kvm/kvm-s390.c465
-rw-r--r--arch/s390/kvm/kvm-s390.h54
-rw-r--r--arch/s390/kvm/priv.c146
-rw-r--r--arch/s390/kvm/sigp.c7
-rw-r--r--arch/s390/pci/pci.c28
-rw-r--r--arch/s390/pci/pci_mmio.c17
-rw-r--r--arch/sparc/Kconfig3
-rw-r--r--arch/sparc/include/asm/hypervisor.h12
-rw-r--r--arch/sparc/include/asm/io_64.h20
-rw-r--r--arch/sparc/include/asm/jump_label.h5
-rw-r--r--arch/sparc/include/asm/starfire.h1
-rw-r--r--arch/sparc/kernel/entry.h4
-rw-r--r--arch/sparc/kernel/hvapi.c1
-rw-r--r--arch/sparc/kernel/hvcalls.S16
-rw-r--r--arch/sparc/kernel/pci.c5
-rw-r--r--arch/sparc/kernel/pcr.c33
-rw-r--r--arch/sparc/kernel/perf_event.c55
-rw-r--r--arch/sparc/kernel/process_64.c4
-rw-r--r--arch/sparc/kernel/smp_64.c27
-rw-r--r--arch/sparc/kernel/starfire.c5
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c2
-rw-r--r--arch/sparc/kernel/time_32.c6
-rw-r--r--arch/sparc/kernel/traps_64.c30
-rw-r--r--arch/sparc/lib/memmove.S35
-rw-r--r--arch/sparc/mm/init_64.c2
-rw-r--r--arch/tile/kernel/time.c24
-rw-r--r--arch/x86/Kconfig35
-rw-r--r--arch/x86/boot/compressed/aslr.c35
-rw-r--r--arch/x86/boot/compressed/head_32.S3
-rw-r--r--arch/x86/boot/compressed/head_64.S5
-rw-r--r--arch/x86/boot/compressed/misc.c6
-rw-r--r--arch/x86/boot/compressed/misc.h4
-rw-r--r--arch/x86/boot/string.c2
-rw-r--r--arch/x86/boot/video-mode.c4
-rw-r--r--arch/x86/boot/video.c2
-rw-r--r--arch/x86/boot/video.h1
-rw-r--r--arch/x86/configs/i386_defconfig2
-rw-r--r--arch/x86/configs/x86_64_defconfig2
-rw-r--r--arch/x86/crypto/aesni-intel_glue.c4
-rw-r--r--arch/x86/crypto/crc32c-pcl-intel-asm_64.S2
-rw-r--r--arch/x86/crypto/twofish-x86_64-asm_64.S4
-rw-r--r--arch/x86/ia32/Makefile1
-rw-r--r--arch/x86/ia32/ia32_signal.c19
-rw-r--r--arch/x86/ia32/ia32entry.S485
-rw-r--r--arch/x86/ia32/nosyscall.c7
-rw-r--r--arch/x86/ia32/sys_ia32.c14
-rw-r--r--arch/x86/ia32/syscall_ia32.c25
-rw-r--r--arch/x86/include/asm/alternative-asm.h53
-rw-r--r--arch/x86/include/asm/alternative.h73
-rw-r--r--arch/x86/include/asm/apic.h3
-rw-r--r--arch/x86/include/asm/barrier.h6
-rw-r--r--arch/x86/include/asm/calling.h284
-rw-r--r--arch/x86/include/asm/compat.h2
-rw-r--r--arch/x86/include/asm/cpufeature.h32
-rw-r--r--arch/x86/include/asm/desc.h7
-rw-r--r--arch/x86/include/asm/dwarf2.h24
-rw-r--r--arch/x86/include/asm/efi.h6
-rw-r--r--arch/x86/include/asm/elf.h7
-rw-r--r--arch/x86/include/asm/fpu-internal.h2
-rw-r--r--arch/x86/include/asm/hw_irq.h5
-rw-r--r--arch/x86/include/asm/insn.h2
-rw-r--r--arch/x86/include/asm/irqflags.h49
-rw-r--r--arch/x86/include/asm/jump_label.h5
-rw-r--r--arch/x86/include/asm/kvm_host.h28
-rw-r--r--arch/x86/include/asm/kvm_para.h2
-rw-r--r--arch/x86/include/asm/mwait.h8
-rw-r--r--arch/x86/include/asm/page_types.h2
-rw-r--r--arch/x86/include/asm/paravirt.h5
-rw-r--r--arch/x86/include/asm/pci_x86.h2
-rw-r--r--arch/x86/include/asm/processor.h107
-rw-r--r--arch/x86/include/asm/ptrace.h45
-rw-r--r--arch/x86/include/asm/pvclock.h1
-rw-r--r--arch/x86/include/asm/segment.h289
-rw-r--r--arch/x86/include/asm/setup.h5
-rw-r--r--arch/x86/include/asm/sigcontext.h6
-rw-r--r--arch/x86/include/asm/sighandling.h4
-rw-r--r--arch/x86/include/asm/smap.h30
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/special_insns.h24
-rw-r--r--arch/x86/include/asm/thread_info.h74
-rw-r--r--arch/x86/include/asm/uaccess_64.h2
-rw-r--r--arch/x86/include/asm/xsave.h28
-rw-r--r--arch/x86/include/uapi/asm/bootparam.h2
-rw-r--r--arch/x86/include/uapi/asm/ptrace-abi.h16
-rw-r--r--arch/x86/include/uapi/asm/ptrace.h13
-rw-r--r--arch/x86/include/uapi/asm/sigcontext.h21
-rw-r--r--arch/x86/include/uapi/asm/vmx.h1
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c25
-rw-r--r--arch/x86/kernel/alternative.c163
-rw-r--r--arch/x86/kernel/apic/apic.c62
-rw-r--r--arch/x86/kernel/apic/apic_numachip.c22
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c89
-rw-r--r--arch/x86/kernel/asm-offsets_32.c2
-rw-r--r--arch/x86/kernel/asm-offsets_64.c1
-rw-r--r--arch/x86/kernel/cpu/amd.c5
-rw-r--r--arch/x86/kernel/cpu/common.c87
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.sh2
-rw-r--r--arch/x86/kernel/cpu/perf_event.c18
-rw-r--r--arch/x86/kernel/cpu/perf_event_intel.c10
-rw-r--r--arch/x86/kernel/crash.c2
-rw-r--r--arch/x86/kernel/dumpstack.c4
-rw-r--r--arch/x86/kernel/dumpstack_32.c4
-rw-r--r--arch/x86/kernel/entry_32.S93
-rw-r--r--arch/x86/kernel/entry_64.S969
-rw-r--r--arch/x86/kernel/head64.c3
-rw-r--r--arch/x86/kernel/head_32.S3
-rw-r--r--arch/x86/kernel/head_64.S6
-rw-r--r--arch/x86/kernel/i387.c2
-rw-r--r--arch/x86/kernel/ioport.c2
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/irq_64.c2
-rw-r--r--arch/x86/kernel/irqinit.c3
-rw-r--r--arch/x86/kernel/kgdb.c6
-rw-r--r--arch/x86/kernel/kprobes/core.c4
-rw-r--r--arch/x86/kernel/module.c3
-rw-r--r--arch/x86/kernel/perf_regs.c40
-rw-r--r--arch/x86/kernel/process.c87
-rw-r--r--arch/x86/kernel/process_32.c27
-rw-r--r--arch/x86/kernel/process_64.c24
-rw-r--r--arch/x86/kernel/ptrace.c12
-rw-r--r--arch/x86/kernel/pvclock.c44
-rw-r--r--arch/x86/kernel/reboot.c10
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S8
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S16
-rw-r--r--arch/x86/kernel/setup.c15
-rw-r--r--arch/x86/kernel/signal.c50
-rw-r--r--arch/x86/kernel/smpboot.c38
-rw-r--r--arch/x86/kernel/syscall_32.c16
-rw-r--r--arch/x86/kernel/time.c2
-rw-r--r--arch/x86/kernel/traps.c52
-rw-r--r--arch/x86/kernel/uprobes.c2
-rw-r--r--arch/x86/kernel/vm86_32.c4
-rw-r--r--arch/x86/kernel/vsyscall_gtod.c24
-rw-r--r--arch/x86/kernel/xsave.c7
-rw-r--r--arch/x86/kvm/Makefile2
-rw-r--r--arch/x86/kvm/cpuid.c33
-rw-r--r--arch/x86/kvm/cpuid.h8
-rw-r--r--arch/x86/kvm/emulate.c196
-rw-r--r--arch/x86/kvm/i8254.c14
-rw-r--r--arch/x86/kvm/i8254.h2
-rw-r--r--arch/x86/kvm/i8259.c13
-rw-r--r--arch/x86/kvm/ioapic.c26
-rw-r--r--arch/x86/kvm/ioapic.h11
-rw-r--r--arch/x86/kvm/irq.h2
-rw-r--r--arch/x86/kvm/lapic.c154
-rw-r--r--arch/x86/kvm/lapic.h17
-rw-r--r--arch/x86/kvm/mmu.c73
-rw-r--r--arch/x86/kvm/pmu.c2
-rw-r--r--arch/x86/kvm/svm.c49
-rw-r--r--arch/x86/kvm/vmx.c187
-rw-r--r--arch/x86/kvm/x86.c172
-rw-r--r--arch/x86/lguest/boot.c4
-rw-r--r--arch/x86/lib/atomic64_cx8_32.S50
-rw-r--r--arch/x86/lib/checksum_32.S64
-rw-r--r--arch/x86/lib/clear_page_64.S66
-rw-r--r--arch/x86/lib/copy_page_64.S37
-rw-r--r--arch/x86/lib/copy_user_64.S46
-rw-r--r--arch/x86/lib/csum-copy_64.S2
-rw-r--r--arch/x86/lib/insn.c13
-rw-r--r--arch/x86/lib/memcpy_64.S68
-rw-r--r--arch/x86/lib/memmove_64.S19
-rw-r--r--arch/x86/lib/memset_64.S61
-rw-r--r--arch/x86/lib/msr-reg.S24
-rw-r--r--arch/x86/lib/rwsem.S44
-rw-r--r--arch/x86/lib/thunk_32.S18
-rw-r--r--arch/x86/lib/thunk_64.S28
-rw-r--r--arch/x86/lib/usercopy_64.c15
-rw-r--r--arch/x86/lib/x86-opcode-map.txt9
-rw-r--r--arch/x86/mm/fault.c8
-rw-r--r--arch/x86/mm/init.c3
-rw-r--r--arch/x86/oprofile/backtrace.c2
-rw-r--r--arch/x86/pci/acpi.c11
-rw-r--r--arch/x86/pci/common.c34
-rw-r--r--arch/x86/pci/intel_mid_pci.c4
-rw-r--r--arch/x86/pci/irq.c15
-rw-r--r--arch/x86/platform/efi/efi.c17
-rw-r--r--arch/x86/platform/efi/efi_32.c22
-rw-r--r--arch/x86/platform/efi/efi_64.c29
-rw-r--r--arch/x86/power/cpu.c2
-rw-r--r--arch/x86/syscalls/syscall_32.tbl4
-rw-r--r--arch/x86/syscalls/syscall_64.tbl2
-rw-r--r--arch/x86/um/asm/barrier.h4
-rw-r--r--arch/x86/um/sys_call_table_64.c2
-rw-r--r--arch/x86/vdso/vclock_gettime.c34
-rw-r--r--arch/x86/vdso/vdso32/sigreturn.S1
-rw-r--r--arch/x86/xen/enlighten.c1
-rw-r--r--arch/x86/xen/p2m.c12
-rw-r--r--arch/x86/xen/smp.c14
-rw-r--r--arch/x86/xen/suspend.c11
-rw-r--r--arch/x86/xen/xen-asm_64.S8
424 files changed, 8841 insertions, 4829 deletions
diff --git a/arch/alpha/kernel/rtc.c b/arch/alpha/kernel/rtc.c
index c8d284d8521f..f535a3fd0f60 100644
--- a/arch/alpha/kernel/rtc.c
+++ b/arch/alpha/kernel/rtc.c
@@ -116,7 +116,7 @@ alpha_rtc_set_time(struct device *dev, struct rtc_time *tm)
}
static int
-alpha_rtc_set_mmss(struct device *dev, unsigned long nowtime)
+alpha_rtc_set_mmss(struct device *dev, time64_t nowtime)
{
int retval = 0;
int real_seconds, real_minutes, cmos_minutes;
@@ -211,7 +211,7 @@ alpha_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
static const struct rtc_class_ops alpha_rtc_ops = {
.read_time = alpha_rtc_read_time,
.set_time = alpha_rtc_set_time,
- .set_mmss = alpha_rtc_set_mmss,
+ .set_mmss64 = alpha_rtc_set_mmss,
.ioctl = alpha_rtc_ioctl,
};
@@ -276,7 +276,7 @@ do_remote_mmss(void *data)
}
static int
-remote_set_mmss(struct device *dev, unsigned long now)
+remote_set_mmss(struct device *dev, time64_t now)
{
union remote_data x;
if (smp_processor_id() != boot_cpuid) {
@@ -290,7 +290,7 @@ remote_set_mmss(struct device *dev, unsigned long now)
static const struct rtc_class_ops remote_rtc_ops = {
.read_time = remote_read_time,
.set_time = remote_set_time,
- .set_mmss = remote_set_mmss,
+ .set_mmss64 = remote_set_mmss,
.ioctl = alpha_rtc_ioctl,
};
#endif
diff --git a/arch/arc/include/asm/processor.h b/arch/arc/include/asm/processor.h
index 4e547296831d..52312cb5dbe2 100644
--- a/arch/arc/include/asm/processor.h
+++ b/arch/arc/include/asm/processor.h
@@ -47,9 +47,6 @@ struct thread_struct {
/* Forward declaration, a strange C thing */
struct task_struct;
-/* Return saved PC of a blocked thread */
-unsigned long thread_saved_pc(struct task_struct *t);
-
#define task_pt_regs(p) \
((struct pt_regs *)(THREAD_SIZE + (void *)task_stack_page(p)) - 1)
@@ -72,18 +69,21 @@ unsigned long thread_saved_pc(struct task_struct *t);
#define release_segments(mm) do { } while (0)
#define KSTK_EIP(tsk) (task_pt_regs(tsk)->ret)
+#define KSTK_ESP(tsk) (task_pt_regs(tsk)->sp)
/*
* Where abouts of Task's sp, fp, blink when it was last seen in kernel mode.
* Look in process.c for details of kernel stack layout
*/
-#define KSTK_ESP(tsk) (tsk->thread.ksp)
+#define TSK_K_ESP(tsk) (tsk->thread.ksp)
-#define KSTK_REG(tsk, off) (*((unsigned int *)(KSTK_ESP(tsk) + \
+#define TSK_K_REG(tsk, off) (*((unsigned int *)(TSK_K_ESP(tsk) + \
sizeof(struct callee_regs) + off)))
-#define KSTK_BLINK(tsk) KSTK_REG(tsk, 4)
-#define KSTK_FP(tsk) KSTK_REG(tsk, 0)
+#define TSK_K_BLINK(tsk) TSK_K_REG(tsk, 4)
+#define TSK_K_FP(tsk) TSK_K_REG(tsk, 0)
+
+#define thread_saved_pc(tsk) TSK_K_BLINK(tsk)
extern void start_thread(struct pt_regs * regs, unsigned long pc,
unsigned long usp);
diff --git a/arch/arc/include/asm/stacktrace.h b/arch/arc/include/asm/stacktrace.h
new file mode 100644
index 000000000000..b29b6064ea14
--- /dev/null
+++ b/arch/arc/include/asm/stacktrace.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
+ * Copyright (C) 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __ASM_STACKTRACE_H
+#define __ASM_STACKTRACE_H
+
+#include <linux/sched.h>
+
+/**
+ * arc_unwind_core - Unwind the kernel mode stack for an execution context
+ * @tsk: NULL for current task, specific task otherwise
+ * @regs: pt_regs used to seed the unwinder {SP, FP, BLINK, PC}
+ * If NULL, use pt_regs of @tsk (if !NULL) otherwise
+ * use the current values of {SP, FP, BLINK, PC}
+ * @consumer_fn: Callback invoked for each frame unwound
+ * Returns 0 to continue unwinding, -1 to stop
+ * @arg: Arg to callback
+ *
+ * Returns the address of first function in stack
+ *
+ * Semantics:
+ * - synchronous unwinding (e.g. dump_stack): @tsk NULL, @regs NULL
+ * - Asynchronous unwinding of sleeping task: @tsk !NULL, @regs NULL
+ * - Asynchronous unwinding of intr/excp etc: @tsk !NULL, @regs !NULL
+ */
+notrace noinline unsigned int arc_unwind_core(
+ struct task_struct *tsk, struct pt_regs *regs,
+ int (*consumer_fn) (unsigned int, void *),
+ void *arg);
+
+#endif /* __ASM_STACKTRACE_H */
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
index fdd89715d2d3..98c00a2d4dd9 100644
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -192,29 +192,6 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu)
return 0;
}
-/*
- * API: expected by schedular Code: If thread is sleeping where is that.
- * What is this good for? it will be always the scheduler or ret_from_fork.
- * So we hard code that anyways.
- */
-unsigned long thread_saved_pc(struct task_struct *t)
-{
- struct pt_regs *regs = task_pt_regs(t);
- unsigned long blink = 0;
-
- /*
- * If the thread being queried for in not itself calling this, then it
- * implies it is not executing, which in turn implies it is sleeping,
- * which in turn implies it got switched OUT by the schedular.
- * In that case, it's kernel mode blink can reliably retrieved as per
- * the picture above (right above pt_regs).
- */
- if (t != current && t->state != TASK_RUNNING)
- blink = *((unsigned int *)regs - 1);
-
- return blink;
-}
-
int elf_check_arch(const struct elf32_hdr *x)
{
unsigned int eflags;
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c
index 114234e83caa..edda76fae83f 100644
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -67,7 +67,7 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
sigset_t *set)
{
int err;
- err = __copy_to_user(&(sf->uc.uc_mcontext.regs), regs,
+ err = __copy_to_user(&(sf->uc.uc_mcontext.regs.scratch), regs,
sizeof(sf->uc.uc_mcontext.regs.scratch));
err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
@@ -83,7 +83,7 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
if (!err)
set_current_blocked(&set);
- err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs),
+ err |= __copy_from_user(regs, &(sf->uc.uc_mcontext.regs.scratch),
sizeof(sf->uc.uc_mcontext.regs.scratch));
return err;
@@ -131,6 +131,15 @@ SYSCALL_DEFINE0(rt_sigreturn)
/* Don't restart from sigreturn */
syscall_wont_restart(regs);
+ /*
+ * Ensure that sigreturn always returns to user mode (in case the
+ * regs saved on user stack got fudged between save and sigreturn)
+ * Otherwise it is easy to panic the kernel with a custom
+ * signal handler and/or restorer which clobberes the status32/ret
+ * to return to a bogus location in kernel mode.
+ */
+ regs->status32 |= STATUS_U_MASK;
+
return regs->r0;
badframe:
@@ -229,8 +238,11 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
/*
* handler returns using sigreturn stub provided already by userpsace
+ * If not, nuke the process right away
*/
- BUG_ON(!(ksig->ka.sa.sa_flags & SA_RESTORER));
+ if(!(ksig->ka.sa.sa_flags & SA_RESTORER))
+ return 1;
+
regs->blink = (unsigned long)ksig->ka.sa.sa_restorer;
/* User Stack for signal handler will be above the frame just carved */
@@ -296,12 +308,12 @@ static void
handle_signal(struct ksignal *ksig, struct pt_regs *regs)
{
sigset_t *oldset = sigmask_to_save();
- int ret;
+ int failed;
/* Set up the stack frame */
- ret = setup_rt_frame(ksig, oldset, regs);
+ failed = setup_rt_frame(ksig, oldset, regs);
- signal_setup_done(ret, ksig, 0);
+ signal_setup_done(failed, ksig, 0);
}
void do_signal(struct pt_regs *regs)
diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c
index 9ce47cfe2303..92320d6f737c 100644
--- a/arch/arc/kernel/stacktrace.c
+++ b/arch/arc/kernel/stacktrace.c
@@ -43,6 +43,10 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
struct pt_regs *regs,
struct unwind_frame_info *frame_info)
{
+ /*
+ * synchronous unwinding (e.g. dump_stack)
+ * - uses current values of SP and friends
+ */
if (tsk == NULL && regs == NULL) {
unsigned long fp, sp, blink, ret;
frame_info->task = current;
@@ -61,12 +65,17 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
frame_info->regs.r63 = ret;
frame_info->call_frame = 0;
} else if (regs == NULL) {
+ /*
+ * Asynchronous unwinding of sleeping task
+ * - Gets SP etc from task's pt_regs (saved bottom of kernel
+ * mode stack of task)
+ */
frame_info->task = tsk;
- frame_info->regs.r27 = KSTK_FP(tsk);
- frame_info->regs.r28 = KSTK_ESP(tsk);
- frame_info->regs.r31 = KSTK_BLINK(tsk);
+ frame_info->regs.r27 = TSK_K_FP(tsk);
+ frame_info->regs.r28 = TSK_K_ESP(tsk);
+ frame_info->regs.r31 = TSK_K_BLINK(tsk);
frame_info->regs.r63 = (unsigned int)__switch_to;
/* In the prologue of __switch_to, first FP is saved on stack
@@ -83,6 +92,10 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
frame_info->call_frame = 0;
} else {
+ /*
+ * Asynchronous unwinding of intr/exception
+ * - Just uses the pt_regs passed
+ */
frame_info->task = tsk;
frame_info->regs.r27 = regs->fp;
@@ -95,7 +108,7 @@ static void seed_unwind_frame_info(struct task_struct *tsk,
#endif
-static noinline unsigned int
+notrace noinline unsigned int
arc_unwind_core(struct task_struct *tsk, struct pt_regs *regs,
int (*consumer_fn) (unsigned int, void *), void *arg)
{
diff --git a/arch/arc/kernel/unaligned.c b/arch/arc/kernel/unaligned.c
index 7ff5b5c183bb..74db59b6f392 100644
--- a/arch/arc/kernel/unaligned.c
+++ b/arch/arc/kernel/unaligned.c
@@ -12,6 +12,7 @@
*/
#include <linux/types.h>
+#include <linux/perf_event.h>
#include <linux/ptrace.h>
#include <linux/uaccess.h>
#include <asm/disasm.h>
@@ -253,6 +254,7 @@ int misaligned_fixup(unsigned long address, struct pt_regs *regs,
}
}
+ perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, address);
return 0;
fault:
diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c
index 563cb27e37f5..6a2e006cbcce 100644
--- a/arch/arc/mm/fault.c
+++ b/arch/arc/mm/fault.c
@@ -14,6 +14,7 @@
#include <linux/ptrace.h>
#include <linux/uaccess.h>
#include <linux/kdebug.h>
+#include <linux/perf_event.h>
#include <asm/pgalloc.h>
#include <asm/mmu.h>
@@ -139,13 +140,20 @@ good_area:
return;
}
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
+
if (likely(!(fault & VM_FAULT_ERROR))) {
if (flags & FAULT_FLAG_ALLOW_RETRY) {
/* To avoid updating stats twice for retry case */
- if (fault & VM_FAULT_MAJOR)
+ if (fault & VM_FAULT_MAJOR) {
tsk->maj_flt++;
- else
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+ regs, address);
+ } else {
tsk->min_flt++;
+ perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+ regs, address);
+ }
if (fault & VM_FAULT_RETRY) {
flags &= ~FAULT_FLAG_ALLOW_RETRY;
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9f1f09a2bc9b..cf4c0c99aa25 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -619,6 +619,7 @@ config ARCH_PXA
select GENERIC_CLOCKEVENTS
select GPIO_PXA
select HAVE_IDE
+ select IRQ_DOMAIN
select MULTI_IRQ_HANDLER
select PLAT_PXA
select SPARSE_IRQ
diff --git a/arch/arm/Makefile b/arch/arm/Makefile
index 7f99cd652203..eb7bb511f853 100644
--- a/arch/arm/Makefile
+++ b/arch/arm/Makefile
@@ -150,6 +150,7 @@ machine-$(CONFIG_ARCH_BERLIN) += berlin
machine-$(CONFIG_ARCH_CLPS711X) += clps711x
machine-$(CONFIG_ARCH_CNS3XXX) += cns3xxx
machine-$(CONFIG_ARCH_DAVINCI) += davinci
+machine-$(CONFIG_ARCH_DIGICOLOR) += digicolor
machine-$(CONFIG_ARCH_DOVE) += dove
machine-$(CONFIG_ARCH_EBSA110) += ebsa110
machine-$(CONFIG_ARCH_EFM32) += efm32
diff --git a/arch/arm/boot/dts/am335x-bone-common.dtsi b/arch/arm/boot/dts/am335x-bone-common.dtsi
index 2c6248d9a9ef..c3255e0c90aa 100644
--- a/arch/arm/boot/dts/am335x-bone-common.dtsi
+++ b/arch/arm/boot/dts/am335x-bone-common.dtsi
@@ -301,3 +301,11 @@
cd-gpios = <&gpio0 6 GPIO_ACTIVE_HIGH>;
cd-inverted;
};
+
+&aes {
+ status = "okay";
+};
+
+&sham {
+ status = "okay";
+};
diff --git a/arch/arm/boot/dts/am335x-bone.dts b/arch/arm/boot/dts/am335x-bone.dts
index 83d40f7655e5..6b8493720424 100644
--- a/arch/arm/boot/dts/am335x-bone.dts
+++ b/arch/arm/boot/dts/am335x-bone.dts
@@ -24,11 +24,3 @@
&mmc1 {
vmmc-supply = <&ldo3_reg>;
};
-
-&sham {
- status = "okay";
-};
-
-&aes {
- status = "okay";
-};
diff --git a/arch/arm/boot/dts/am335x-lxm.dts b/arch/arm/boot/dts/am335x-lxm.dts
index 7266a00aab2e..5c5667a3624d 100644
--- a/arch/arm/boot/dts/am335x-lxm.dts
+++ b/arch/arm/boot/dts/am335x-lxm.dts
@@ -328,6 +328,10 @@
dual_emac_res_vlan = <3>;
};
+&phy_sel {
+ rmii-clock-ext;
+};
+
&mac {
pinctrl-names = "default", "sleep";
pinctrl-0 = <&cpsw_default>;
diff --git a/arch/arm/boot/dts/am33xx-clocks.dtsi b/arch/arm/boot/dts/am33xx-clocks.dtsi
index 712edce7d6fb..071b56aa0c7e 100644
--- a/arch/arm/boot/dts/am33xx-clocks.dtsi
+++ b/arch/arm/boot/dts/am33xx-clocks.dtsi
@@ -99,7 +99,7 @@
ehrpwm0_tbclk: ehrpwm0_tbclk@44e10664 {
#clock-cells = <0>;
compatible = "ti,gate-clock";
- clocks = <&dpll_per_m2_ck>;
+ clocks = <&l4ls_gclk>;
ti,bit-shift = <0>;
reg = <0x0664>;
};
@@ -107,7 +107,7 @@
ehrpwm1_tbclk: ehrpwm1_tbclk@44e10664 {
#clock-cells = <0>;
compatible = "ti,gate-clock";
- clocks = <&dpll_per_m2_ck>;
+ clocks = <&l4ls_gclk>;
ti,bit-shift = <1>;
reg = <0x0664>;
};
@@ -115,7 +115,7 @@
ehrpwm2_tbclk: ehrpwm2_tbclk@44e10664 {
#clock-cells = <0>;
compatible = "ti,gate-clock";
- clocks = <&dpll_per_m2_ck>;
+ clocks = <&l4ls_gclk>;
ti,bit-shift = <2>;
reg = <0x0664>;
};
diff --git a/arch/arm/boot/dts/am43xx-clocks.dtsi b/arch/arm/boot/dts/am43xx-clocks.dtsi
index c7dc9dab93a4..cfb49686ab6a 100644
--- a/arch/arm/boot/dts/am43xx-clocks.dtsi
+++ b/arch/arm/boot/dts/am43xx-clocks.dtsi
@@ -107,7 +107,7 @@
ehrpwm0_tbclk: ehrpwm0_tbclk {
#clock-cells = <0>;
compatible = "ti,gate-clock";
- clocks = <&dpll_per_m2_ck>;
+ clocks = <&l4ls_gclk>;
ti,bit-shift = <0>;
reg = <0x0664>;
};
@@ -115,7 +115,7 @@
ehrpwm1_tbclk: ehrpwm1_tbclk {
#clock-cells = <0>;
compatible = "ti,gate-clock";
- clocks = <&dpll_per_m2_ck>;
+ clocks = <&l4ls_gclk>;
ti,bit-shift = <1>;
reg = <0x0664>;
};
@@ -123,7 +123,7 @@
ehrpwm2_tbclk: ehrpwm2_tbclk {
#clock-cells = <0>;
compatible = "ti,gate-clock";
- clocks = <&dpll_per_m2_ck>;
+ clocks = <&l4ls_gclk>;
ti,bit-shift = <2>;
reg = <0x0664>;
};
@@ -131,7 +131,7 @@
ehrpwm3_tbclk: ehrpwm3_tbclk {
#clock-cells = <0>;
compatible = "ti,gate-clock";
- clocks = <&dpll_per_m2_ck>;
+ clocks = <&l4ls_gclk>;
ti,bit-shift = <4>;
reg = <0x0664>;
};
@@ -139,7 +139,7 @@
ehrpwm4_tbclk: ehrpwm4_tbclk {
#clock-cells = <0>;
compatible = "ti,gate-clock";
- clocks = <&dpll_per_m2_ck>;
+ clocks = <&l4ls_gclk>;
ti,bit-shift = <5>;
reg = <0x0664>;
};
@@ -147,7 +147,7 @@
ehrpwm5_tbclk: ehrpwm5_tbclk {
#clock-cells = <0>;
compatible = "ti,gate-clock";
- clocks = <&dpll_per_m2_ck>;
+ clocks = <&l4ls_gclk>;
ti,bit-shift = <6>;
reg = <0x0664>;
};
diff --git a/arch/arm/boot/dts/at91sam9260.dtsi b/arch/arm/boot/dts/at91sam9260.dtsi
index fff0ee69aab4..e7f0a4ae271c 100644
--- a/arch/arm/boot/dts/at91sam9260.dtsi
+++ b/arch/arm/boot/dts/at91sam9260.dtsi
@@ -494,12 +494,12 @@
pinctrl_usart3_rts: usart3_rts-0 {
atmel,pins =
- <AT91_PIOB 8 AT91_PERIPH_B AT91_PINCTRL_NONE>; /* PC8 periph B */
+ <AT91_PIOC 8 AT91_PERIPH_B AT91_PINCTRL_NONE>;
};
pinctrl_usart3_cts: usart3_cts-0 {
atmel,pins =
- <AT91_PIOB 10 AT91_PERIPH_B AT91_PINCTRL_NONE>; /* PC10 periph B */
+ <AT91_PIOC 10 AT91_PERIPH_B AT91_PINCTRL_NONE>;
};
};
@@ -853,7 +853,7 @@
};
usb1: gadget@fffa4000 {
- compatible = "atmel,at91rm9200-udc";
+ compatible = "atmel,at91sam9260-udc";
reg = <0xfffa4000 0x4000>;
interrupts = <10 IRQ_TYPE_LEVEL_HIGH 2>;
clocks = <&udc_clk>, <&udpck>;
@@ -976,7 +976,6 @@
atmel,watchdog-type = "hardware";
atmel,reset-type = "all";
atmel,dbg-halt;
- atmel,idle-halt;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/at91sam9261.dtsi b/arch/arm/boot/dts/at91sam9261.dtsi
index e247b0b5fdab..d55fdf2487ef 100644
--- a/arch/arm/boot/dts/at91sam9261.dtsi
+++ b/arch/arm/boot/dts/at91sam9261.dtsi
@@ -124,11 +124,12 @@
};
usb1: gadget@fffa4000 {
- compatible = "atmel,at91rm9200-udc";
+ compatible = "atmel,at91sam9261-udc";
reg = <0xfffa4000 0x4000>;
interrupts = <10 IRQ_TYPE_LEVEL_HIGH 2>;
- clocks = <&usb>, <&udc_clk>, <&udpck>;
- clock-names = "usb_clk", "udc_clk", "udpck";
+ clocks = <&udc_clk>, <&udpck>;
+ clock-names = "pclk", "hclk";
+ atmel,matrix = <&matrix>;
status = "disabled";
};
@@ -262,7 +263,7 @@
};
matrix: matrix@ffffee00 {
- compatible = "atmel,at91sam9260-bus-matrix";
+ compatible = "atmel,at91sam9260-bus-matrix", "syscon";
reg = <0xffffee00 0x200>;
};
diff --git a/arch/arm/boot/dts/at91sam9263.dtsi b/arch/arm/boot/dts/at91sam9263.dtsi
index 1f67bb4c144e..fce301c4e9d6 100644
--- a/arch/arm/boot/dts/at91sam9263.dtsi
+++ b/arch/arm/boot/dts/at91sam9263.dtsi
@@ -69,7 +69,7 @@
sram1: sram@00500000 {
compatible = "mmio-sram";
- reg = <0x00300000 0x4000>;
+ reg = <0x00500000 0x4000>;
};
ahb {
@@ -856,7 +856,7 @@
};
usb1: gadget@fff78000 {
- compatible = "atmel,at91rm9200-udc";
+ compatible = "atmel,at91sam9263-udc";
reg = <0xfff78000 0x4000>;
interrupts = <24 IRQ_TYPE_LEVEL_HIGH 2>;
clocks = <&udc_clk>, <&udpck>;
@@ -905,7 +905,6 @@
atmel,watchdog-type = "hardware";
atmel,reset-type = "all";
atmel,dbg-halt;
- atmel,idle-halt;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/at91sam9g45.dtsi b/arch/arm/boot/dts/at91sam9g45.dtsi
index ee80aa9c0759..488af63d5174 100644
--- a/arch/arm/boot/dts/at91sam9g45.dtsi
+++ b/arch/arm/boot/dts/at91sam9g45.dtsi
@@ -1116,7 +1116,6 @@
atmel,watchdog-type = "hardware";
atmel,reset-type = "all";
atmel,dbg-halt;
- atmel,idle-halt;
status = "disabled";
};
@@ -1301,7 +1300,7 @@
compatible = "atmel,at91sam9g45-ehci", "usb-ehci";
reg = <0x00800000 0x100000>;
interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>;
- clocks = <&usb>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>;
+ clocks = <&utmi>, <&uhphs_clk>, <&uhphs_clk>, <&uhpck>;
clock-names = "usb_clk", "ehci_clk", "hclk", "uhpck";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/at91sam9n12.dtsi b/arch/arm/boot/dts/at91sam9n12.dtsi
index c2666a7cb5b1..0c53a375ba99 100644
--- a/arch/arm/boot/dts/at91sam9n12.dtsi
+++ b/arch/arm/boot/dts/at91sam9n12.dtsi
@@ -894,7 +894,6 @@
atmel,watchdog-type = "hardware";
atmel,reset-type = "all";
atmel,dbg-halt;
- atmel,idle-halt;
status = "disabled";
};
diff --git a/arch/arm/boot/dts/at91sam9x5.dtsi b/arch/arm/boot/dts/at91sam9x5.dtsi
index 818dabdd8c0e..d221179d0f1a 100644
--- a/arch/arm/boot/dts/at91sam9x5.dtsi
+++ b/arch/arm/boot/dts/at91sam9x5.dtsi
@@ -1066,7 +1066,7 @@
reg = <0x00500000 0x80000
0xf803c000 0x400>;
interrupts = <23 IRQ_TYPE_LEVEL_HIGH 0>;
- clocks = <&usb>, <&udphs_clk>;
+ clocks = <&utmi>, <&udphs_clk>;
clock-names = "hclk", "pclk";
status = "disabled";
@@ -1130,7 +1130,6 @@
atmel,watchdog-type = "hardware";
atmel,reset-type = "all";
atmel,dbg-halt;
- atmel,idle-halt;
status = "disabled";
};
@@ -1186,7 +1185,7 @@
compatible = "atmel,at91sam9g45-ehci", "usb-ehci";
reg = <0x00700000 0x100000>;
interrupts = <22 IRQ_TYPE_LEVEL_HIGH 2>;
- clocks = <&usb>, <&uhphs_clk>, <&uhpck>;
+ clocks = <&utmi>, <&uhphs_clk>, <&uhpck>;
clock-names = "usb_clk", "ehci_clk", "uhpck";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts
index d3a29c1b8417..afe678f6d2e9 100644
--- a/arch/arm/boot/dts/dm8168-evm.dts
+++ b/arch/arm/boot/dts/dm8168-evm.dts
@@ -36,6 +36,20 @@
>;
};
+ mmc_pins: pinmux_mmc_pins {
+ pinctrl-single,pins = <
+ DM816X_IOPAD(0x0a70, MUX_MODE0) /* SD_POW */
+ DM816X_IOPAD(0x0a74, MUX_MODE0) /* SD_CLK */
+ DM816X_IOPAD(0x0a78, MUX_MODE0) /* SD_CMD */
+ DM816X_IOPAD(0x0a7C, MUX_MODE0) /* SD_DAT0 */
+ DM816X_IOPAD(0x0a80, MUX_MODE0) /* SD_DAT1 */
+ DM816X_IOPAD(0x0a84, MUX_MODE0) /* SD_DAT2 */
+ DM816X_IOPAD(0x0a88, MUX_MODE0) /* SD_DAT2 */
+ DM816X_IOPAD(0x0a8c, MUX_MODE2) /* GP1[7] */
+ DM816X_IOPAD(0x0a90, MUX_MODE2) /* GP1[8] */
+ >;
+ };
+
usb0_pins: pinmux_usb0_pins {
pinctrl-single,pins = <
DM816X_IOPAD(0x0d00, MUX_MODE0) /* USB0_DRVVBUS */
@@ -137,7 +151,12 @@
};
&mmc1 {
+ pinctrl-names = "default";
+ pinctrl-0 = <&mmc_pins>;
vmmc-supply = <&vmmcsd_fixed>;
+ bus-width = <4>;
+ cd-gpios = <&gpio2 7 GPIO_ACTIVE_LOW>;
+ wp-gpios = <&gpio2 8 GPIO_ACTIVE_LOW>;
};
/* At least dm8168-evm rev c won't support multipoint, later may */
diff --git a/arch/arm/boot/dts/dm816x.dtsi b/arch/arm/boot/dts/dm816x.dtsi
index 3c97b5f2addc..f35715bc6992 100644
--- a/arch/arm/boot/dts/dm816x.dtsi
+++ b/arch/arm/boot/dts/dm816x.dtsi
@@ -150,17 +150,27 @@
};
gpio1: gpio@48032000 {
- compatible = "ti,omap3-gpio";
+ compatible = "ti,omap4-gpio";
ti,hwmods = "gpio1";
+ ti,gpio-always-on;
reg = <0x48032000 0x1000>;
- interrupts = <97>;
+ interrupts = <96>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
};
gpio2: gpio@4804c000 {
- compatible = "ti,omap3-gpio";
+ compatible = "ti,omap4-gpio";
ti,hwmods = "gpio2";
+ ti,gpio-always-on;
reg = <0x4804c000 0x1000>;
- interrupts = <99>;
+ interrupts = <98>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
};
gpmc: gpmc@50000000 {
diff --git a/arch/arm/boot/dts/dra7-evm.dts b/arch/arm/boot/dts/dra7-evm.dts
index 3290a96ba586..7563d7ce01bb 100644
--- a/arch/arm/boot/dts/dra7-evm.dts
+++ b/arch/arm/boot/dts/dra7-evm.dts
@@ -263,17 +263,15 @@
dcan1_pins_default: dcan1_pins_default {
pinctrl-single,pins = <
- 0x3d0 (PIN_OUTPUT | MUX_MODE0) /* dcan1_tx */
- 0x3d4 (MUX_MODE15) /* dcan1_rx.off */
- 0x418 (PULL_DIS | MUX_MODE1) /* wakeup0.dcan1_rx */
+ 0x3d0 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* dcan1_tx */
+ 0x418 (PULL_UP | MUX_MODE1) /* wakeup0.dcan1_rx */
>;
};
dcan1_pins_sleep: dcan1_pins_sleep {
pinctrl-single,pins = <
- 0x3d0 (MUX_MODE15) /* dcan1_tx.off */
- 0x3d4 (MUX_MODE15) /* dcan1_rx.off */
- 0x418 (MUX_MODE15) /* wakeup0.off */
+ 0x3d0 (MUX_MODE15 | PULL_UP) /* dcan1_tx.off */
+ 0x418 (MUX_MODE15 | PULL_UP) /* wakeup0.off */
>;
};
};
diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi
index 127608d79033..c4659a979c41 100644
--- a/arch/arm/boot/dts/dra7.dtsi
+++ b/arch/arm/boot/dts/dra7.dtsi
@@ -1111,7 +1111,6 @@
"wkupclk", "refclk",
"div-clk", "phy-div";
#phy-cells = <0>;
- ti,hwmods = "pcie1-phy";
};
pcie2_phy: pciephy@4a095000 {
@@ -1130,7 +1129,6 @@
"wkupclk", "refclk",
"div-clk", "phy-div";
#phy-cells = <0>;
- ti,hwmods = "pcie2-phy";
status = "disabled";
};
};
diff --git a/arch/arm/boot/dts/dra72-evm.dts b/arch/arm/boot/dts/dra72-evm.dts
index e0264d0bf7b9..40ed539ce474 100644
--- a/arch/arm/boot/dts/dra72-evm.dts
+++ b/arch/arm/boot/dts/dra72-evm.dts
@@ -119,17 +119,15 @@
dcan1_pins_default: dcan1_pins_default {
pinctrl-single,pins = <
- 0x3d0 (PIN_OUTPUT | MUX_MODE0) /* dcan1_tx */
- 0x3d4 (MUX_MODE15) /* dcan1_rx.off */
- 0x418 (PULL_DIS | MUX_MODE1) /* wakeup0.dcan1_rx */
+ 0x3d0 (PIN_OUTPUT_PULLUP | MUX_MODE0) /* dcan1_tx */
+ 0x418 (PULL_UP | MUX_MODE1) /* wakeup0.dcan1_rx */
>;
};
dcan1_pins_sleep: dcan1_pins_sleep {
pinctrl-single,pins = <
- 0x3d0 (MUX_MODE15) /* dcan1_tx.off */
- 0x3d4 (MUX_MODE15) /* dcan1_rx.off */
- 0x418 (MUX_MODE15) /* wakeup0.off */
+ 0x3d0 (MUX_MODE15 | PULL_UP) /* dcan1_tx.off */
+ 0x418 (MUX_MODE15 | PULL_UP) /* wakeup0.off */
>;
};
diff --git a/arch/arm/boot/dts/dra7xx-clocks.dtsi b/arch/arm/boot/dts/dra7xx-clocks.dtsi
index 4bdcbd61ce47..99b09a44e269 100644
--- a/arch/arm/boot/dts/dra7xx-clocks.dtsi
+++ b/arch/arm/boot/dts/dra7xx-clocks.dtsi
@@ -243,10 +243,18 @@
ti,invert-autoidle-bit;
};
+ dpll_core_byp_mux: dpll_core_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+ ti,bit-shift = <23>;
+ reg = <0x012c>;
+ };
+
dpll_core_ck: dpll_core_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-core-clock";
- clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+ clocks = <&sys_clkin1>, <&dpll_core_byp_mux>;
reg = <0x0120>, <0x0124>, <0x012c>, <0x0128>;
};
@@ -309,10 +317,18 @@
clock-div = <1>;
};
+ dpll_dsp_byp_mux: dpll_dsp_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin1>, <&dsp_dpll_hs_clk_div>;
+ ti,bit-shift = <23>;
+ reg = <0x0240>;
+ };
+
dpll_dsp_ck: dpll_dsp_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-clock";
- clocks = <&sys_clkin1>, <&dsp_dpll_hs_clk_div>;
+ clocks = <&sys_clkin1>, <&dpll_dsp_byp_mux>;
reg = <0x0234>, <0x0238>, <0x0240>, <0x023c>;
};
@@ -335,10 +351,18 @@
clock-div = <1>;
};
+ dpll_iva_byp_mux: dpll_iva_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin1>, <&iva_dpll_hs_clk_div>;
+ ti,bit-shift = <23>;
+ reg = <0x01ac>;
+ };
+
dpll_iva_ck: dpll_iva_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-clock";
- clocks = <&sys_clkin1>, <&iva_dpll_hs_clk_div>;
+ clocks = <&sys_clkin1>, <&dpll_iva_byp_mux>;
reg = <0x01a0>, <0x01a4>, <0x01ac>, <0x01a8>;
};
@@ -361,10 +385,18 @@
clock-div = <1>;
};
+ dpll_gpu_byp_mux: dpll_gpu_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+ ti,bit-shift = <23>;
+ reg = <0x02e4>;
+ };
+
dpll_gpu_ck: dpll_gpu_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-clock";
- clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+ clocks = <&sys_clkin1>, <&dpll_gpu_byp_mux>;
reg = <0x02d8>, <0x02dc>, <0x02e4>, <0x02e0>;
};
@@ -398,10 +430,18 @@
clock-div = <1>;
};
+ dpll_ddr_byp_mux: dpll_ddr_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+ ti,bit-shift = <23>;
+ reg = <0x021c>;
+ };
+
dpll_ddr_ck: dpll_ddr_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-clock";
- clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+ clocks = <&sys_clkin1>, <&dpll_ddr_byp_mux>;
reg = <0x0210>, <0x0214>, <0x021c>, <0x0218>;
};
@@ -416,10 +456,18 @@
ti,invert-autoidle-bit;
};
+ dpll_gmac_byp_mux: dpll_gmac_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+ ti,bit-shift = <23>;
+ reg = <0x02b4>;
+ };
+
dpll_gmac_ck: dpll_gmac_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-clock";
- clocks = <&sys_clkin1>, <&dpll_abe_m3x2_ck>;
+ clocks = <&sys_clkin1>, <&dpll_gmac_byp_mux>;
reg = <0x02a8>, <0x02ac>, <0x02b4>, <0x02b0>;
};
@@ -482,10 +530,18 @@
clock-div = <1>;
};
+ dpll_eve_byp_mux: dpll_eve_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin1>, <&eve_dpll_hs_clk_div>;
+ ti,bit-shift = <23>;
+ reg = <0x0290>;
+ };
+
dpll_eve_ck: dpll_eve_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-clock";
- clocks = <&sys_clkin1>, <&eve_dpll_hs_clk_div>;
+ clocks = <&sys_clkin1>, <&dpll_eve_byp_mux>;
reg = <0x0284>, <0x0288>, <0x0290>, <0x028c>;
};
@@ -1249,10 +1305,18 @@
clock-div = <1>;
};
+ dpll_per_byp_mux: dpll_per_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin1>, <&per_dpll_hs_clk_div>;
+ ti,bit-shift = <23>;
+ reg = <0x014c>;
+ };
+
dpll_per_ck: dpll_per_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-clock";
- clocks = <&sys_clkin1>, <&per_dpll_hs_clk_div>;
+ clocks = <&sys_clkin1>, <&dpll_per_byp_mux>;
reg = <0x0140>, <0x0144>, <0x014c>, <0x0148>;
};
@@ -1275,10 +1339,18 @@
clock-div = <1>;
};
+ dpll_usb_byp_mux: dpll_usb_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin1>, <&usb_dpll_hs_clk_div>;
+ ti,bit-shift = <23>;
+ reg = <0x018c>;
+ };
+
dpll_usb_ck: dpll_usb_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-j-type-clock";
- clocks = <&sys_clkin1>, <&usb_dpll_hs_clk_div>;
+ clocks = <&sys_clkin1>, <&dpll_usb_byp_mux>;
reg = <0x0180>, <0x0184>, <0x018c>, <0x0188>;
};
diff --git a/arch/arm/boot/dts/exynos3250.dtsi b/arch/arm/boot/dts/exynos3250.dtsi
index 277b48b0b6f9..ac6b0ae42caf 100644
--- a/arch/arm/boot/dts/exynos3250.dtsi
+++ b/arch/arm/boot/dts/exynos3250.dtsi
@@ -18,6 +18,7 @@
*/
#include "skeleton.dtsi"
+#include "exynos4-cpu-thermal.dtsi"
#include <dt-bindings/clock/exynos3250.h>
/ {
@@ -193,6 +194,7 @@
interrupts = <0 216 0>;
clocks = <&cmu CLK_TMU_APBIF>;
clock-names = "tmu_apbif";
+ #include "exynos4412-tmu-sensor-conf.dtsi"
status = "disabled";
};
diff --git a/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi b/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi
new file mode 100644
index 000000000000..735cb2f10817
--- /dev/null
+++ b/arch/arm/boot/dts/exynos4-cpu-thermal.dtsi
@@ -0,0 +1,52 @@
+/*
+ * Device tree sources for Exynos4 thermal zone
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/thermal/thermal.h>
+
+/ {
+thermal-zones {
+ cpu_thermal: cpu-thermal {
+ thermal-sensors = <&tmu 0>;
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ trips {
+ cpu_alert0: cpu-alert-0 {
+ temperature = <70000>; /* millicelsius */
+ hysteresis = <10000>; /* millicelsius */
+ type = "active";
+ };
+ cpu_alert1: cpu-alert-1 {
+ temperature = <95000>; /* millicelsius */
+ hysteresis = <10000>; /* millicelsius */
+ type = "active";
+ };
+ cpu_alert2: cpu-alert-2 {
+ temperature = <110000>; /* millicelsius */
+ hysteresis = <10000>; /* millicelsius */
+ type = "active";
+ };
+ cpu_crit0: cpu-crit-0 {
+ temperature = <120000>; /* millicelsius */
+ hysteresis = <0>; /* millicelsius */
+ type = "critical";
+ };
+ };
+ cooling-maps {
+ map0 {
+ trip = <&cpu_alert0>;
+ };
+ map1 {
+ trip = <&cpu_alert1>;
+ };
+ };
+ };
+};
+};
diff --git a/arch/arm/boot/dts/exynos4.dtsi b/arch/arm/boot/dts/exynos4.dtsi
index 76173cacd450..77ea547768f4 100644
--- a/arch/arm/boot/dts/exynos4.dtsi
+++ b/arch/arm/boot/dts/exynos4.dtsi
@@ -38,6 +38,7 @@
i2c5 = &i2c_5;
i2c6 = &i2c_6;
i2c7 = &i2c_7;
+ i2c8 = &i2c_8;
csis0 = &csis_0;
csis1 = &csis_1;
fimc0 = &fimc_0;
@@ -104,6 +105,7 @@
compatible = "samsung,exynos4210-pd";
reg = <0x10023C20 0x20>;
#power-domain-cells = <0>;
+ power-domains = <&pd_lcd0>;
};
pd_cam: cam-power-domain@10023C00 {
@@ -554,6 +556,22 @@
status = "disabled";
};
+ i2c_8: i2c@138E0000 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ compatible = "samsung,s3c2440-hdmiphy-i2c";
+ reg = <0x138E0000 0x100>;
+ interrupts = <0 93 0>;
+ clocks = <&clock CLK_I2C_HDMI>;
+ clock-names = "i2c";
+ status = "disabled";
+
+ hdmi_i2c_phy: hdmiphy@38 {
+ compatible = "exynos4210-hdmiphy";
+ reg = <0x38>;
+ };
+ };
+
spi_0: spi@13920000 {
compatible = "samsung,exynos4210-spi";
reg = <0x13920000 0x100>;
@@ -663,6 +681,33 @@
status = "disabled";
};
+ tmu: tmu@100C0000 {
+ #include "exynos4412-tmu-sensor-conf.dtsi"
+ };
+
+ hdmi: hdmi@12D00000 {
+ compatible = "samsung,exynos4210-hdmi";
+ reg = <0x12D00000 0x70000>;
+ interrupts = <0 92 0>;
+ clock-names = "hdmi", "sclk_hdmi", "sclk_pixel", "sclk_hdmiphy",
+ "mout_hdmi";
+ clocks = <&clock CLK_HDMI>, <&clock CLK_SCLK_HDMI>,
+ <&clock CLK_SCLK_PIXEL>, <&clock CLK_SCLK_HDMIPHY>,
+ <&clock CLK_MOUT_HDMI>;
+ phy = <&hdmi_i2c_phy>;
+ power-domains = <&pd_tv>;
+ samsung,syscon-phandle = <&pmu_system_controller>;
+ status = "disabled";
+ };
+
+ mixer: mixer@12C10000 {
+ compatible = "samsung,exynos4210-mixer";
+ interrupts = <0 91 0>;
+ reg = <0x12C10000 0x2100>, <0x12c00000 0x300>;
+ power-domains = <&pd_tv>;
+ status = "disabled";
+ };
+
ppmu_dmc0: ppmu_dmc0@106a0000 {
compatible = "samsung,exynos-ppmu";
reg = <0x106a0000 0x2000>;
diff --git a/arch/arm/boot/dts/exynos4210-trats.dts b/arch/arm/boot/dts/exynos4210-trats.dts
index 3d6652a4b6cb..32c5fd8f6269 100644
--- a/arch/arm/boot/dts/exynos4210-trats.dts
+++ b/arch/arm/boot/dts/exynos4210-trats.dts
@@ -426,6 +426,25 @@
status = "okay";
};
+ tmu@100C0000 {
+ status = "okay";
+ };
+
+ thermal-zones {
+ cpu_thermal: cpu-thermal {
+ cooling-maps {
+ map0 {
+ /* Corresponds to 800MHz at freq_table */
+ cooling-device = <&cpu0 2 2>;
+ };
+ map1 {
+ /* Corresponds to 200MHz at freq_table */
+ cooling-device = <&cpu0 4 4>;
+ };
+ };
+ };
+ };
+
camera {
pinctrl-names = "default";
pinctrl-0 = <>;
diff --git a/arch/arm/boot/dts/exynos4210-universal_c210.dts b/arch/arm/boot/dts/exynos4210-universal_c210.dts
index b57e6b82ea20..d4f2b11319dd 100644
--- a/arch/arm/boot/dts/exynos4210-universal_c210.dts
+++ b/arch/arm/boot/dts/exynos4210-universal_c210.dts
@@ -505,6 +505,63 @@
assigned-clock-rates = <0>, <160000000>;
};
};
+
+ hdmi_en: voltage-regulator-hdmi-5v {
+ compatible = "regulator-fixed";
+ regulator-name = "HDMI_5V";
+ regulator-min-microvolt = <5000000>;
+ regulator-max-microvolt = <5000000>;
+ gpio = <&gpe0 1 0>;
+ enable-active-high;
+ };
+
+ hdmi_ddc: i2c-ddc {
+ compatible = "i2c-gpio";
+ gpios = <&gpe4 2 0 &gpe4 3 0>;
+ i2c-gpio,delay-us = <100>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ pinctrl-0 = <&i2c_ddc_bus>;
+ pinctrl-names = "default";
+ status = "okay";
+ };
+
+ mixer@12C10000 {
+ status = "okay";
+ };
+
+ hdmi@12D00000 {
+ hpd-gpio = <&gpx3 7 0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmi_hpd>;
+ hdmi-en-supply = <&hdmi_en>;
+ vdd-supply = <&ldo3_reg>;
+ vdd_osc-supply = <&ldo4_reg>;
+ vdd_pll-supply = <&ldo3_reg>;
+ ddc = <&hdmi_ddc>;
+ status = "okay";
+ };
+
+ i2c@138E0000 {
+ status = "okay";
+ };
+};
+
+&pinctrl_1 {
+ hdmi_hpd: hdmi-hpd {
+ samsung,pins = "gpx3-7";
+ samsung,pin-pud = <0>;
+ };
+};
+
+&pinctrl_0 {
+ i2c_ddc_bus: i2c-ddc-bus {
+ samsung,pins = "gpe4-2", "gpe4-3";
+ samsung,pin-function = <2>;
+ samsung,pin-pud = <3>;
+ samsung,pin-drv = <0>;
+ };
};
&mdma1 {
diff --git a/arch/arm/boot/dts/exynos4210.dtsi b/arch/arm/boot/dts/exynos4210.dtsi
index 67c832c9dcf1..be89f83f70e7 100644
--- a/arch/arm/boot/dts/exynos4210.dtsi
+++ b/arch/arm/boot/dts/exynos4210.dtsi
@@ -21,6 +21,7 @@
#include "exynos4.dtsi"
#include "exynos4210-pinctrl.dtsi"
+#include "exynos4-cpu-thermal.dtsi"
/ {
compatible = "samsung,exynos4210", "samsung,exynos4";
@@ -35,10 +36,13 @@
#address-cells = <1>;
#size-cells = <0>;
- cpu@900 {
+ cpu0: cpu@900 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0x900>;
+ cooling-min-level = <4>;
+ cooling-max-level = <2>;
+ #cooling-cells = <2>; /* min followed by max */
};
cpu@901 {
@@ -153,16 +157,38 @@
reg = <0x03860000 0x1000>;
};
- tmu@100C0000 {
+ tmu: tmu@100C0000 {
compatible = "samsung,exynos4210-tmu";
interrupt-parent = <&combiner>;
reg = <0x100C0000 0x100>;
interrupts = <2 4>;
clocks = <&clock CLK_TMU_APBIF>;
clock-names = "tmu_apbif";
+ samsung,tmu_gain = <15>;
+ samsung,tmu_reference_voltage = <7>;
status = "disabled";
};
+ thermal-zones {
+ cpu_thermal: cpu-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&tmu 0>;
+
+ trips {
+ cpu_alert0: cpu-alert-0 {
+ temperature = <85000>; /* millicelsius */
+ };
+ cpu_alert1: cpu-alert-1 {
+ temperature = <100000>; /* millicelsius */
+ };
+ cpu_alert2: cpu-alert-2 {
+ temperature = <110000>; /* millicelsius */
+ };
+ };
+ };
+ };
+
g2d@12800000 {
compatible = "samsung,s5pv210-g2d";
reg = <0x12800000 0x1000>;
@@ -203,6 +229,14 @@
};
};
+ mixer: mixer@12C10000 {
+ clock-names = "mixer", "hdmi", "sclk_hdmi", "vp", "mout_mixer",
+ "sclk_mixer";
+ clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+ <&clock CLK_SCLK_HDMI>, <&clock CLK_VP>,
+ <&clock CLK_MOUT_MIXER>, <&clock CLK_SCLK_MIXER>;
+ };
+
ppmu_lcd1: ppmu_lcd1@12240000 {
compatible = "samsung,exynos-ppmu";
reg = <0x12240000 0x2000>;
diff --git a/arch/arm/boot/dts/exynos4212.dtsi b/arch/arm/boot/dts/exynos4212.dtsi
index dd0a43ec56da..5be03288f1ee 100644
--- a/arch/arm/boot/dts/exynos4212.dtsi
+++ b/arch/arm/boot/dts/exynos4212.dtsi
@@ -26,10 +26,13 @@
#address-cells = <1>;
#size-cells = <0>;
- cpu@A00 {
+ cpu0: cpu@A00 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0xA00>;
+ cooling-min-level = <13>;
+ cooling-max-level = <7>;
+ #cooling-cells = <2>; /* min followed by max */
};
cpu@A01 {
diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
index de80b5bba204..adb4f6a97a1d 100644
--- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
+++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
@@ -249,6 +249,20 @@
regulator-always-on;
};
+ ldo8_reg: ldo@8 {
+ regulator-compatible = "LDO8";
+ regulator-name = "VDD10_HDMI_1.0V";
+ regulator-min-microvolt = <1000000>;
+ regulator-max-microvolt = <1000000>;
+ };
+
+ ldo10_reg: ldo@10 {
+ regulator-compatible = "LDO10";
+ regulator-name = "VDDQ_MIPIHSI_1.8V";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
ldo11_reg: LDO11 {
regulator-name = "VDD18_ABB1_1.8V";
regulator-min-microvolt = <1800000>;
@@ -411,6 +425,51 @@
ehci: ehci@12580000 {
status = "okay";
};
+
+ tmu@100C0000 {
+ vtmu-supply = <&ldo10_reg>;
+ status = "okay";
+ };
+
+ thermal-zones {
+ cpu_thermal: cpu-thermal {
+ cooling-maps {
+ map0 {
+ /* Corresponds to 800MHz at freq_table */
+ cooling-device = <&cpu0 7 7>;
+ };
+ map1 {
+ /* Corresponds to 200MHz at freq_table */
+ cooling-device = <&cpu0 13 13>;
+ };
+ };
+ };
+ };
+
+ mixer: mixer@12C10000 {
+ status = "okay";
+ };
+
+ hdmi@12D00000 {
+ hpd-gpio = <&gpx3 7 0>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmi_hpd>;
+ vdd-supply = <&ldo8_reg>;
+ vdd_osc-supply = <&ldo10_reg>;
+ vdd_pll-supply = <&ldo8_reg>;
+ ddc = <&hdmi_ddc>;
+ status = "okay";
+ };
+
+ hdmi_ddc: i2c@13880000 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&i2c2_bus>;
+ };
+
+ i2c@138E0000 {
+ status = "okay";
+ };
};
&pinctrl_1 {
@@ -425,4 +484,9 @@
samsung,pin-pud = <0>;
samsung,pin-drv = <0>;
};
+
+ hdmi_hpd: hdmi-hpd {
+ samsung,pins = "gpx3-7";
+ samsung,pin-pud = <1>;
+ };
};
diff --git a/arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi b/arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi
new file mode 100644
index 000000000000..e3f7934d19d0
--- /dev/null
+++ b/arch/arm/boot/dts/exynos4412-tmu-sensor-conf.dtsi
@@ -0,0 +1,24 @@
+/*
+ * Device tree sources for Exynos4412 TMU sensor configuration
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/thermal/thermal_exynos.h>
+
+#thermal-sensor-cells = <0>;
+samsung,tmu_gain = <8>;
+samsung,tmu_reference_voltage = <16>;
+samsung,tmu_noise_cancel_mode = <4>;
+samsung,tmu_efuse_value = <55>;
+samsung,tmu_min_efuse_value = <40>;
+samsung,tmu_max_efuse_value = <100>;
+samsung,tmu_first_point_trim = <25>;
+samsung,tmu_second_point_trim = <85>;
+samsung,tmu_default_temp_offset = <50>;
+samsung,tmu_cal_type = <TYPE_ONE_POINT_TRIMMING>;
diff --git a/arch/arm/boot/dts/exynos4412-trats2.dts b/arch/arm/boot/dts/exynos4412-trats2.dts
index 21f748083586..173ffa479ad3 100644
--- a/arch/arm/boot/dts/exynos4412-trats2.dts
+++ b/arch/arm/boot/dts/exynos4412-trats2.dts
@@ -927,6 +927,21 @@
pulldown-ohm = <100000>; /* 100K */
io-channels = <&adc 2>; /* Battery temperature */
};
+
+ thermal-zones {
+ cpu_thermal: cpu-thermal {
+ cooling-maps {
+ map0 {
+ /* Corresponds to 800MHz at freq_table */
+ cooling-device = <&cpu0 7 7>;
+ };
+ map1 {
+ /* Corresponds to 200MHz at freq_table */
+ cooling-device = <&cpu0 13 13>;
+ };
+ };
+ };
+ };
};
&pmu_system_controller {
diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi
index 0f6ec93bb1d8..68ad43b391ae 100644
--- a/arch/arm/boot/dts/exynos4412.dtsi
+++ b/arch/arm/boot/dts/exynos4412.dtsi
@@ -26,10 +26,13 @@
#address-cells = <1>;
#size-cells = <0>;
- cpu@A00 {
+ cpu0: cpu@A00 {
device_type = "cpu";
compatible = "arm,cortex-a9";
reg = <0xA00>;
+ cooling-min-level = <13>;
+ cooling-max-level = <7>;
+ #cooling-cells = <2>; /* min followed by max */
};
cpu@A01 {
diff --git a/arch/arm/boot/dts/exynos4x12.dtsi b/arch/arm/boot/dts/exynos4x12.dtsi
index f5e0ae780d6c..6a6abe14fd9b 100644
--- a/arch/arm/boot/dts/exynos4x12.dtsi
+++ b/arch/arm/boot/dts/exynos4x12.dtsi
@@ -19,6 +19,7 @@
#include "exynos4.dtsi"
#include "exynos4x12-pinctrl.dtsi"
+#include "exynos4-cpu-thermal.dtsi"
/ {
aliases {
@@ -297,4 +298,15 @@
clock-names = "tmu_apbif";
status = "disabled";
};
+
+ hdmi: hdmi@12D00000 {
+ compatible = "samsung,exynos4212-hdmi";
+ };
+
+ mixer: mixer@12C10000 {
+ compatible = "samsung,exynos4212-mixer";
+ clock-names = "mixer", "hdmi", "sclk_hdmi", "vp";
+ clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+ <&clock CLK_SCLK_HDMI>, <&clock CLK_VP>;
+ };
};
diff --git a/arch/arm/boot/dts/exynos5250.dtsi b/arch/arm/boot/dts/exynos5250.dtsi
index 9bb1b0b738f5..adbde1adad95 100644
--- a/arch/arm/boot/dts/exynos5250.dtsi
+++ b/arch/arm/boot/dts/exynos5250.dtsi
@@ -20,7 +20,7 @@
#include <dt-bindings/clock/exynos5250.h>
#include "exynos5.dtsi"
#include "exynos5250-pinctrl.dtsi"
-
+#include "exynos4-cpu-thermal.dtsi"
#include <dt-bindings/clock/exynos-audss-clk.h>
/ {
@@ -58,11 +58,14 @@
#address-cells = <1>;
#size-cells = <0>;
- cpu@0 {
+ cpu0: cpu@0 {
device_type = "cpu";
compatible = "arm,cortex-a15";
reg = <0>;
clock-frequency = <1700000000>;
+ cooling-min-level = <15>;
+ cooling-max-level = <9>;
+ #cooling-cells = <2>; /* min followed by max */
};
cpu@1 {
device_type = "cpu";
@@ -102,6 +105,12 @@
#power-domain-cells = <0>;
};
+ pd_disp1: disp1-power-domain@100440A0 {
+ compatible = "samsung,exynos4210-pd";
+ reg = <0x100440A0 0x20>;
+ #power-domain-cells = <0>;
+ };
+
clock: clock-controller@10010000 {
compatible = "samsung,exynos5250-clock";
reg = <0x10010000 0x30000>;
@@ -235,12 +244,32 @@
status = "disabled";
};
- tmu@10060000 {
+ tmu: tmu@10060000 {
compatible = "samsung,exynos5250-tmu";
reg = <0x10060000 0x100>;
interrupts = <0 65 0>;
clocks = <&clock CLK_TMU>;
clock-names = "tmu_apbif";
+ #include "exynos4412-tmu-sensor-conf.dtsi"
+ };
+
+ thermal-zones {
+ cpu_thermal: cpu-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+ thermal-sensors = <&tmu 0>;
+
+ cooling-maps {
+ map0 {
+ /* Corresponds to 800MHz at freq_table */
+ cooling-device = <&cpu0 9 9>;
+ };
+ map1 {
+ /* Corresponds to 200MHz at freq_table */
+ cooling-device = <&cpu0 15 15>;
+ };
+ };
+ };
};
serial@12C00000 {
@@ -719,6 +748,7 @@
hdmi: hdmi {
compatible = "samsung,exynos4212-hdmi";
reg = <0x14530000 0x70000>;
+ power-domains = <&pd_disp1>;
interrupts = <0 95 0>;
clocks = <&clock CLK_HDMI>, <&clock CLK_SCLK_HDMI>,
<&clock CLK_SCLK_PIXEL>, <&clock CLK_SCLK_HDMIPHY>,
@@ -731,9 +761,11 @@
mixer {
compatible = "samsung,exynos5250-mixer";
reg = <0x14450000 0x10000>;
+ power-domains = <&pd_disp1>;
interrupts = <0 94 0>;
- clocks = <&clock CLK_MIXER>, <&clock CLK_SCLK_HDMI>;
- clock-names = "mixer", "sclk_hdmi";
+ clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+ <&clock CLK_SCLK_HDMI>;
+ clock-names = "mixer", "hdmi", "sclk_hdmi";
};
dp_phy: video-phy@10040720 {
@@ -743,6 +775,7 @@
};
dp: dp-controller@145B0000 {
+ power-domains = <&pd_disp1>;
clocks = <&clock CLK_DP>;
clock-names = "dp";
phys = <&dp_phy>;
@@ -750,6 +783,7 @@
};
fimd: fimd@14400000 {
+ power-domains = <&pd_disp1>;
clocks = <&clock CLK_SCLK_FIMD1>, <&clock CLK_FIMD1>;
clock-names = "sclk_fimd", "fimd";
};
diff --git a/arch/arm/boot/dts/exynos5420-trip-points.dtsi b/arch/arm/boot/dts/exynos5420-trip-points.dtsi
new file mode 100644
index 000000000000..5d31fc140823
--- /dev/null
+++ b/arch/arm/boot/dts/exynos5420-trip-points.dtsi
@@ -0,0 +1,35 @@
+/*
+ * Device tree sources for default Exynos5420 thermal zone definition
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+polling-delay-passive = <0>;
+polling-delay = <0>;
+trips {
+ cpu-alert-0 {
+ temperature = <85000>; /* millicelsius */
+ hysteresis = <10000>; /* millicelsius */
+ type = "active";
+ };
+ cpu-alert-1 {
+ temperature = <103000>; /* millicelsius */
+ hysteresis = <10000>; /* millicelsius */
+ type = "active";
+ };
+ cpu-alert-2 {
+ temperature = <110000>; /* millicelsius */
+ hysteresis = <10000>; /* millicelsius */
+ type = "active";
+ };
+ cpu-crit-0 {
+ temperature = <1200000>; /* millicelsius */
+ hysteresis = <0>; /* millicelsius */
+ type = "critical";
+ };
+};
diff --git a/arch/arm/boot/dts/exynos5420.dtsi b/arch/arm/boot/dts/exynos5420.dtsi
index 9dc2e9773b30..c0e98cf3514f 100644
--- a/arch/arm/boot/dts/exynos5420.dtsi
+++ b/arch/arm/boot/dts/exynos5420.dtsi
@@ -740,8 +740,9 @@
compatible = "samsung,exynos5420-mixer";
reg = <0x14450000 0x10000>;
interrupts = <0 94 0>;
- clocks = <&clock CLK_MIXER>, <&clock CLK_SCLK_HDMI>;
- clock-names = "mixer", "sclk_hdmi";
+ clocks = <&clock CLK_MIXER>, <&clock CLK_HDMI>,
+ <&clock CLK_SCLK_HDMI>;
+ clock-names = "mixer", "hdmi", "sclk_hdmi";
power-domains = <&disp_pd>;
};
@@ -782,6 +783,7 @@
interrupts = <0 65 0>;
clocks = <&clock CLK_TMU>;
clock-names = "tmu_apbif";
+ #include "exynos4412-tmu-sensor-conf.dtsi"
};
tmu_cpu1: tmu@10064000 {
@@ -790,6 +792,7 @@
interrupts = <0 183 0>;
clocks = <&clock CLK_TMU>;
clock-names = "tmu_apbif";
+ #include "exynos4412-tmu-sensor-conf.dtsi"
};
tmu_cpu2: tmu@10068000 {
@@ -798,6 +801,7 @@
interrupts = <0 184 0>;
clocks = <&clock CLK_TMU>, <&clock CLK_TMU>;
clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+ #include "exynos4412-tmu-sensor-conf.dtsi"
};
tmu_cpu3: tmu@1006c000 {
@@ -806,6 +810,7 @@
interrupts = <0 185 0>;
clocks = <&clock CLK_TMU>, <&clock CLK_TMU_GPU>;
clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+ #include "exynos4412-tmu-sensor-conf.dtsi"
};
tmu_gpu: tmu@100a0000 {
@@ -814,6 +819,30 @@
interrupts = <0 215 0>;
clocks = <&clock CLK_TMU_GPU>, <&clock CLK_TMU>;
clock-names = "tmu_apbif", "tmu_triminfo_apbif";
+ #include "exynos4412-tmu-sensor-conf.dtsi"
+ };
+
+ thermal-zones {
+ cpu0_thermal: cpu0-thermal {
+ thermal-sensors = <&tmu_cpu0>;
+ #include "exynos5420-trip-points.dtsi"
+ };
+ cpu1_thermal: cpu1-thermal {
+ thermal-sensors = <&tmu_cpu1>;
+ #include "exynos5420-trip-points.dtsi"
+ };
+ cpu2_thermal: cpu2-thermal {
+ thermal-sensors = <&tmu_cpu2>;
+ #include "exynos5420-trip-points.dtsi"
+ };
+ cpu3_thermal: cpu3-thermal {
+ thermal-sensors = <&tmu_cpu3>;
+ #include "exynos5420-trip-points.dtsi"
+ };
+ gpu_thermal: gpu-thermal {
+ thermal-sensors = <&tmu_gpu>;
+ #include "exynos5420-trip-points.dtsi"
+ };
};
watchdog: watchdog@101D0000 {
diff --git a/arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi b/arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi
new file mode 100644
index 000000000000..7b2fba0ae92b
--- /dev/null
+++ b/arch/arm/boot/dts/exynos5440-tmu-sensor-conf.dtsi
@@ -0,0 +1,24 @@
+/*
+ * Device tree sources for Exynos5440 TMU sensor configuration
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <dt-bindings/thermal/thermal_exynos.h>
+
+#thermal-sensor-cells = <0>;
+samsung,tmu_gain = <5>;
+samsung,tmu_reference_voltage = <16>;
+samsung,tmu_noise_cancel_mode = <4>;
+samsung,tmu_efuse_value = <0x5d2d>;
+samsung,tmu_min_efuse_value = <16>;
+samsung,tmu_max_efuse_value = <76>;
+samsung,tmu_first_point_trim = <25>;
+samsung,tmu_second_point_trim = <70>;
+samsung,tmu_default_temp_offset = <25>;
+samsung,tmu_cal_type = <TYPE_ONE_POINT_TRIMMING>;
diff --git a/arch/arm/boot/dts/exynos5440-trip-points.dtsi b/arch/arm/boot/dts/exynos5440-trip-points.dtsi
new file mode 100644
index 000000000000..48adfa8f4300
--- /dev/null
+++ b/arch/arm/boot/dts/exynos5440-trip-points.dtsi
@@ -0,0 +1,25 @@
+/*
+ * Device tree sources for default Exynos5440 thermal zone definition
+ *
+ * Copyright (c) 2014 Lukasz Majewski <l.majewski@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+polling-delay-passive = <0>;
+polling-delay = <0>;
+trips {
+ cpu-alert-0 {
+ temperature = <100000>; /* millicelsius */
+ hysteresis = <0>; /* millicelsius */
+ type = "active";
+ };
+ cpu-crit-0 {
+ temperature = <1050000>; /* millicelsius */
+ hysteresis = <0>; /* millicelsius */
+ type = "critical";
+ };
+};
diff --git a/arch/arm/boot/dts/exynos5440.dtsi b/arch/arm/boot/dts/exynos5440.dtsi
index 8f3373cd7b87..59d9416b3b03 100644
--- a/arch/arm/boot/dts/exynos5440.dtsi
+++ b/arch/arm/boot/dts/exynos5440.dtsi
@@ -219,6 +219,7 @@
interrupts = <0 58 0>;
clocks = <&clock CLK_B_125>;
clock-names = "tmu_apbif";
+ #include "exynos5440-tmu-sensor-conf.dtsi"
};
tmuctrl_1: tmuctrl@16011C {
@@ -227,6 +228,7 @@
interrupts = <0 58 0>;
clocks = <&clock CLK_B_125>;
clock-names = "tmu_apbif";
+ #include "exynos5440-tmu-sensor-conf.dtsi"
};
tmuctrl_2: tmuctrl@160120 {
@@ -235,6 +237,22 @@
interrupts = <0 58 0>;
clocks = <&clock CLK_B_125>;
clock-names = "tmu_apbif";
+ #include "exynos5440-tmu-sensor-conf.dtsi"
+ };
+
+ thermal-zones {
+ cpu0_thermal: cpu0-thermal {
+ thermal-sensors = <&tmuctrl_0>;
+ #include "exynos5440-trip-points.dtsi"
+ };
+ cpu1_thermal: cpu1-thermal {
+ thermal-sensors = <&tmuctrl_1>;
+ #include "exynos5440-trip-points.dtsi"
+ };
+ cpu2_thermal: cpu2-thermal {
+ thermal-sensors = <&tmuctrl_2>;
+ #include "exynos5440-trip-points.dtsi"
+ };
};
sata@210000 {
diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
index f1cd2147421d..a626e6dd8022 100644
--- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
@@ -35,6 +35,7 @@
regulator-max-microvolt = <5000000>;
gpio = <&gpio3 22 0>;
enable-active-high;
+ vin-supply = <&swbst_reg>;
};
reg_usb_h1_vbus: regulator@1 {
@@ -45,6 +46,7 @@
regulator-max-microvolt = <5000000>;
gpio = <&gpio1 29 0>;
enable-active-high;
+ vin-supply = <&swbst_reg>;
};
reg_audio: regulator@2 {
diff --git a/arch/arm/boot/dts/imx6sl-evk.dts b/arch/arm/boot/dts/imx6sl-evk.dts
index fda4932faefd..945887d3fdb3 100644
--- a/arch/arm/boot/dts/imx6sl-evk.dts
+++ b/arch/arm/boot/dts/imx6sl-evk.dts
@@ -52,6 +52,7 @@
regulator-max-microvolt = <5000000>;
gpio = <&gpio4 0 0>;
enable-active-high;
+ vin-supply = <&swbst_reg>;
};
reg_usb_otg2_vbus: regulator@1 {
@@ -62,6 +63,7 @@
regulator-max-microvolt = <5000000>;
gpio = <&gpio4 2 0>;
enable-active-high;
+ vin-supply = <&swbst_reg>;
};
reg_aud3v: regulator@2 {
diff --git a/arch/arm/boot/dts/omap3.dtsi b/arch/arm/boot/dts/omap3.dtsi
index f4f78c40b564..3fdc84fddb70 100644
--- a/arch/arm/boot/dts/omap3.dtsi
+++ b/arch/arm/boot/dts/omap3.dtsi
@@ -92,6 +92,8 @@
ti,hwmods = "aes";
reg = <0x480c5000 0x50>;
interrupts = <0>;
+ dmas = <&sdma 65 &sdma 66>;
+ dma-names = "tx", "rx";
};
prm: prm@48306000 {
@@ -550,6 +552,8 @@
ti,hwmods = "sham";
reg = <0x480c3000 0x64>;
interrupts = <49>;
+ dmas = <&sdma 69>;
+ dma-names = "rx";
};
smartreflex_core: smartreflex@480cb000 {
diff --git a/arch/arm/boot/dts/omap5-core-thermal.dtsi b/arch/arm/boot/dts/omap5-core-thermal.dtsi
index 19212ac6eef0..de8a3d456cf7 100644
--- a/arch/arm/boot/dts/omap5-core-thermal.dtsi
+++ b/arch/arm/boot/dts/omap5-core-thermal.dtsi
@@ -13,7 +13,7 @@
core_thermal: core_thermal {
polling-delay-passive = <250>; /* milliseconds */
- polling-delay = <1000>; /* milliseconds */
+ polling-delay = <500>; /* milliseconds */
/* sensor ID */
thermal-sensors = <&bandgap 2>;
diff --git a/arch/arm/boot/dts/omap5-gpu-thermal.dtsi b/arch/arm/boot/dts/omap5-gpu-thermal.dtsi
index 1b87aca88b77..bc3090f2e84b 100644
--- a/arch/arm/boot/dts/omap5-gpu-thermal.dtsi
+++ b/arch/arm/boot/dts/omap5-gpu-thermal.dtsi
@@ -13,7 +13,7 @@
gpu_thermal: gpu_thermal {
polling-delay-passive = <250>; /* milliseconds */
- polling-delay = <1000>; /* milliseconds */
+ polling-delay = <500>; /* milliseconds */
/* sensor ID */
thermal-sensors = <&bandgap 1>;
diff --git a/arch/arm/boot/dts/omap5.dtsi b/arch/arm/boot/dts/omap5.dtsi
index ddff674bd05e..4a485b63a141 100644
--- a/arch/arm/boot/dts/omap5.dtsi
+++ b/arch/arm/boot/dts/omap5.dtsi
@@ -1079,4 +1079,8 @@
};
};
+&cpu_thermal {
+ polling-delay = <500>; /* milliseconds */
+};
+
/include/ "omap54xx-clocks.dtsi"
diff --git a/arch/arm/boot/dts/omap54xx-clocks.dtsi b/arch/arm/boot/dts/omap54xx-clocks.dtsi
index 58c27466f012..83b425fb3ac2 100644
--- a/arch/arm/boot/dts/omap54xx-clocks.dtsi
+++ b/arch/arm/boot/dts/omap54xx-clocks.dtsi
@@ -167,10 +167,18 @@
ti,index-starts-at-one;
};
+ dpll_core_byp_mux: dpll_core_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin>, <&dpll_abe_m3x2_ck>;
+ ti,bit-shift = <23>;
+ reg = <0x012c>;
+ };
+
dpll_core_ck: dpll_core_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-core-clock";
- clocks = <&sys_clkin>, <&dpll_abe_m3x2_ck>;
+ clocks = <&sys_clkin>, <&dpll_core_byp_mux>;
reg = <0x0120>, <0x0124>, <0x012c>, <0x0128>;
};
@@ -294,10 +302,18 @@
clock-div = <1>;
};
+ dpll_iva_byp_mux: dpll_iva_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin>, <&iva_dpll_hs_clk_div>;
+ ti,bit-shift = <23>;
+ reg = <0x01ac>;
+ };
+
dpll_iva_ck: dpll_iva_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-clock";
- clocks = <&sys_clkin>, <&iva_dpll_hs_clk_div>;
+ clocks = <&sys_clkin>, <&dpll_iva_byp_mux>;
reg = <0x01a0>, <0x01a4>, <0x01ac>, <0x01a8>;
};
@@ -599,10 +615,19 @@
};
};
&cm_core_clocks {
+
+ dpll_per_byp_mux: dpll_per_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin>, <&per_dpll_hs_clk_div>;
+ ti,bit-shift = <23>;
+ reg = <0x014c>;
+ };
+
dpll_per_ck: dpll_per_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-clock";
- clocks = <&sys_clkin>, <&per_dpll_hs_clk_div>;
+ clocks = <&sys_clkin>, <&dpll_per_byp_mux>;
reg = <0x0140>, <0x0144>, <0x014c>, <0x0148>;
};
@@ -714,10 +739,18 @@
ti,index-starts-at-one;
};
+ dpll_usb_byp_mux: dpll_usb_byp_mux {
+ #clock-cells = <0>;
+ compatible = "ti,mux-clock";
+ clocks = <&sys_clkin>, <&usb_dpll_hs_clk_div>;
+ ti,bit-shift = <23>;
+ reg = <0x018c>;
+ };
+
dpll_usb_ck: dpll_usb_ck {
#clock-cells = <0>;
compatible = "ti,omap4-dpll-j-type-clock";
- clocks = <&sys_clkin>, <&usb_dpll_hs_clk_div>;
+ clocks = <&sys_clkin>, <&dpll_usb_byp_mux>;
reg = <0x0180>, <0x0184>, <0x018c>, <0x0188>;
};
diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi
index d771f687a13b..eccc78d3220b 100644
--- a/arch/arm/boot/dts/rk3288.dtsi
+++ b/arch/arm/boot/dts/rk3288.dtsi
@@ -411,6 +411,7 @@
"mac_clk_rx", "mac_clk_tx",
"clk_mac_ref", "clk_mac_refout",
"aclk_mac", "pclk_mac";
+ status = "disabled";
};
usb_host0_ehci: usb@ff500000 {
diff --git a/arch/arm/boot/dts/sama5d3.dtsi b/arch/arm/boot/dts/sama5d3.dtsi
index 261311bdf65b..367af53c1b84 100644
--- a/arch/arm/boot/dts/sama5d3.dtsi
+++ b/arch/arm/boot/dts/sama5d3.dtsi
@@ -1248,7 +1248,6 @@
atmel,watchdog-type = "hardware";
atmel,reset-type = "all";
atmel,dbg-halt;
- atmel,idle-halt;
status = "disabled";
};
@@ -1416,7 +1415,7 @@
compatible = "atmel,at91sam9g45-ehci", "usb-ehci";
reg = <0x00700000 0x100000>;
interrupts = <32 IRQ_TYPE_LEVEL_HIGH 2>;
- clocks = <&usb>, <&uhphs_clk>, <&uhpck>;
+ clocks = <&utmi>, <&uhphs_clk>, <&uhpck>;
clock-names = "usb_clk", "ehci_clk", "uhpck";
status = "disabled";
};
diff --git a/arch/arm/boot/dts/sama5d4.dtsi b/arch/arm/boot/dts/sama5d4.dtsi
index d986b41b9654..4303874889c6 100644
--- a/arch/arm/boot/dts/sama5d4.dtsi
+++ b/arch/arm/boot/dts/sama5d4.dtsi
@@ -66,6 +66,7 @@
gpio4 = &pioE;
tcb0 = &tcb0;
tcb1 = &tcb1;
+ i2c0 = &i2c0;
i2c2 = &i2c2;
};
cpus {
@@ -259,7 +260,7 @@
compatible = "atmel,at91sam9g45-ehci", "usb-ehci";
reg = <0x00600000 0x100000>;
interrupts = <46 IRQ_TYPE_LEVEL_HIGH 2>;
- clocks = <&usb>, <&uhphs_clk>, <&uhpck>;
+ clocks = <&utmi>, <&uhphs_clk>, <&uhpck>;
clock-names = "usb_clk", "ehci_clk", "uhpck";
status = "disabled";
};
@@ -461,8 +462,8 @@
lcdck: lcdck {
#clock-cells = <0>;
- reg = <4>;
- clocks = <&smd>;
+ reg = <3>;
+ clocks = <&mck>;
};
smdck: smdck {
@@ -770,7 +771,7 @@
reg = <50>;
};
- lcd_clk: lcd_clk {
+ lcdc_clk: lcdc_clk {
#clock-cells = <0>;
reg = <51>;
};
diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi
index 252c3d1bda50..d9176e606173 100644
--- a/arch/arm/boot/dts/socfpga.dtsi
+++ b/arch/arm/boot/dts/socfpga.dtsi
@@ -660,7 +660,7 @@
#address-cells = <1>;
#size-cells = <0>;
reg = <0xfff01000 0x1000>;
- interrupts = <0 156 4>;
+ interrupts = <0 155 4>;
num-cs = <4>;
clocks = <&spi_m_clk>;
status = "disabled";
@@ -713,6 +713,9 @@
reg-shift = <2>;
reg-io-width = <4>;
clocks = <&l4_sp_clk>;
+ dmas = <&pdma 28>,
+ <&pdma 29>;
+ dma-names = "tx", "rx";
};
uart1: serial1@ffc03000 {
@@ -722,6 +725,9 @@
reg-shift = <2>;
reg-io-width = <4>;
clocks = <&l4_sp_clk>;
+ dmas = <&pdma 30>,
+ <&pdma 31>;
+ dma-names = "tx", "rx";
};
rst: rstmgr@ffd05000 {
diff --git a/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts b/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts
index ab7891c43231..75742f8f96f3 100644
--- a/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts
+++ b/arch/arm/boot/dts/sun4i-a10-olinuxino-lime.dts
@@ -56,6 +56,22 @@
model = "Olimex A10-OLinuXino-LIME";
compatible = "olimex,a10-olinuxino-lime", "allwinner,sun4i-a10";
+ cpus {
+ cpu0: cpu@0 {
+ /*
+ * The A10-Lime is known to be unstable
+ * when running at 1008 MHz
+ */
+ operating-points = <
+ /* kHz uV */
+ 912000 1350000
+ 864000 1300000
+ 624000 1250000
+ >;
+ cooling-max-level = <2>;
+ };
+ };
+
soc@01c00000 {
emac: ethernet@01c0b000 {
pinctrl-names = "default";
diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index 5c2925831f20..eebb7853e00b 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi
@@ -75,7 +75,6 @@
clock-latency = <244144>; /* 8 32k periods */
operating-points = <
/* kHz uV */
- 1056000 1500000
1008000 1400000
912000 1350000
864000 1300000
@@ -83,7 +82,7 @@
>;
#cooling-cells = <2>;
cooling-min-level = <0>;
- cooling-max-level = <4>;
+ cooling-max-level = <3>;
};
};
diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index f8818f1edbbe..883cb4873688 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi
@@ -47,7 +47,6 @@
clock-latency = <244144>; /* 8 32k periods */
operating-points = <
/* kHz uV */
- 1104000 1500000
1008000 1400000
912000 1350000
864000 1300000
@@ -57,7 +56,7 @@
>;
#cooling-cells = <2>;
cooling-min-level = <0>;
- cooling-max-level = <6>;
+ cooling-max-level = <5>;
};
};
diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 3a8530b79f1c..fdd181792b4b 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi
@@ -105,7 +105,6 @@
clock-latency = <244144>; /* 8 32k periods */
operating-points = <
/* kHz uV */
- 1008000 1450000
960000 1400000
912000 1400000
864000 1300000
@@ -116,7 +115,7 @@
>;
#cooling-cells = <2>;
cooling-min-level = <0>;
- cooling-max-level = <7>;
+ cooling-max-level = <6>;
};
cpu@1 {
diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c
index 6eaddc47c43d..37dc0fe1093f 100644
--- a/arch/arm/common/bL_switcher.c
+++ b/arch/arm/common/bL_switcher.c
@@ -151,8 +151,6 @@ static int bL_switch_to(unsigned int new_cluster_id)
unsigned int mpidr, this_cpu, that_cpu;
unsigned int ob_mpidr, ob_cpu, ob_cluster, ib_mpidr, ib_cpu, ib_cluster;
struct completion inbound_alive;
- struct tick_device *tdev;
- enum clock_event_mode tdev_mode;
long volatile *handshake_ptr;
int ipi_nr, ret;
@@ -219,13 +217,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
/* redirect GIC's SGIs to our counterpart */
gic_migrate_target(bL_gic_id[ib_cpu][ib_cluster]);
- tdev = tick_get_device(this_cpu);
- if (tdev && !cpumask_equal(tdev->evtdev->cpumask, cpumask_of(this_cpu)))
- tdev = NULL;
- if (tdev) {
- tdev_mode = tdev->evtdev->mode;
- clockevents_set_mode(tdev->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
- }
+ tick_suspend_local();
ret = cpu_pm_enter();
@@ -251,11 +243,7 @@ static int bL_switch_to(unsigned int new_cluster_id)
ret = cpu_pm_exit();
- if (tdev) {
- clockevents_set_mode(tdev->evtdev, tdev_mode);
- clockevents_program_event(tdev->evtdev,
- tdev->evtdev->next_event, 1);
- }
+ tick_resume_local();
trace_cpu_migrate_finish(ktime_get_real_ns(), ib_mpidr);
local_fiq_enable();
diff --git a/arch/arm/configs/at91_dt_defconfig b/arch/arm/configs/at91_dt_defconfig
index f2670f638e97..811e72bbe642 100644
--- a/arch/arm/configs/at91_dt_defconfig
+++ b/arch/arm/configs/at91_dt_defconfig
@@ -70,6 +70,7 @@ CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
# CONFIG_SCSI_LOWLEVEL is not set
CONFIG_NETDEVICES=y
+CONFIG_ARM_AT91_ETHER=y
CONFIG_MACB=y
# CONFIG_NET_VENDOR_BROADCOM is not set
CONFIG_DM9000=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index b7e6b6fba5e0..06075b6d2463 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -99,7 +99,7 @@ CONFIG_PCI_RCAR_GEN2=y
CONFIG_PCI_RCAR_GEN2_PCIE=y
CONFIG_PCIEPORTBUS=y
CONFIG_SMP=y
-CONFIG_NR_CPUS=8
+CONFIG_NR_CPUS=16
CONFIG_HIGHPTE=y
CONFIG_CMA=y
CONFIG_ARM_APPENDED_DTB=y
diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig
index a097cffa1231..8e108599e1af 100644
--- a/arch/arm/configs/omap2plus_defconfig
+++ b/arch/arm/configs/omap2plus_defconfig
@@ -377,6 +377,7 @@ CONFIG_PWM_TWL=m
CONFIG_PWM_TWL_LED=m
CONFIG_OMAP_USB2=m
CONFIG_TI_PIPE3=y
+CONFIG_TWL4030_USB=m
CONFIG_EXT2_FS=y
CONFIG_EXT3_FS=y
# CONFIG_EXT3_FS_XATTR is not set
diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
index 41d856effe6c..510c747c65b4 100644
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig
@@ -3,8 +3,6 @@
CONFIG_SYSVIPC=y
CONFIG_IRQ_DOMAIN_DEBUG=y
CONFIG_LOG_BUF_SHIFT=14
-CONFIG_SYSFS_DEPRECATED=y
-CONFIG_SYSFS_DEPRECATED_V2=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EMBEDDED=y
CONFIG_SLAB=y
diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index 38840a812924..8f6a5702b696 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig
@@ -4,6 +4,7 @@ CONFIG_BLK_DEV_INITRD=y
CONFIG_PERF_EVENTS=y
CONFIG_ARCH_SUNXI=y
CONFIG_SMP=y
+CONFIG_NR_CPUS=8
CONFIG_AEABI=y
CONFIG_HIGHMEM=y
CONFIG_HIGHPTE=y
diff --git a/arch/arm/configs/vexpress_defconfig b/arch/arm/configs/vexpress_defconfig
index f489fdaa19b8..37fe607a4ede 100644
--- a/arch/arm/configs/vexpress_defconfig
+++ b/arch/arm/configs/vexpress_defconfig
@@ -118,8 +118,8 @@ CONFIG_HID_ZEROPLUS=y
CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y
-CONFIG_USB_ISP1760_HCD=y
CONFIG_USB_STORAGE=y
+CONFIG_USB_ISP1760=y
CONFIG_MMC=y
CONFIG_MMC_ARMMMCI=y
CONFIG_NEW_LEDS=y
diff --git a/arch/arm/crypto/aesbs-core.S_shipped b/arch/arm/crypto/aesbs-core.S_shipped
index 71e5fc7cfb18..1d1800f71c5b 100644
--- a/arch/arm/crypto/aesbs-core.S_shipped
+++ b/arch/arm/crypto/aesbs-core.S_shipped
@@ -58,14 +58,18 @@
# define VFP_ABI_FRAME 0
# define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK
-# define __ARM_ARCH__ 7
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
#endif
#ifdef __thumb__
# define adrl adr
#endif
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
.text
.syntax unified @ ARMv7-capable assembler is expected to handle this
#ifdef __thumb2__
@@ -74,8 +78,6 @@
.code 32
#endif
-.fpu neon
-
.type _bsaes_decrypt8,%function
.align 4
_bsaes_decrypt8:
@@ -2095,9 +2097,11 @@ bsaes_xts_decrypt:
vld1.8 {q8}, [r0] @ initial tweak
adr r2, .Lxts_magic
+#ifndef XTS_CHAIN_TWEAK
tst r9, #0xf @ if not multiple of 16
it ne @ Thumb2 thing, sanity check in ARM
subne r9, #0x10 @ subtract another 16 bytes
+#endif
subs r9, #0x80
blo .Lxts_dec_short
diff --git a/arch/arm/crypto/bsaes-armv7.pl b/arch/arm/crypto/bsaes-armv7.pl
index be068db960ee..a4d3856e7d24 100644
--- a/arch/arm/crypto/bsaes-armv7.pl
+++ b/arch/arm/crypto/bsaes-armv7.pl
@@ -701,14 +701,18 @@ $code.=<<___;
# define VFP_ABI_FRAME 0
# define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK
-# define __ARM_ARCH__ 7
+# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ 7
#endif
#ifdef __thumb__
# define adrl adr
#endif
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
.text
.syntax unified @ ARMv7-capable assembler is expected to handle this
#ifdef __thumb2__
@@ -717,8 +721,6 @@ $code.=<<___;
.code 32
#endif
-.fpu neon
-
.type _bsaes_decrypt8,%function
.align 4
_bsaes_decrypt8:
@@ -2076,9 +2078,11 @@ bsaes_xts_decrypt:
vld1.8 {@XMM[8]}, [r0] @ initial tweak
adr $magic, .Lxts_magic
+#ifndef XTS_CHAIN_TWEAK
tst $len, #0xf @ if not multiple of 16
it ne @ Thumb2 thing, sanity check in ARM
subne $len, #0x10 @ subtract another 16 bytes
+#endif
subs $len, #0x80
blo .Lxts_dec_short
diff --git a/arch/arm/include/asm/jump_label.h b/arch/arm/include/asm/jump_label.h
index 70f9b9bfb1f9..5f337dc5c108 100644
--- a/arch/arm/include/asm/jump_label.h
+++ b/arch/arm/include/asm/jump_label.h
@@ -1,7 +1,7 @@
#ifndef _ASM_ARM_JUMP_LABEL_H
#define _ASM_ARM_JUMP_LABEL_H
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
#include <linux/types.h>
@@ -27,8 +27,6 @@ l_yes:
return true;
}
-#endif /* __KERNEL__ */
-
typedef u32 jump_label_t;
struct jump_entry {
@@ -37,4 +35,5 @@ struct jump_entry {
jump_label_t key;
};
+#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index 816db0bf2dd8..d995821f1698 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -185,6 +185,7 @@
#define HSR_COND (0xfU << HSR_COND_SHIFT)
#define FSC_FAULT (0x04)
+#define FSC_ACCESS (0x08)
#define FSC_PERM (0x0c)
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 41008cd7c53f..d71607c16601 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -27,6 +27,8 @@
#include <asm/fpstate.h>
#include <kvm/arm_arch_timer.h>
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
#else
@@ -165,19 +167,10 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
/* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
- unsigned long end)
-{
- return 0;
-}
-
-static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-{
- return 0;
-}
-
static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
unsigned long address)
{
diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
index 3f83db2f6cf0..d8e90c8cb5fa 100644
--- a/arch/arm/include/asm/kvm_mmio.h
+++ b/arch/arm/include/asm/kvm_mmio.h
@@ -28,28 +28,6 @@ struct kvm_decode {
bool sign_extend;
};
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
- phys_addr_t phys_addr;
- u8 data[8];
- u32 len;
- bool is_write;
- void *private;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
- struct kvm_exit_mmio *mmio)
-{
- run->mmio.phys_addr = mmio->phys_addr;
- run->mmio.len = mmio->len;
- run->mmio.is_write = mmio->is_write;
- memcpy(run->mmio.data, mmio->data, mmio->len);
- run->exit_reason = KVM_EXIT_MMIO;
-}
-
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa);
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 37ca2a4c6f09..4cf48c3aca13 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -149,29 +149,28 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
(__boundary - 1 < (end) - 1)? __boundary: (end); \
})
+#define kvm_pgd_index(addr) pgd_index(addr)
+
static inline bool kvm_page_empty(void *ptr)
{
struct page *ptr_page = virt_to_page(ptr);
return page_count(ptr_page) == 1;
}
-
#define kvm_pte_table_empty(kvm, ptep) kvm_page_empty(ptep)
#define kvm_pmd_table_empty(kvm, pmdp) kvm_page_empty(pmdp)
#define kvm_pud_table_empty(kvm, pudp) (0)
#define KVM_PREALLOC_LEVEL 0
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
+static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
- return 0;
+ return kvm->arch.pgd;
}
-static inline void kvm_free_hwpgd(struct kvm *kvm) { }
-
-static inline void *kvm_get_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
{
- return kvm->arch.pgd;
+ return PTRS_PER_S2_PGD * sizeof(pgd_t);
}
struct kvm;
@@ -207,7 +206,7 @@ static inline void __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
bool need_flush = !vcpu_has_cache_enabled(vcpu) || ipa_uncached;
- VM_BUG_ON(size & PAGE_MASK);
+ VM_BUG_ON(size & ~PAGE_MASK);
if (!need_flush && !icache_is_pipt())
goto vipt_cache;
diff --git a/arch/arm/include/asm/mach/time.h b/arch/arm/include/asm/mach/time.h
index 90c12e1e695c..0f79e4dec7f9 100644
--- a/arch/arm/include/asm/mach/time.h
+++ b/arch/arm/include/asm/mach/time.h
@@ -12,8 +12,7 @@
extern void timer_tick(void);
-struct timespec;
-typedef void (*clock_access_fn)(struct timespec *);
+typedef void (*clock_access_fn)(struct timespec64 *);
extern int register_persistent_clock(clock_access_fn read_boot,
clock_access_fn read_persistent);
diff --git a/arch/arm/include/debug/at91.S b/arch/arm/include/debug/at91.S
index 80a6501b4d50..c3c45e628e33 100644
--- a/arch/arm/include/debug/at91.S
+++ b/arch/arm/include/debug/at91.S
@@ -18,8 +18,11 @@
#define AT91_DBGU 0xfc00c000 /* SAMA5D4_BASE_USART3 */
#endif
-/* Keep in sync with mach-at91/include/mach/hardware.h */
+#ifdef CONFIG_MMU
#define AT91_IO_P2V(x) ((x) - 0x01000000)
+#else
+#define AT91_IO_P2V(x) (x)
+#endif
#define AT91_DBGU_SR (0x14) /* Status Register */
#define AT91_DBGU_THR (0x1c) /* Transmitter Holding Register */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 0db25bc32864..2499867dd0d8 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -198,6 +198,9 @@ struct kvm_arch_memory_slot {
/* Highest supported SPI, from VGIC_NR_IRQS */
#define KVM_ARM_IRQ_GIC_MAX 127
+/* One single KVM irqchip, ie. the VGIC */
+#define KVM_NR_IRQCHIPS 1
+
/* PSCI interface */
#define KVM_PSCI_FN_BASE 0x95c1ba5e
#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n))
diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c
index 2d2d6087b9b1..488eaac56028 100644
--- a/arch/arm/kernel/asm-offsets.c
+++ b/arch/arm/kernel/asm-offsets.c
@@ -190,7 +190,6 @@ int main(void)
DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar));
DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar));
DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc));
-#ifdef CONFIG_KVM_ARM_VGIC
DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu));
DEFINE(VGIC_V2_CPU_HCR, offsetof(struct vgic_cpu, vgic_v2.vgic_hcr));
DEFINE(VGIC_V2_CPU_VMCR, offsetof(struct vgic_cpu, vgic_v2.vgic_vmcr));
@@ -200,14 +199,11 @@ int main(void)
DEFINE(VGIC_V2_CPU_APR, offsetof(struct vgic_cpu, vgic_v2.vgic_apr));
DEFINE(VGIC_V2_CPU_LR, offsetof(struct vgic_cpu, vgic_v2.vgic_lr));
DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr));
-#ifdef CONFIG_KVM_ARM_TIMER
DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl));
DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval));
DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff));
DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled));
-#endif
DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base));
-#endif
DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr));
#endif
return 0;
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index e55408e96559..1d60bebea4b8 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -246,12 +246,9 @@ static int __get_cpu_architecture(void)
if (cpu_arch)
cpu_arch += CPU_ARCH_ARMv3;
} else if ((read_cpuid_id() & 0x000f0000) == 0x000f0000) {
- unsigned int mmfr0;
-
/* Revised CPUID format. Read the Memory Model Feature
* Register 0 and check for VMSAv7 or PMSAv7 */
- asm("mrc p15, 0, %0, c0, c1, 4"
- : "=r" (mmfr0));
+ unsigned int mmfr0 = read_cpuid_ext(CPUID_EXT_MMFR0);
if ((mmfr0 & 0x0000000f) >= 0x00000003 ||
(mmfr0 & 0x000000f0) >= 0x00000030)
cpu_arch = CPU_ARCH_ARMv7;
diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c
index 0cc7e58c47cc..a66e37e211a9 100644
--- a/arch/arm/kernel/time.c
+++ b/arch/arm/kernel/time.c
@@ -76,7 +76,7 @@ void timer_tick(void)
}
#endif
-static void dummy_clock_access(struct timespec *ts)
+static void dummy_clock_access(struct timespec64 *ts)
{
ts->tv_sec = 0;
ts->tv_nsec = 0;
@@ -85,12 +85,12 @@ static void dummy_clock_access(struct timespec *ts)
static clock_access_fn __read_persistent_clock = dummy_clock_access;
static clock_access_fn __read_boot_clock = dummy_clock_access;;
-void read_persistent_clock(struct timespec *ts)
+void read_persistent_clock64(struct timespec64 *ts)
{
__read_persistent_clock(ts);
}
-void read_boot_clock(struct timespec *ts)
+void read_boot_clock64(struct timespec64 *ts)
{
__read_boot_clock(ts);
}
diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig
index 338ace78ed18..f1f79d104309 100644
--- a/arch/arm/kvm/Kconfig
+++ b/arch/arm/kvm/Kconfig
@@ -18,6 +18,7 @@ if VIRTUALIZATION
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
+ depends on MMU && OF
select PREEMPT_NOTIFIERS
select ANON_INODES
select HAVE_KVM_CPU_RELAX_INTERCEPT
@@ -26,10 +27,12 @@ config KVM
select KVM_ARM_HOST
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select SRCU
- depends on ARM_VIRT_EXT && ARM_LPAE
+ select MMU_NOTIFIER
+ select HAVE_KVM_EVENTFD
+ select HAVE_KVM_IRQFD
+ depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
---help---
- Support hosting virtualized guest machines. You will also
- need to select one or more of the processor modules below.
+ Support hosting virtualized guest machines.
This module provides access to the hardware capabilities through
a character device node named /dev/kvm.
@@ -37,10 +40,7 @@ config KVM
If unsure, say N.
config KVM_ARM_HOST
- bool "KVM host support for ARM cpus."
- depends on KVM
- depends on MMU
- select MMU_NOTIFIER
+ bool
---help---
Provides host support for ARM processors.
@@ -55,20 +55,4 @@ config KVM_ARM_MAX_VCPUS
large, so only choose a reasonable number that you expect to
actually use.
-config KVM_ARM_VGIC
- bool "KVM support for Virtual GIC"
- depends on KVM_ARM_HOST && OF
- select HAVE_KVM_IRQCHIP
- default y
- ---help---
- Adds support for a hardware assisted, in-kernel GIC emulation.
-
-config KVM_ARM_TIMER
- bool "KVM support for Architected Timers"
- depends on KVM_ARM_VGIC && ARM_ARCH_TIMER
- select HAVE_KVM_IRQCHIP
- default y
- ---help---
- Adds support for the Architected Timers in virtual machines
-
endif # VIRTUALIZATION
diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile
index 443b8bea43e9..139e46c08b6e 100644
--- a/arch/arm/kvm/Makefile
+++ b/arch/arm/kvm/Makefile
@@ -7,7 +7,7 @@ ifeq ($(plus_virt),+virt)
plus_virt_def := -DREQUIRES_VIRT=1
endif
-ccflags-y += -Ivirt/kvm -Iarch/arm/kvm
+ccflags-y += -Iarch/arm/kvm
CFLAGS_arm.o := -I. $(plus_virt_def)
CFLAGS_mmu.o := -I.
@@ -15,12 +15,12 @@ AFLAGS_init.o := -Wa,-march=armv7-a$(plus_virt)
AFLAGS_interrupts.o := -Wa,-march=armv7-a$(plus_virt)
KVM := ../../../virt/kvm
-kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-arm-y = $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
obj-y += kvm-arm.o init.o interrupts.o
obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o
obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
-obj-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
-obj-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+obj-y += $(KVM)/arm/vgic.o
+obj-y += $(KVM)/arm/vgic-v2.o
+obj-y += $(KVM)/arm/vgic-v2-emul.o
+obj-y += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 07e7eb1d7ab6..6f536451ab78 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -61,8 +61,6 @@ static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1);
static u8 kvm_next_vmid;
static DEFINE_SPINLOCK(kvm_vmid_lock);
-static bool vgic_present;
-
static void kvm_arm_set_running_vcpu(struct kvm_vcpu *vcpu)
{
BUG_ON(preemptible());
@@ -173,8 +171,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int r;
switch (ext) {
case KVM_CAP_IRQCHIP:
- r = vgic_present;
- break;
+ case KVM_CAP_IRQFD:
+ case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_USER_MEMORY:
case KVM_CAP_SYNC_MMU:
@@ -183,6 +181,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ARM_PSCI:
case KVM_CAP_ARM_PSCI_0_2:
case KVM_CAP_READONLY_MEM:
+ case KVM_CAP_MP_STATE:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
@@ -268,7 +267,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return 0;
+ return kvm_timer_should_fire(vcpu);
}
int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
@@ -313,13 +312,29 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- return -EINVAL;
+ if (vcpu->arch.pause)
+ mp_state->mp_state = KVM_MP_STATE_STOPPED;
+ else
+ mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
+
+ return 0;
}
int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
{
- return -EINVAL;
+ switch (mp_state->mp_state) {
+ case KVM_MP_STATE_RUNNABLE:
+ vcpu->arch.pause = false;
+ break;
+ case KVM_MP_STATE_STOPPED:
+ vcpu->arch.pause = true;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
}
/**
@@ -452,6 +467,11 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
return 0;
}
+bool kvm_arch_intc_initialized(struct kvm *kvm)
+{
+ return vgic_initialized(kvm);
+}
+
static void vcpu_pause(struct kvm_vcpu *vcpu)
{
wait_queue_head_t *wq = kvm_arch_vcpu_wq(vcpu);
@@ -540,7 +560,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
vcpu->mode = OUTSIDE_GUEST_MODE;
kvm_guest_exit();
- trace_kvm_exit(*vcpu_pc(vcpu));
+ trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
/*
* We may have taken a host interrupt in HYP mode (ie
* while executing the guest). This interrupt is still
@@ -831,8 +851,6 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
switch (dev_id) {
case KVM_ARM_DEVICE_VGIC_V2:
- if (!vgic_present)
- return -ENXIO;
return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
default:
return -ENODEV;
@@ -847,10 +865,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
switch (ioctl) {
case KVM_CREATE_IRQCHIP: {
- if (vgic_present)
- return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
- else
- return -ENXIO;
+ return kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2);
}
case KVM_ARM_SET_DEVICE_ADDR: {
struct kvm_arm_device_addr dev_addr;
@@ -1035,10 +1050,6 @@ static int init_hyp_mode(void)
if (err)
goto out_free_context;
-#ifdef CONFIG_KVM_ARM_VGIC
- vgic_present = true;
-#endif
-
/*
* Init HYP architected timer support
*/
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 384bab67c462..d503fbb787d3 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -109,22 +109,6 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
return -EINVAL;
}
-#ifndef CONFIG_KVM_ARM_TIMER
-
-#define NUM_TIMER_REGS 0
-
-static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
-{
- return 0;
-}
-
-static bool is_timer_reg(u64 index)
-{
- return false;
-}
-
-#else
-
#define NUM_TIMER_REGS 3
static bool is_timer_reg(u64 index)
@@ -152,8 +136,6 @@ static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
return 0;
}
-#endif
-
static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
void __user *uaddr = (void __user *)(long)reg->addr;
diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
index 14d488388480..35e4a3a0c476 100644
--- a/arch/arm/kvm/interrupts_head.S
+++ b/arch/arm/kvm/interrupts_head.S
@@ -402,7 +402,6 @@ vcpu .req r0 @ vcpu pointer always in r0
* Assumes vcpu pointer in vcpu reg
*/
.macro save_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
/* Get VGIC VCTRL base into r2 */
ldr r2, [vcpu, #VCPU_KVM]
ldr r2, [r2, #KVM_VGIC_VCTRL]
@@ -460,7 +459,6 @@ ARM_BE8(rev r6, r6 )
subs r4, r4, #1
bne 1b
2:
-#endif
.endm
/*
@@ -469,7 +467,6 @@ ARM_BE8(rev r6, r6 )
* Assumes vcpu pointer in vcpu reg
*/
.macro restore_vgic_state
-#ifdef CONFIG_KVM_ARM_VGIC
/* Get VGIC VCTRL base into r2 */
ldr r2, [vcpu, #VCPU_KVM]
ldr r2, [r2, #KVM_VGIC_VCTRL]
@@ -501,7 +498,6 @@ ARM_BE8(rev r6, r6 )
subs r4, r4, #1
bne 1b
2:
-#endif
.endm
#define CNTHCTL_PL1PCTEN (1 << 0)
@@ -515,7 +511,6 @@ ARM_BE8(rev r6, r6 )
* Clobbers r2-r5
*/
.macro save_timer_state
-#ifdef CONFIG_KVM_ARM_TIMER
ldr r4, [vcpu, #VCPU_KVM]
ldr r2, [r4, #KVM_TIMER_ENABLED]
cmp r2, #0
@@ -537,7 +532,6 @@ ARM_BE8(rev r6, r6 )
mcrr p15, 4, r2, r2, c14 @ CNTVOFF
1:
-#endif
@ Allow physical timer/counter access for the host
mrc p15, 4, r2, c14, c1, 0 @ CNTHCTL
orr r2, r2, #(CNTHCTL_PL1PCEN | CNTHCTL_PL1PCTEN)
@@ -559,7 +553,6 @@ ARM_BE8(rev r6, r6 )
bic r2, r2, #CNTHCTL_PL1PCEN
mcr p15, 4, r2, c14, c1, 0 @ CNTHCTL
-#ifdef CONFIG_KVM_ARM_TIMER
ldr r4, [vcpu, #VCPU_KVM]
ldr r2, [r4, #KVM_TIMER_ENABLED]
cmp r2, #0
@@ -579,7 +572,6 @@ ARM_BE8(rev r6, r6 )
and r2, r2, #3
mcr p15, 0, r2, c14, c3, 1 @ CNTV_CTL
1:
-#endif
.endm
.equ vmentry, 0
diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 5d3bfc0eb3f0..974b1c606d04 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -121,12 +121,11 @@ int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
return 0;
}
-static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
- struct kvm_exit_mmio *mmio)
+static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len)
{
unsigned long rt;
- int len;
- bool is_write, sign_extend;
+ int access_size;
+ bool sign_extend;
if (kvm_vcpu_dabt_isextabt(vcpu)) {
/* cache operation on I/O addr, tell guest unsupported */
@@ -140,17 +139,15 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return 1;
}
- len = kvm_vcpu_dabt_get_as(vcpu);
- if (unlikely(len < 0))
- return len;
+ access_size = kvm_vcpu_dabt_get_as(vcpu);
+ if (unlikely(access_size < 0))
+ return access_size;
- is_write = kvm_vcpu_dabt_iswrite(vcpu);
+ *is_write = kvm_vcpu_dabt_iswrite(vcpu);
sign_extend = kvm_vcpu_dabt_issext(vcpu);
rt = kvm_vcpu_dabt_get_rd(vcpu);
- mmio->is_write = is_write;
- mmio->phys_addr = fault_ipa;
- mmio->len = len;
+ *len = access_size;
vcpu->arch.mmio_decode.sign_extend = sign_extend;
vcpu->arch.mmio_decode.rt = rt;
@@ -165,20 +162,20 @@ static int decode_hsr(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa)
{
- struct kvm_exit_mmio mmio;
unsigned long data;
unsigned long rt;
int ret;
+ bool is_write;
+ int len;
+ u8 data_buf[8];
/*
- * Prepare MMIO operation. First stash it in a private
- * structure that we can use for in-kernel emulation. If the
- * kernel can't handle it, copy it into run->mmio and let user
- * space do its magic.
+ * Prepare MMIO operation. First decode the syndrome data we get
+ * from the CPU. Then try if some in-kernel emulation feels
+ * responsible, otherwise let user space do its magic.
*/
-
if (kvm_vcpu_dabt_isvalid(vcpu)) {
- ret = decode_hsr(vcpu, fault_ipa, &mmio);
+ ret = decode_hsr(vcpu, &is_write, &len);
if (ret)
return ret;
} else {
@@ -188,21 +185,34 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
rt = vcpu->arch.mmio_decode.rt;
- if (mmio.is_write) {
- data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
- mmio.len);
+ if (is_write) {
+ data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), len);
+
+ trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, data);
+ mmio_write_buf(data_buf, len, data);
- trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
- fault_ipa, data);
- mmio_write_buf(mmio.data, mmio.len, data);
+ ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+ data_buf);
} else {
- trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+ trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len,
fault_ipa, 0);
+
+ ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len,
+ data_buf);
}
- if (vgic_handle_mmio(vcpu, run, &mmio))
+ /* Now prepare kvm_run for the potential return to userland. */
+ run->mmio.is_write = is_write;
+ run->mmio.phys_addr = fault_ipa;
+ run->mmio.len = len;
+ memcpy(run->mmio.data, data_buf, len);
+
+ if (!ret) {
+ /* We handled the access successfully in the kernel. */
+ kvm_handle_mmio_return(vcpu, run);
return 1;
+ }
- kvm_prepare_mmio(run, &mmio);
+ run->exit_reason = KVM_EXIT_MMIO;
return 0;
}
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 3e6859bc3e11..15b050d46fc9 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -290,7 +290,7 @@ static void unmap_range(struct kvm *kvm, pgd_t *pgdp,
phys_addr_t addr = start, end = start + size;
phys_addr_t next;
- pgd = pgdp + pgd_index(addr);
+ pgd = pgdp + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
if (!pgd_none(*pgd))
@@ -355,7 +355,7 @@ static void stage2_flush_memslot(struct kvm *kvm,
phys_addr_t next;
pgd_t *pgd;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
next = kvm_pgd_addr_end(addr, end);
stage2_flush_puds(kvm, pgd, addr, next);
@@ -632,6 +632,20 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
__phys_to_pfn(phys_addr), PAGE_HYP_DEVICE);
}
+/* Free the HW pgd, one page at a time */
+static void kvm_free_hwpgd(void *hwpgd)
+{
+ free_pages_exact(hwpgd, kvm_get_hwpgd_size());
+}
+
+/* Allocate the HW PGD, making sure that each page gets its own refcount */
+static void *kvm_alloc_hwpgd(void)
+{
+ unsigned int size = kvm_get_hwpgd_size();
+
+ return alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
+}
+
/**
* kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation.
* @kvm: The KVM struct pointer for the VM.
@@ -645,15 +659,31 @@ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr)
*/
int kvm_alloc_stage2_pgd(struct kvm *kvm)
{
- int ret;
pgd_t *pgd;
+ void *hwpgd;
if (kvm->arch.pgd != NULL) {
kvm_err("kvm_arch already initialized?\n");
return -EINVAL;
}
+ hwpgd = kvm_alloc_hwpgd();
+ if (!hwpgd)
+ return -ENOMEM;
+
+ /* When the kernel uses more levels of page tables than the
+ * guest, we allocate a fake PGD and pre-populate it to point
+ * to the next-level page table, which will be the real
+ * initial page table pointed to by the VTTBR.
+ *
+ * When KVM_PREALLOC_LEVEL==2, we allocate a single page for
+ * the PMD and the kernel will use folded pud.
+ * When KVM_PREALLOC_LEVEL==1, we allocate 2 consecutive PUD
+ * pages.
+ */
if (KVM_PREALLOC_LEVEL > 0) {
+ int i;
+
/*
* Allocate fake pgd for the page table manipulation macros to
* work. This is not used by the hardware and we have no
@@ -661,30 +691,32 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm)
*/
pgd = (pgd_t *)kmalloc(PTRS_PER_S2_PGD * sizeof(pgd_t),
GFP_KERNEL | __GFP_ZERO);
+
+ if (!pgd) {
+ kvm_free_hwpgd(hwpgd);
+ return -ENOMEM;
+ }
+
+ /* Plug the HW PGD into the fake one. */
+ for (i = 0; i < PTRS_PER_S2_PGD; i++) {
+ if (KVM_PREALLOC_LEVEL == 1)
+ pgd_populate(NULL, pgd + i,
+ (pud_t *)hwpgd + i * PTRS_PER_PUD);
+ else if (KVM_PREALLOC_LEVEL == 2)
+ pud_populate(NULL, pud_offset(pgd, 0) + i,
+ (pmd_t *)hwpgd + i * PTRS_PER_PMD);
+ }
} else {
/*
* Allocate actual first-level Stage-2 page table used by the
* hardware for Stage-2 page table walks.
*/
- pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, S2_PGD_ORDER);
+ pgd = (pgd_t *)hwpgd;
}
- if (!pgd)
- return -ENOMEM;
-
- ret = kvm_prealloc_hwpgd(kvm, pgd);
- if (ret)
- goto out_err;
-
kvm_clean_pgd(pgd);
kvm->arch.pgd = pgd;
return 0;
-out_err:
- if (KVM_PREALLOC_LEVEL > 0)
- kfree(pgd);
- else
- free_pages((unsigned long)pgd, S2_PGD_ORDER);
- return ret;
}
/**
@@ -785,11 +817,10 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
return;
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
- kvm_free_hwpgd(kvm);
+ kvm_free_hwpgd(kvm_get_hwpgd(kvm));
if (KVM_PREALLOC_LEVEL > 0)
kfree(kvm->arch.pgd);
- else
- free_pages((unsigned long)kvm->arch.pgd, S2_PGD_ORDER);
+
kvm->arch.pgd = NULL;
}
@@ -799,7 +830,7 @@ static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache
pgd_t *pgd;
pud_t *pud;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
if (WARN_ON(pgd_none(*pgd))) {
if (!cache)
return NULL;
@@ -1089,7 +1120,7 @@ static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end)
pgd_t *pgd;
phys_addr_t next;
- pgd = kvm->arch.pgd + pgd_index(addr);
+ pgd = kvm->arch.pgd + kvm_pgd_index(addr);
do {
/*
* Release kvm_mmu_lock periodically if the memory region is
@@ -1299,10 +1330,51 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
out_unlock:
spin_unlock(&kvm->mmu_lock);
+ kvm_set_pfn_accessed(pfn);
kvm_release_pfn_clean(pfn);
return ret;
}
+/*
+ * Resolve the access fault by making the page young again.
+ * Note that because the faulting entry is guaranteed not to be
+ * cached in the TLB, we don't need to invalidate anything.
+ */
+static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+ pfn_t pfn;
+ bool pfn_valid = false;
+
+ trace_kvm_access_fault(fault_ipa);
+
+ spin_lock(&vcpu->kvm->mmu_lock);
+
+ pmd = stage2_get_pmd(vcpu->kvm, NULL, fault_ipa);
+ if (!pmd || pmd_none(*pmd)) /* Nothing there */
+ goto out;
+
+ if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */
+ *pmd = pmd_mkyoung(*pmd);
+ pfn = pmd_pfn(*pmd);
+ pfn_valid = true;
+ goto out;
+ }
+
+ pte = pte_offset_kernel(pmd, fault_ipa);
+ if (pte_none(*pte)) /* Nothing there either */
+ goto out;
+
+ *pte = pte_mkyoung(*pte); /* Just a page... */
+ pfn = pte_pfn(*pte);
+ pfn_valid = true;
+out:
+ spin_unlock(&vcpu->kvm->mmu_lock);
+ if (pfn_valid)
+ kvm_set_pfn_accessed(pfn);
+}
+
/**
* kvm_handle_guest_abort - handles all 2nd stage aborts
* @vcpu: the VCPU pointer
@@ -1333,7 +1405,8 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
/* Check the stage-2 fault is trans. fault or write fault */
fault_status = kvm_vcpu_trap_get_fault_type(vcpu);
- if (fault_status != FSC_FAULT && fault_status != FSC_PERM) {
+ if (fault_status != FSC_FAULT && fault_status != FSC_PERM &&
+ fault_status != FSC_ACCESS) {
kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n",
kvm_vcpu_trap_get_class(vcpu),
(unsigned long)kvm_vcpu_trap_get_fault(vcpu),
@@ -1369,6 +1442,12 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run)
/* Userspace should not be able to register out-of-bounds IPAs */
VM_BUG_ON(fault_ipa >= KVM_PHYS_SIZE);
+ if (fault_status == FSC_ACCESS) {
+ handle_access_fault(vcpu, fault_ipa);
+ ret = 1;
+ goto out_unlock;
+ }
+
ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status);
if (ret == 0)
ret = 1;
@@ -1377,15 +1456,16 @@ out_unlock:
return ret;
}
-static void handle_hva_to_gpa(struct kvm *kvm,
- unsigned long start,
- unsigned long end,
- void (*handler)(struct kvm *kvm,
- gpa_t gpa, void *data),
- void *data)
+static int handle_hva_to_gpa(struct kvm *kvm,
+ unsigned long start,
+ unsigned long end,
+ int (*handler)(struct kvm *kvm,
+ gpa_t gpa, void *data),
+ void *data)
{
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
+ int ret = 0;
slots = kvm_memslots(kvm);
@@ -1409,14 +1489,17 @@ static void handle_hva_to_gpa(struct kvm *kvm,
for (; gfn < gfn_end; ++gfn) {
gpa_t gpa = gfn << PAGE_SHIFT;
- handler(kvm, gpa, data);
+ ret |= handler(kvm, gpa, data);
}
}
+
+ return ret;
}
-static void kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
unmap_stage2_range(kvm, gpa, PAGE_SIZE);
+ return 0;
}
int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
@@ -1442,7 +1525,7 @@ int kvm_unmap_hva_range(struct kvm *kvm,
return 0;
}
-static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
+static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
{
pte_t *pte = (pte_t *)data;
@@ -1454,6 +1537,7 @@ static void kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, void *data)
* through this calling path.
*/
stage2_set_pte(kvm, NULL, gpa, pte, 0);
+ return 0;
}
@@ -1470,6 +1554,67 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte);
}
+static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pmd = stage2_get_pmd(kvm, NULL, gpa);
+ if (!pmd || pmd_none(*pmd)) /* Nothing there */
+ return 0;
+
+ if (kvm_pmd_huge(*pmd)) { /* THP, HugeTLB */
+ if (pmd_young(*pmd)) {
+ *pmd = pmd_mkold(*pmd);
+ return 1;
+ }
+
+ return 0;
+ }
+
+ pte = pte_offset_kernel(pmd, gpa);
+ if (pte_none(*pte))
+ return 0;
+
+ if (pte_young(*pte)) {
+ *pte = pte_mkold(*pte); /* Just a page... */
+ return 1;
+ }
+
+ return 0;
+}
+
+static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, void *data)
+{
+ pmd_t *pmd;
+ pte_t *pte;
+
+ pmd = stage2_get_pmd(kvm, NULL, gpa);
+ if (!pmd || pmd_none(*pmd)) /* Nothing there */
+ return 0;
+
+ if (kvm_pmd_huge(*pmd)) /* THP, HugeTLB */
+ return pmd_young(*pmd);
+
+ pte = pte_offset_kernel(pmd, gpa);
+ if (!pte_none(*pte)) /* Just a page... */
+ return pte_young(*pte);
+
+ return 0;
+}
+
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+ trace_kvm_age_hva(start, end);
+ return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
+}
+
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
+{
+ trace_kvm_test_age_hva(hva);
+ return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
+}
+
void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu)
{
mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 881874b1a036..0ec35392d208 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -25,18 +25,22 @@ TRACE_EVENT(kvm_entry,
);
TRACE_EVENT(kvm_exit,
- TP_PROTO(unsigned long vcpu_pc),
- TP_ARGS(vcpu_pc),
+ TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc),
+ TP_ARGS(exit_reason, vcpu_pc),
TP_STRUCT__entry(
+ __field( unsigned int, exit_reason )
__field( unsigned long, vcpu_pc )
),
TP_fast_assign(
+ __entry->exit_reason = exit_reason;
__entry->vcpu_pc = vcpu_pc;
),
- TP_printk("PC: 0x%08lx", __entry->vcpu_pc)
+ TP_printk("HSR_EC: 0x%04x, PC: 0x%08lx",
+ __entry->exit_reason,
+ __entry->vcpu_pc)
);
TRACE_EVENT(kvm_guest_fault,
@@ -64,6 +68,21 @@ TRACE_EVENT(kvm_guest_fault,
__entry->hxfar, __entry->vcpu_pc)
);
+TRACE_EVENT(kvm_access_fault,
+ TP_PROTO(unsigned long ipa),
+ TP_ARGS(ipa),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, ipa )
+ ),
+
+ TP_fast_assign(
+ __entry->ipa = ipa;
+ ),
+
+ TP_printk("IPA: %lx", __entry->ipa)
+);
+
TRACE_EVENT(kvm_irq_line,
TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level),
TP_ARGS(type, vcpu_idx, irq_num, level),
@@ -206,6 +225,39 @@ TRACE_EVENT(kvm_set_spte_hva,
TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva)
);
+TRACE_EVENT(kvm_age_hva,
+ TP_PROTO(unsigned long start, unsigned long end),
+ TP_ARGS(start, end),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, start )
+ __field( unsigned long, end )
+ ),
+
+ TP_fast_assign(
+ __entry->start = start;
+ __entry->end = end;
+ ),
+
+ TP_printk("mmu notifier age hva: %#08lx -- %#08lx",
+ __entry->start, __entry->end)
+);
+
+TRACE_EVENT(kvm_test_age_hva,
+ TP_PROTO(unsigned long hva),
+ TP_ARGS(hva),
+
+ TP_STRUCT__entry(
+ __field( unsigned long, hva )
+ ),
+
+ TP_fast_assign(
+ __entry->hva = hva;
+ ),
+
+ TP_printk("mmu notifier test age hva: %#08lx", __entry->hva)
+);
+
TRACE_EVENT(kvm_hvc,
TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm),
TP_ARGS(vcpu_pc, r0, imm),
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 5e34fb143309..aa4116e9452f 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -270,37 +270,35 @@ static void __init at91_pm_sram_init(void)
phys_addr_t sram_pbase;
unsigned long sram_base;
struct device_node *node;
- struct platform_device *pdev;
+ struct platform_device *pdev = NULL;
- node = of_find_compatible_node(NULL, NULL, "mmio-sram");
- if (!node) {
- pr_warn("%s: failed to find sram node!\n", __func__);
- return;
+ for_each_compatible_node(node, NULL, "mmio-sram") {
+ pdev = of_find_device_by_node(node);
+ if (pdev) {
+ of_node_put(node);
+ break;
+ }
}
- pdev = of_find_device_by_node(node);
if (!pdev) {
pr_warn("%s: failed to find sram device!\n", __func__);
- goto put_node;
+ return;
}
sram_pool = dev_get_gen_pool(&pdev->dev);
if (!sram_pool) {
pr_warn("%s: sram pool unavailable!\n", __func__);
- goto put_node;
+ return;
}
sram_base = gen_pool_alloc(sram_pool, at91_slow_clock_sz);
if (!sram_base) {
pr_warn("%s: unable to alloc ocram!\n", __func__);
- goto put_node;
+ return;
}
sram_pbase = gen_pool_virt_to_phys(sram_pool, sram_base);
slow_clock = __arm_ioremap_exec(sram_pbase, at91_slow_clock_sz, false);
-
-put_node:
- of_node_put(node);
}
#endif
diff --git a/arch/arm/mach-at91/pm.h b/arch/arm/mach-at91/pm.h
index d2c89963af2d..86c0aa819d25 100644
--- a/arch/arm/mach-at91/pm.h
+++ b/arch/arm/mach-at91/pm.h
@@ -44,7 +44,7 @@ static inline void at91rm9200_standby(void)
" mcr p15, 0, %0, c7, c0, 4\n\t"
" str %5, [%1, %2]"
:
- : "r" (0), "r" (AT91_BASE_SYS), "r" (AT91RM9200_SDRAMC_LPR),
+ : "r" (0), "r" (at91_ramc_base[0]), "r" (AT91RM9200_SDRAMC_LPR),
"r" (1), "r" (AT91RM9200_SDRAMC_SRR),
"r" (lpr));
}
diff --git a/arch/arm/mach-at91/pm_slowclock.S b/arch/arm/mach-at91/pm_slowclock.S
index 556151e85ec4..931f0e302c03 100644
--- a/arch/arm/mach-at91/pm_slowclock.S
+++ b/arch/arm/mach-at91/pm_slowclock.S
@@ -25,11 +25,6 @@
*/
#undef SLOWDOWN_MASTER_CLOCK
-#define MCKRDY_TIMEOUT 1000
-#define MOSCRDY_TIMEOUT 1000
-#define PLLALOCK_TIMEOUT 1000
-#define PLLBLOCK_TIMEOUT 1000
-
pmc .req r0
sdramc .req r1
ramc1 .req r2
@@ -41,60 +36,42 @@ tmp2 .req r5
* Wait until master clock is ready (after switching master clock source)
*/
.macro wait_mckrdy
- mov tmp2, #MCKRDY_TIMEOUT
-1: sub tmp2, tmp2, #1
- cmp tmp2, #0
- beq 2f
- ldr tmp1, [pmc, #AT91_PMC_SR]
+1: ldr tmp1, [pmc, #AT91_PMC_SR]
tst tmp1, #AT91_PMC_MCKRDY
beq 1b
-2:
.endm
/*
* Wait until master oscillator has stabilized.
*/
.macro wait_moscrdy
- mov tmp2, #MOSCRDY_TIMEOUT
-1: sub tmp2, tmp2, #1
- cmp tmp2, #0
- beq 2f
- ldr tmp1, [pmc, #AT91_PMC_SR]
+1: ldr tmp1, [pmc, #AT91_PMC_SR]
tst tmp1, #AT91_PMC_MOSCS
beq 1b
-2:
.endm
/*
* Wait until PLLA has locked.
*/
.macro wait_pllalock
- mov tmp2, #PLLALOCK_TIMEOUT
-1: sub tmp2, tmp2, #1
- cmp tmp2, #0
- beq 2f
- ldr tmp1, [pmc, #AT91_PMC_SR]
+1: ldr tmp1, [pmc, #AT91_PMC_SR]
tst tmp1, #AT91_PMC_LOCKA
beq 1b
-2:
.endm
/*
* Wait until PLLB has locked.
*/
.macro wait_pllblock
- mov tmp2, #PLLBLOCK_TIMEOUT
-1: sub tmp2, tmp2, #1
- cmp tmp2, #0
- beq 2f
- ldr tmp1, [pmc, #AT91_PMC_SR]
+1: ldr tmp1, [pmc, #AT91_PMC_SR]
tst tmp1, #AT91_PMC_LOCKB
beq 1b
-2:
.endm
.text
+ .arm
+
/* void at91_slow_clock(void __iomem *pmc, void __iomem *sdramc,
* void __iomem *ramc1, int memctrl)
*/
@@ -134,6 +111,16 @@ ddr_sr_enable:
cmp memctrl, #AT91_MEMCTRL_DDRSDR
bne sdr_sr_enable
+ /* LPDDR1 --> force DDR2 mode during self-refresh */
+ ldr tmp1, [sdramc, #AT91_DDRSDRC_MDR]
+ str tmp1, .saved_sam9_mdr
+ bic tmp1, tmp1, #~AT91_DDRSDRC_MD
+ cmp tmp1, #AT91_DDRSDRC_MD_LOW_POWER_DDR
+ ldreq tmp1, [sdramc, #AT91_DDRSDRC_MDR]
+ biceq tmp1, tmp1, #AT91_DDRSDRC_MD
+ orreq tmp1, tmp1, #AT91_DDRSDRC_MD_DDR2
+ streq tmp1, [sdramc, #AT91_DDRSDRC_MDR]
+
/* prepare for DDRAM self-refresh mode */
ldr tmp1, [sdramc, #AT91_DDRSDRC_LPR]
str tmp1, .saved_sam9_lpr
@@ -142,14 +129,26 @@ ddr_sr_enable:
/* figure out if we use the second ram controller */
cmp ramc1, #0
- ldrne tmp2, [ramc1, #AT91_DDRSDRC_LPR]
- strne tmp2, .saved_sam9_lpr1
- bicne tmp2, #AT91_DDRSDRC_LPCB
- orrne tmp2, #AT91_DDRSDRC_LPCB_SELF_REFRESH
+ beq ddr_no_2nd_ctrl
+
+ ldr tmp2, [ramc1, #AT91_DDRSDRC_MDR]
+ str tmp2, .saved_sam9_mdr1
+ bic tmp2, tmp2, #~AT91_DDRSDRC_MD
+ cmp tmp2, #AT91_DDRSDRC_MD_LOW_POWER_DDR
+ ldreq tmp2, [ramc1, #AT91_DDRSDRC_MDR]
+ biceq tmp2, tmp2, #AT91_DDRSDRC_MD
+ orreq tmp2, tmp2, #AT91_DDRSDRC_MD_DDR2
+ streq tmp2, [ramc1, #AT91_DDRSDRC_MDR]
+
+ ldr tmp2, [ramc1, #AT91_DDRSDRC_LPR]
+ str tmp2, .saved_sam9_lpr1
+ bic tmp2, #AT91_DDRSDRC_LPCB
+ orr tmp2, #AT91_DDRSDRC_LPCB_SELF_REFRESH
/* Enable DDRAM self-refresh mode */
+ str tmp2, [ramc1, #AT91_DDRSDRC_LPR]
+ddr_no_2nd_ctrl:
str tmp1, [sdramc, #AT91_DDRSDRC_LPR]
- strne tmp2, [ramc1, #AT91_DDRSDRC_LPR]
b sdr_sr_done
@@ -208,6 +207,7 @@ sdr_sr_done:
/* Turn off the main oscillator */
ldr tmp1, [pmc, #AT91_CKGR_MOR]
bic tmp1, tmp1, #AT91_PMC_MOSCEN
+ orr tmp1, tmp1, #AT91_PMC_KEY
str tmp1, [pmc, #AT91_CKGR_MOR]
/* Wait for interrupt */
@@ -216,6 +216,7 @@ sdr_sr_done:
/* Turn on the main oscillator */
ldr tmp1, [pmc, #AT91_CKGR_MOR]
orr tmp1, tmp1, #AT91_PMC_MOSCEN
+ orr tmp1, tmp1, #AT91_PMC_KEY
str tmp1, [pmc, #AT91_CKGR_MOR]
wait_moscrdy
@@ -280,12 +281,17 @@ sdr_sr_done:
*/
cmp memctrl, #AT91_MEMCTRL_DDRSDR
bne sdr_en_restore
+ /* Restore MDR in case of LPDDR1 */
+ ldr tmp1, .saved_sam9_mdr
+ str tmp1, [sdramc, #AT91_DDRSDRC_MDR]
/* Restore LPR on AT91 with DDRAM */
ldr tmp1, .saved_sam9_lpr
str tmp1, [sdramc, #AT91_DDRSDRC_LPR]
/* if we use the second ram controller */
cmp ramc1, #0
+ ldrne tmp2, .saved_sam9_mdr1
+ strne tmp2, [ramc1, #AT91_DDRSDRC_MDR]
ldrne tmp2, .saved_sam9_lpr1
strne tmp2, [ramc1, #AT91_DDRSDRC_LPR]
@@ -319,5 +325,11 @@ ram_restored:
.saved_sam9_lpr1:
.word 0
+.saved_sam9_mdr:
+ .word 0
+
+.saved_sam9_mdr1:
+ .word 0
+
ENTRY(at91_slow_clock_sz)
.word .-at91_slow_clock
diff --git a/arch/arm/mach-exynos/platsmp.c b/arch/arm/mach-exynos/platsmp.c
index 3f32c47a6d74..d2e9f12d12f1 100644
--- a/arch/arm/mach-exynos/platsmp.c
+++ b/arch/arm/mach-exynos/platsmp.c
@@ -126,8 +126,7 @@ static inline void platform_do_lowpower(unsigned int cpu, int *spurious)
*/
void exynos_cpu_power_down(int cpu)
{
- if (cpu == 0 && (of_machine_is_compatible("samsung,exynos5420") ||
- of_machine_is_compatible("samsung,exynos5800"))) {
+ if (cpu == 0 && (soc_is_exynos5420() || soc_is_exynos5800())) {
/*
* Bypass power down for CPU0 during suspend. Check for
* the SYS_PWR_REG value to decide if we are suspending
diff --git a/arch/arm/mach-exynos/pm_domains.c b/arch/arm/mach-exynos/pm_domains.c
index 20f267121b3e..37266a826437 100644
--- a/arch/arm/mach-exynos/pm_domains.c
+++ b/arch/arm/mach-exynos/pm_domains.c
@@ -161,6 +161,34 @@ no_clk:
of_genpd_add_provider_simple(np, &pd->pd);
}
+ /* Assign the child power domains to their parents */
+ for_each_compatible_node(np, NULL, "samsung,exynos4210-pd") {
+ struct generic_pm_domain *child_domain, *parent_domain;
+ struct of_phandle_args args;
+
+ args.np = np;
+ args.args_count = 0;
+ child_domain = of_genpd_get_from_provider(&args);
+ if (!child_domain)
+ continue;
+
+ if (of_parse_phandle_with_args(np, "power-domains",
+ "#power-domain-cells", 0, &args) != 0)
+ continue;
+
+ parent_domain = of_genpd_get_from_provider(&args);
+ if (!parent_domain)
+ continue;
+
+ if (pm_genpd_add_subdomain(parent_domain, child_domain))
+ pr_warn("%s failed to add subdomain: %s\n",
+ parent_domain->name, child_domain->name);
+ else
+ pr_info("%s has as child subdomain: %s.\n",
+ parent_domain->name, child_domain->name);
+ of_node_put(np);
+ }
+
return 0;
}
arch_initcall(exynos4_pm_init_power_domain);
diff --git a/arch/arm/mach-exynos/suspend.c b/arch/arm/mach-exynos/suspend.c
index 52e2b1a2fddb..318d127df147 100644
--- a/arch/arm/mach-exynos/suspend.c
+++ b/arch/arm/mach-exynos/suspend.c
@@ -87,8 +87,8 @@ static unsigned int exynos_pmu_spare3;
static u32 exynos_irqwake_intmask = 0xffffffff;
static const struct exynos_wkup_irq exynos3250_wkup_irq[] = {
- { 73, BIT(1) }, /* RTC alarm */
- { 74, BIT(2) }, /* RTC tick */
+ { 105, BIT(1) }, /* RTC alarm */
+ { 106, BIT(2) }, /* RTC tick */
{ /* sentinel */ },
};
diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c
index 4ad6e473cf83..9de3412af406 100644
--- a/arch/arm/mach-imx/mach-imx6q.c
+++ b/arch/arm/mach-imx/mach-imx6q.c
@@ -211,8 +211,9 @@ static void __init imx6q_1588_init(void)
* set bit IOMUXC_GPR1[21]. Or the PTP clock must be from pad
* (external OSC), and we need to clear the bit.
*/
- clksel = ptp_clk == enet_ref ? IMX6Q_GPR1_ENET_CLK_SEL_ANATOP :
- IMX6Q_GPR1_ENET_CLK_SEL_PAD;
+ clksel = clk_is_match(ptp_clk, enet_ref) ?
+ IMX6Q_GPR1_ENET_CLK_SEL_ANATOP :
+ IMX6Q_GPR1_ENET_CLK_SEL_PAD;
gpr = syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
if (!IS_ERR(gpr))
regmap_update_bits(gpr, IOMUXC_GPR1,
diff --git a/arch/arm/mach-msm/board-halibut.c b/arch/arm/mach-msm/board-halibut.c
index 61bfe584a9d7..fc832040c6e9 100644
--- a/arch/arm/mach-msm/board-halibut.c
+++ b/arch/arm/mach-msm/board-halibut.c
@@ -20,6 +20,7 @@
#include <linux/input.h>
#include <linux/io.h>
#include <linux/delay.h>
+#include <linux/smc91x.h>
#include <mach/hardware.h>
#include <asm/mach-types.h>
@@ -46,15 +47,20 @@ static struct resource smc91x_resources[] = {
[1] = {
.start = MSM_GPIO_TO_INT(49),
.end = MSM_GPIO_TO_INT(49),
- .flags = IORESOURCE_IRQ,
+ .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
},
};
+static struct smc91x_platdata smc91x_platdata = {
+ .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
+};
+
static struct platform_device smc91x_device = {
.name = "smc91x",
.id = 0,
.num_resources = ARRAY_SIZE(smc91x_resources),
.resource = smc91x_resources,
+ .dev.platform_data = &smc91x_platdata,
};
static struct platform_device *devices[] __initdata = {
diff --git a/arch/arm/mach-msm/board-qsd8x50.c b/arch/arm/mach-msm/board-qsd8x50.c
index 4c748616ef47..10016a3bc698 100644
--- a/arch/arm/mach-msm/board-qsd8x50.c
+++ b/arch/arm/mach-msm/board-qsd8x50.c
@@ -22,6 +22,7 @@
#include <linux/usb/msm_hsusb.h>
#include <linux/err.h>
#include <linux/clkdev.h>
+#include <linux/smc91x.h>
#include <asm/mach-types.h>
#include <asm/mach/arch.h>
@@ -49,15 +50,20 @@ static struct resource smc91x_resources[] = {
.flags = IORESOURCE_MEM,
},
[1] = {
- .flags = IORESOURCE_IRQ,
+ .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
},
};
+static struct smc91x_platdata smc91x_platdata = {
+ .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
+};
+
static struct platform_device smc91x_device = {
.name = "smc91x",
.id = 0,
.num_resources = ARRAY_SIZE(smc91x_resources),
.resource = smc91x_resources,
+ .dev.platform_data = &smc91x_platdata,
};
static int __init msm_init_smc91x(void)
diff --git a/arch/arm/mach-omap2/cpuidle44xx.c b/arch/arm/mach-omap2/cpuidle44xx.c
index 01e398a868bc..57d429830e09 100644
--- a/arch/arm/mach-omap2/cpuidle44xx.c
+++ b/arch/arm/mach-omap2/cpuidle44xx.c
@@ -14,7 +14,7 @@
#include <linux/cpuidle.h>
#include <linux/cpu_pm.h>
#include <linux/export.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
#include <asm/cpuidle.h>
#include <asm/proc-fns.h>
@@ -84,7 +84,6 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
{
struct idle_statedata *cx = state_ptr + index;
u32 mpuss_can_lose_context = 0;
- int cpu_id = smp_processor_id();
/*
* CPU0 has to wait and stay ON until CPU1 is OFF state.
@@ -112,7 +111,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
mpuss_can_lose_context = (cx->mpu_state == PWRDM_POWER_RET) &&
(cx->mpu_logic_state == PWRDM_POWER_OFF);
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu_id);
+ tick_broadcast_enter();
/*
* Call idle CPU PM enter notifier chain so that
@@ -169,7 +168,7 @@ static int omap_enter_idle_coupled(struct cpuidle_device *dev,
if (dev->cpu == 0 && mpuss_can_lose_context)
cpu_cluster_pm_exit();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu_id);
+ tick_broadcast_exit();
fail:
cpuidle_coupled_parallel_barrier(dev, &abort_barrier);
@@ -184,8 +183,7 @@ fail:
*/
static void omap_setup_broadcast_timer(void *arg)
{
- int cpu = smp_processor_id();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ON, &cpu);
+ tick_broadcast_enable();
}
static struct cpuidle_driver omap4_idle_driver = {
diff --git a/arch/arm/mach-omap2/id.c b/arch/arm/mach-omap2/id.c
index 2a2f4d56e4c8..25f1beea453e 100644
--- a/arch/arm/mach-omap2/id.c
+++ b/arch/arm/mach-omap2/id.c
@@ -720,6 +720,8 @@ static const char * __init omap_get_family(void)
return kasprintf(GFP_KERNEL, "OMAP4");
else if (soc_is_omap54xx())
return kasprintf(GFP_KERNEL, "OMAP5");
+ else if (soc_is_am33xx() || soc_is_am335x())
+ return kasprintf(GFP_KERNEL, "AM33xx");
else if (soc_is_am43xx())
return kasprintf(GFP_KERNEL, "AM43xx");
else if (soc_is_dra7xx())
diff --git a/arch/arm/mach-omap2/omap_hwmod.c b/arch/arm/mach-omap2/omap_hwmod.c
index 92afb723dcfc..355b08936871 100644
--- a/arch/arm/mach-omap2/omap_hwmod.c
+++ b/arch/arm/mach-omap2/omap_hwmod.c
@@ -1692,16 +1692,15 @@ static int _deassert_hardreset(struct omap_hwmod *oh, const char *name)
if (ret == -EBUSY)
pr_warn("omap_hwmod: %s: failed to hardreset\n", oh->name);
- if (!ret) {
+ if (oh->clkdm) {
/*
* Set the clockdomain to HW_AUTO, assuming that the
* previous state was HW_AUTO.
*/
- if (oh->clkdm && hwsup)
+ if (hwsup)
clkdm_allow_idle(oh->clkdm);
- } else {
- if (oh->clkdm)
- clkdm_hwmod_disable(oh->clkdm, oh);
+
+ clkdm_hwmod_disable(oh->clkdm, oh);
}
return ret;
@@ -2698,6 +2697,7 @@ static int __init _register(struct omap_hwmod *oh)
INIT_LIST_HEAD(&oh->master_ports);
INIT_LIST_HEAD(&oh->slave_ports);
spin_lock_init(&oh->_lock);
+ lockdep_set_class(&oh->_lock, &oh->hwmod_key);
oh->_state = _HWMOD_STATE_REGISTERED;
diff --git a/arch/arm/mach-omap2/omap_hwmod.h b/arch/arm/mach-omap2/omap_hwmod.h
index 9d4bec6ee742..9611c91d9b82 100644
--- a/arch/arm/mach-omap2/omap_hwmod.h
+++ b/arch/arm/mach-omap2/omap_hwmod.h
@@ -674,6 +674,7 @@ struct omap_hwmod {
u32 _sysc_cache;
void __iomem *_mpu_rt_va;
spinlock_t _lock;
+ struct lock_class_key hwmod_key; /* unique lock class */
struct list_head node;
struct omap_hwmod_ocp_if *_mpu_port;
unsigned int (*xlate_irq)(unsigned int);
diff --git a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
index e8692e7675b8..16fe7a1b7a35 100644
--- a/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
+++ b/arch/arm/mach-omap2/omap_hwmod_7xx_data.c
@@ -1466,55 +1466,18 @@ static struct omap_hwmod dra7xx_ocp2scp3_hwmod = {
*
*/
-static struct omap_hwmod_class dra7xx_pcie_hwmod_class = {
+static struct omap_hwmod_class dra7xx_pciess_hwmod_class = {
.name = "pcie",
};
/* pcie1 */
-static struct omap_hwmod dra7xx_pcie1_hwmod = {
+static struct omap_hwmod dra7xx_pciess1_hwmod = {
.name = "pcie1",
- .class = &dra7xx_pcie_hwmod_class,
+ .class = &dra7xx_pciess_hwmod_class,
.clkdm_name = "pcie_clkdm",
.main_clk = "l4_root_clk_div",
.prcm = {
.omap4 = {
- .clkctrl_offs = DRA7XX_CM_PCIE_CLKSTCTRL_OFFSET,
- .modulemode = MODULEMODE_SWCTRL,
- },
- },
-};
-
-/* pcie2 */
-static struct omap_hwmod dra7xx_pcie2_hwmod = {
- .name = "pcie2",
- .class = &dra7xx_pcie_hwmod_class,
- .clkdm_name = "pcie_clkdm",
- .main_clk = "l4_root_clk_div",
- .prcm = {
- .omap4 = {
- .clkctrl_offs = DRA7XX_CM_PCIE_CLKSTCTRL_OFFSET,
- .modulemode = MODULEMODE_SWCTRL,
- },
- },
-};
-
-/*
- * 'PCIE PHY' class
- *
- */
-
-static struct omap_hwmod_class dra7xx_pcie_phy_hwmod_class = {
- .name = "pcie-phy",
-};
-
-/* pcie1 phy */
-static struct omap_hwmod dra7xx_pcie1_phy_hwmod = {
- .name = "pcie1-phy",
- .class = &dra7xx_pcie_phy_hwmod_class,
- .clkdm_name = "l3init_clkdm",
- .main_clk = "l4_root_clk_div",
- .prcm = {
- .omap4 = {
.clkctrl_offs = DRA7XX_CM_L3INIT_PCIESS1_CLKCTRL_OFFSET,
.context_offs = DRA7XX_RM_L3INIT_PCIESS1_CONTEXT_OFFSET,
.modulemode = MODULEMODE_SWCTRL,
@@ -1522,11 +1485,11 @@ static struct omap_hwmod dra7xx_pcie1_phy_hwmod = {
},
};
-/* pcie2 phy */
-static struct omap_hwmod dra7xx_pcie2_phy_hwmod = {
- .name = "pcie2-phy",
- .class = &dra7xx_pcie_phy_hwmod_class,
- .clkdm_name = "l3init_clkdm",
+/* pcie2 */
+static struct omap_hwmod dra7xx_pciess2_hwmod = {
+ .name = "pcie2",
+ .class = &dra7xx_pciess_hwmod_class,
+ .clkdm_name = "pcie_clkdm",
.main_clk = "l4_root_clk_div",
.prcm = {
.omap4 = {
@@ -2877,50 +2840,34 @@ static struct omap_hwmod_ocp_if dra7xx_l4_cfg__ocp2scp3 = {
.user = OCP_USER_MPU | OCP_USER_SDMA,
};
-/* l3_main_1 -> pcie1 */
-static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pcie1 = {
+/* l3_main_1 -> pciess1 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pciess1 = {
.master = &dra7xx_l3_main_1_hwmod,
- .slave = &dra7xx_pcie1_hwmod,
+ .slave = &dra7xx_pciess1_hwmod,
.clk = "l3_iclk_div",
.user = OCP_USER_MPU | OCP_USER_SDMA,
};
-/* l4_cfg -> pcie1 */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie1 = {
+/* l4_cfg -> pciess1 */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pciess1 = {
.master = &dra7xx_l4_cfg_hwmod,
- .slave = &dra7xx_pcie1_hwmod,
+ .slave = &dra7xx_pciess1_hwmod,
.clk = "l4_root_clk_div",
.user = OCP_USER_MPU | OCP_USER_SDMA,
};
-/* l3_main_1 -> pcie2 */
-static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pcie2 = {
+/* l3_main_1 -> pciess2 */
+static struct omap_hwmod_ocp_if dra7xx_l3_main_1__pciess2 = {
.master = &dra7xx_l3_main_1_hwmod,
- .slave = &dra7xx_pcie2_hwmod,
+ .slave = &dra7xx_pciess2_hwmod,
.clk = "l3_iclk_div",
.user = OCP_USER_MPU | OCP_USER_SDMA,
};
-/* l4_cfg -> pcie2 */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie2 = {
- .master = &dra7xx_l4_cfg_hwmod,
- .slave = &dra7xx_pcie2_hwmod,
- .clk = "l4_root_clk_div",
- .user = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_cfg -> pcie1 phy */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie1_phy = {
- .master = &dra7xx_l4_cfg_hwmod,
- .slave = &dra7xx_pcie1_phy_hwmod,
- .clk = "l4_root_clk_div",
- .user = OCP_USER_MPU | OCP_USER_SDMA,
-};
-
-/* l4_cfg -> pcie2 phy */
-static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pcie2_phy = {
+/* l4_cfg -> pciess2 */
+static struct omap_hwmod_ocp_if dra7xx_l4_cfg__pciess2 = {
.master = &dra7xx_l4_cfg_hwmod,
- .slave = &dra7xx_pcie2_phy_hwmod,
+ .slave = &dra7xx_pciess2_hwmod,
.clk = "l4_root_clk_div",
.user = OCP_USER_MPU | OCP_USER_SDMA,
};
@@ -3327,12 +3274,10 @@ static struct omap_hwmod_ocp_if *dra7xx_hwmod_ocp_ifs[] __initdata = {
&dra7xx_l4_cfg__mpu,
&dra7xx_l4_cfg__ocp2scp1,
&dra7xx_l4_cfg__ocp2scp3,
- &dra7xx_l3_main_1__pcie1,
- &dra7xx_l4_cfg__pcie1,
- &dra7xx_l3_main_1__pcie2,
- &dra7xx_l4_cfg__pcie2,
- &dra7xx_l4_cfg__pcie1_phy,
- &dra7xx_l4_cfg__pcie2_phy,
+ &dra7xx_l3_main_1__pciess1,
+ &dra7xx_l4_cfg__pciess1,
+ &dra7xx_l3_main_1__pciess2,
+ &dra7xx_l4_cfg__pciess2,
&dra7xx_l3_main_1__qspi,
&dra7xx_l4_per3__rtcss,
&dra7xx_l4_cfg__sata,
diff --git a/arch/arm/mach-omap2/pdata-quirks.c b/arch/arm/mach-omap2/pdata-quirks.c
index 190fa43e7479..e642b079e9f3 100644
--- a/arch/arm/mach-omap2/pdata-quirks.c
+++ b/arch/arm/mach-omap2/pdata-quirks.c
@@ -173,6 +173,7 @@ static void __init omap3_igep0030_rev_g_legacy_init(void)
static void __init omap3_evm_legacy_init(void)
{
+ hsmmc2_internal_input_clk();
legacy_init_wl12xx(WL12XX_REFCLOCK_38, 0, 149);
}
diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c
index a08a617a6c11..d6d6bc39e05c 100644
--- a/arch/arm/mach-omap2/prm44xx.c
+++ b/arch/arm/mach-omap2/prm44xx.c
@@ -252,10 +252,10 @@ static void omap44xx_prm_save_and_clear_irqen(u32 *saved_mask)
{
saved_mask[0] =
omap4_prm_read_inst_reg(OMAP4430_PRM_OCP_SOCKET_INST,
- OMAP4_PRM_IRQSTATUS_MPU_OFFSET);
+ OMAP4_PRM_IRQENABLE_MPU_OFFSET);
saved_mask[1] =
omap4_prm_read_inst_reg(OMAP4430_PRM_OCP_SOCKET_INST,
- OMAP4_PRM_IRQSTATUS_MPU_2_OFFSET);
+ OMAP4_PRM_IRQENABLE_MPU_2_OFFSET);
omap4_prm_write_inst_reg(0, OMAP4430_PRM_OCP_SOCKET_INST,
OMAP4_PRM_IRQENABLE_MPU_OFFSET);
diff --git a/arch/arm/mach-pxa/idp.c b/arch/arm/mach-pxa/idp.c
index 343c4e3a7c5d..f6d02e4cbcda 100644
--- a/arch/arm/mach-pxa/idp.c
+++ b/arch/arm/mach-pxa/idp.c
@@ -36,6 +36,7 @@
#include <linux/platform_data/video-pxafb.h>
#include <mach/bitfield.h>
#include <linux/platform_data/mmc-pxamci.h>
+#include <linux/smc91x.h>
#include "generic.h"
#include "devices.h"
@@ -81,11 +82,16 @@ static struct resource smc91x_resources[] = {
}
};
+static struct smc91x_platdata smc91x_platdata = {
+ .flags = SMC91X_USE_32BIT | SMC91X_USE_DMA | SMC91X_NOWAIT,
+};
+
static struct platform_device smc91x_device = {
.name = "smc91x",
.id = 0,
.num_resources = ARRAY_SIZE(smc91x_resources),
.resource = smc91x_resources,
+ .dev.platform_data = &smc91x_platdata,
};
static void idp_backlight_power(int on)
diff --git a/arch/arm/mach-pxa/irq.c b/arch/arm/mach-pxa/irq.c
index 0eecd83c624e..89a7c06570d3 100644
--- a/arch/arm/mach-pxa/irq.c
+++ b/arch/arm/mach-pxa/irq.c
@@ -11,6 +11,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#include <linux/bitops.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/interrupt.h>
@@ -40,7 +41,6 @@
#define ICHP_VAL_IRQ (1 << 31)
#define ICHP_IRQ(i) (((i) >> 16) & 0x7fff)
#define IPR_VALID (1 << 31)
-#define IRQ_BIT(n) (((n) - PXA_IRQ(0)) & 0x1f)
#define MAX_INTERNAL_IRQS 128
@@ -51,6 +51,7 @@
static void __iomem *pxa_irq_base;
static int pxa_internal_irq_nr;
static bool cpu_has_ipr;
+static struct irq_domain *pxa_irq_domain;
static inline void __iomem *irq_base(int i)
{
@@ -66,18 +67,20 @@ static inline void __iomem *irq_base(int i)
void pxa_mask_irq(struct irq_data *d)
{
void __iomem *base = irq_data_get_irq_chip_data(d);
+ irq_hw_number_t irq = irqd_to_hwirq(d);
uint32_t icmr = __raw_readl(base + ICMR);
- icmr &= ~(1 << IRQ_BIT(d->irq));
+ icmr &= ~BIT(irq & 0x1f);
__raw_writel(icmr, base + ICMR);
}
void pxa_unmask_irq(struct irq_data *d)
{
void __iomem *base = irq_data_get_irq_chip_data(d);
+ irq_hw_number_t irq = irqd_to_hwirq(d);
uint32_t icmr = __raw_readl(base + ICMR);
- icmr |= 1 << IRQ_BIT(d->irq);
+ icmr |= BIT(irq & 0x1f);
__raw_writel(icmr, base + ICMR);
}
@@ -118,40 +121,63 @@ asmlinkage void __exception_irq_entry ichp_handle_irq(struct pt_regs *regs)
} while (1);
}
-void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int))
+static int pxa_irq_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw)
{
- int irq, i, n;
+ void __iomem *base = irq_base(hw / 32);
- BUG_ON(irq_nr > MAX_INTERNAL_IRQS);
+ /* initialize interrupt priority */
+ if (cpu_has_ipr)
+ __raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw));
+
+ irq_set_chip_and_handler(virq, &pxa_internal_irq_chip,
+ handle_level_irq);
+ irq_set_chip_data(virq, base);
+ set_irq_flags(virq, IRQF_VALID);
+
+ return 0;
+}
+
+static struct irq_domain_ops pxa_irq_ops = {
+ .map = pxa_irq_map,
+ .xlate = irq_domain_xlate_onecell,
+};
+
+static __init void
+pxa_init_irq_common(struct device_node *node, int irq_nr,
+ int (*fn)(struct irq_data *, unsigned int))
+{
+ int n;
pxa_internal_irq_nr = irq_nr;
- cpu_has_ipr = !cpu_is_pxa25x();
- pxa_irq_base = io_p2v(0x40d00000);
+ pxa_irq_domain = irq_domain_add_legacy(node, irq_nr,
+ PXA_IRQ(0), 0,
+ &pxa_irq_ops, NULL);
+ if (!pxa_irq_domain)
+ panic("Unable to add PXA IRQ domain\n");
+ irq_set_default_host(pxa_irq_domain);
for (n = 0; n < irq_nr; n += 32) {
void __iomem *base = irq_base(n >> 5);
__raw_writel(0, base + ICMR); /* disable all IRQs */
__raw_writel(0, base + ICLR); /* all IRQs are IRQ, not FIQ */
- for (i = n; (i < (n + 32)) && (i < irq_nr); i++) {
- /* initialize interrupt priority */
- if (cpu_has_ipr)
- __raw_writel(i | IPR_VALID, pxa_irq_base + IPR(i));
-
- irq = PXA_IRQ(i);
- irq_set_chip_and_handler(irq, &pxa_internal_irq_chip,
- handle_level_irq);
- irq_set_chip_data(irq, base);
- set_irq_flags(irq, IRQF_VALID);
- }
}
-
/* only unmasked interrupts kick us out of idle */
__raw_writel(1, irq_base(0) + ICCR);
pxa_internal_irq_chip.irq_set_wake = fn;
}
+void __init pxa_init_irq(int irq_nr, int (*fn)(struct irq_data *, unsigned int))
+{
+ BUG_ON(irq_nr > MAX_INTERNAL_IRQS);
+
+ pxa_irq_base = io_p2v(0x40d00000);
+ cpu_has_ipr = !cpu_is_pxa25x();
+ pxa_init_irq_common(NULL, irq_nr, fn);
+}
+
#ifdef CONFIG_PM
static unsigned long saved_icmr[MAX_INTERNAL_IRQS/32];
static unsigned long saved_ipr[MAX_INTERNAL_IRQS];
@@ -203,30 +229,6 @@ struct syscore_ops pxa_irq_syscore_ops = {
};
#ifdef CONFIG_OF
-static struct irq_domain *pxa_irq_domain;
-
-static int pxa_irq_map(struct irq_domain *h, unsigned int virq,
- irq_hw_number_t hw)
-{
- void __iomem *base = irq_base(hw / 32);
-
- /* initialize interrupt priority */
- if (cpu_has_ipr)
- __raw_writel(hw | IPR_VALID, pxa_irq_base + IPR(hw));
-
- irq_set_chip_and_handler(hw, &pxa_internal_irq_chip,
- handle_level_irq);
- irq_set_chip_data(hw, base);
- set_irq_flags(hw, IRQF_VALID);
-
- return 0;
-}
-
-static struct irq_domain_ops pxa_irq_ops = {
- .map = pxa_irq_map,
- .xlate = irq_domain_xlate_onecell,
-};
-
static const struct of_device_id intc_ids[] __initconst = {
{ .compatible = "marvell,pxa-intc", },
{}
@@ -236,7 +238,7 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int))
{
struct device_node *node;
struct resource res;
- int n, ret;
+ int ret;
node = of_find_matching_node(NULL, intc_ids);
if (!node) {
@@ -267,23 +269,6 @@ void __init pxa_dt_irq_init(int (*fn)(struct irq_data *, unsigned int))
return;
}
- pxa_irq_domain = irq_domain_add_legacy(node, pxa_internal_irq_nr, 0, 0,
- &pxa_irq_ops, NULL);
- if (!pxa_irq_domain)
- panic("Unable to add PXA IRQ domain\n");
-
- irq_set_default_host(pxa_irq_domain);
-
- for (n = 0; n < pxa_internal_irq_nr; n += 32) {
- void __iomem *base = irq_base(n >> 5);
-
- __raw_writel(0, base + ICMR); /* disable all IRQs */
- __raw_writel(0, base + ICLR); /* all IRQs are IRQ, not FIQ */
- }
-
- /* only unmasked interrupts kick us out of idle */
- __raw_writel(1, irq_base(0) + ICCR);
-
- pxa_internal_irq_chip.irq_set_wake = fn;
+ pxa_init_irq_common(node, pxa_internal_irq_nr, fn);
}
#endif /* CONFIG_OF */
diff --git a/arch/arm/mach-pxa/lpd270.c b/arch/arm/mach-pxa/lpd270.c
index ad777b353bd5..eaee2c20b189 100644
--- a/arch/arm/mach-pxa/lpd270.c
+++ b/arch/arm/mach-pxa/lpd270.c
@@ -24,6 +24,7 @@
#include <linux/mtd/mtd.h>
#include <linux/mtd/partitions.h>
#include <linux/pwm_backlight.h>
+#include <linux/smc91x.h>
#include <asm/types.h>
#include <asm/setup.h>
@@ -189,15 +190,20 @@ static struct resource smc91x_resources[] = {
[1] = {
.start = LPD270_ETHERNET_IRQ,
.end = LPD270_ETHERNET_IRQ,
- .flags = IORESOURCE_IRQ,
+ .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE,
},
};
+struct smc91x_platdata smc91x_platdata = {
+ .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
+};
+
static struct platform_device smc91x_device = {
.name = "smc91x",
.id = 0,
.num_resources = ARRAY_SIZE(smc91x_resources),
.resource = smc91x_resources,
+ .dev.platform_data = &smc91x_platdata,
};
static struct resource lpd270_flash_resources[] = {
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index 205f9bf3821e..ac2ae5c71ab4 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -412,7 +412,7 @@ static struct fixed_voltage_config can_regulator_pdata = {
};
static struct platform_device can_regulator_device = {
- .name = "reg-fixed-volage",
+ .name = "reg-fixed-voltage",
.id = 0,
.dev = {
.platform_data = &can_regulator_pdata,
diff --git a/arch/arm/mach-realview/core.c b/arch/arm/mach-realview/core.c
index 850e506926df..c309593abdb2 100644
--- a/arch/arm/mach-realview/core.c
+++ b/arch/arm/mach-realview/core.c
@@ -28,6 +28,7 @@
#include <linux/platform_data/video-clcd-versatile.h>
#include <linux/io.h>
#include <linux/smsc911x.h>
+#include <linux/smc91x.h>
#include <linux/ata_platform.h>
#include <linux/amba/mmci.h>
#include <linux/gfp.h>
@@ -94,6 +95,10 @@ static struct smsc911x_platform_config smsc911x_config = {
.phy_interface = PHY_INTERFACE_MODE_MII,
};
+static struct smc91x_platdata smc91x_platdata = {
+ .flags = SMC91X_USE_32BIT | SMC91X_NOWAIT,
+};
+
static struct platform_device realview_eth_device = {
.name = "smsc911x",
.id = 0,
@@ -107,6 +112,8 @@ int realview_eth_register(const char *name, struct resource *res)
realview_eth_device.resource = res;
if (strcmp(realview_eth_device.name, "smsc911x") == 0)
realview_eth_device.dev.platform_data = &smsc911x_config;
+ else
+ realview_eth_device.dev.platform_data = &smc91x_platdata;
return platform_device_register(&realview_eth_device);
}
diff --git a/arch/arm/mach-realview/realview_eb.c b/arch/arm/mach-realview/realview_eb.c
index 64c88d657f9e..b3869cbbcc68 100644
--- a/arch/arm/mach-realview/realview_eb.c
+++ b/arch/arm/mach-realview/realview_eb.c
@@ -234,7 +234,7 @@ static struct resource realview_eb_eth_resources[] = {
[1] = {
.start = IRQ_EB_ETH,
.end = IRQ_EB_ETH,
- .flags = IORESOURCE_IRQ,
+ .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHEDGE,
},
};
diff --git a/arch/arm/mach-sa1100/neponset.c b/arch/arm/mach-sa1100/neponset.c
index 169262e3040d..af868d258e66 100644
--- a/arch/arm/mach-sa1100/neponset.c
+++ b/arch/arm/mach-sa1100/neponset.c
@@ -12,6 +12,7 @@
#include <linux/pm.h>
#include <linux/serial_core.h>
#include <linux/slab.h>
+#include <linux/smc91x.h>
#include <asm/mach-types.h>
#include <asm/mach/map.h>
@@ -258,12 +259,17 @@ static int neponset_probe(struct platform_device *dev)
0x02000000, "smc91x-attrib"),
{ .flags = IORESOURCE_IRQ },
};
+ struct smc91x_platdata smc91x_platdata = {
+ .flags = SMC91X_USE_8BIT | SMC91X_IO_SHIFT_2 | SMC91X_NOWAIT,
+ };
struct platform_device_info smc91x_devinfo = {
.parent = &dev->dev,
.name = "smc91x",
.id = 0,
.res = smc91x_resources,
.num_res = ARRAY_SIZE(smc91x_resources),
+ .data = &smc91x_platdata,
+ .size_data = sizeof(smc91x_platdata),
};
int ret, irq;
diff --git a/arch/arm/mach-sa1100/pleb.c b/arch/arm/mach-sa1100/pleb.c
index 091261878eff..1525d7b5f1b7 100644
--- a/arch/arm/mach-sa1100/pleb.c
+++ b/arch/arm/mach-sa1100/pleb.c
@@ -11,6 +11,7 @@
#include <linux/irq.h>
#include <linux/io.h>
#include <linux/mtd/partitions.h>
+#include <linux/smc91x.h>
#include <mach/hardware.h>
#include <asm/setup.h>
@@ -43,12 +44,18 @@ static struct resource smc91x_resources[] = {
#endif
};
+static struct smc91x_platdata smc91x_platdata = {
+ .flags = SMC91X_USE_16BIT | SMC91X_NOWAIT,
+};
static struct platform_device smc91x_device = {
.name = "smc91x",
.id = 0,
.num_resources = ARRAY_SIZE(smc91x_resources),
.resource = smc91x_resources,
+ .dev = {
+ .platform_data = &smc91x_platdata,
+ },
};
static struct platform_device *devices[] __initdata = {
diff --git a/arch/arm/mach-socfpga/core.h b/arch/arm/mach-socfpga/core.h
index 483cb467bf65..a0f3b1cd497c 100644
--- a/arch/arm/mach-socfpga/core.h
+++ b/arch/arm/mach-socfpga/core.h
@@ -45,6 +45,6 @@ extern char secondary_trampoline, secondary_trampoline_end;
extern unsigned long socfpga_cpu1start_addr;
-#define SOCFPGA_SCU_VIRT_BASE 0xfffec000
+#define SOCFPGA_SCU_VIRT_BASE 0xfee00000
#endif
diff --git a/arch/arm/mach-socfpga/socfpga.c b/arch/arm/mach-socfpga/socfpga.c
index 383d61e138af..f5e597c207b9 100644
--- a/arch/arm/mach-socfpga/socfpga.c
+++ b/arch/arm/mach-socfpga/socfpga.c
@@ -23,6 +23,7 @@
#include <asm/hardware/cache-l2x0.h>
#include <asm/mach/arch.h>
#include <asm/mach/map.h>
+#include <asm/cacheflush.h>
#include "core.h"
@@ -73,6 +74,10 @@ void __init socfpga_sysmgr_init(void)
(u32 *) &socfpga_cpu1start_addr))
pr_err("SMP: Need cpu1-start-addr in device tree.\n");
+ /* Ensure that socfpga_cpu1start_addr is visible to other CPUs */
+ smp_wmb();
+ sync_cache_w(&socfpga_cpu1start_addr);
+
sys_manager_base_addr = of_iomap(np, 0);
np = of_find_compatible_node(NULL, NULL, "altr,rst-mgr");
diff --git a/arch/arm/mach-sti/board-dt.c b/arch/arm/mach-sti/board-dt.c
index b067390cef4e..b373acade338 100644
--- a/arch/arm/mach-sti/board-dt.c
+++ b/arch/arm/mach-sti/board-dt.c
@@ -18,6 +18,7 @@ static const char *stih41x_dt_match[] __initdata = {
"st,stih415",
"st,stih416",
"st,stih407",
+ "st,stih410",
"st,stih418",
NULL
};
diff --git a/arch/arm/mach-sunxi/Kconfig b/arch/arm/mach-sunxi/Kconfig
index a77604fbaf25..81502b90dd91 100644
--- a/arch/arm/mach-sunxi/Kconfig
+++ b/arch/arm/mach-sunxi/Kconfig
@@ -1,10 +1,12 @@
menuconfig ARCH_SUNXI
bool "Allwinner SoCs" if ARCH_MULTI_V7
select ARCH_REQUIRE_GPIOLIB
+ select ARCH_HAS_RESET_CONTROLLER
select CLKSRC_MMIO
select GENERIC_IRQ_CHIP
select PINCTRL
select SUN4I_TIMER
+ select RESET_CONTROLLER
if ARCH_SUNXI
@@ -20,10 +22,8 @@ config MACH_SUN5I
config MACH_SUN6I
bool "Allwinner A31 (sun6i) SoCs support"
default ARCH_SUNXI
- select ARCH_HAS_RESET_CONTROLLER
select ARM_GIC
select MFD_SUN6I_PRCM
- select RESET_CONTROLLER
select SUN5I_HSTIMER
config MACH_SUN7I
@@ -37,16 +37,12 @@ config MACH_SUN7I
config MACH_SUN8I
bool "Allwinner A23 (sun8i) SoCs support"
default ARCH_SUNXI
- select ARCH_HAS_RESET_CONTROLLER
select ARM_GIC
select MFD_SUN6I_PRCM
- select RESET_CONTROLLER
config MACH_SUN9I
bool "Allwinner (sun9i) SoCs support"
default ARCH_SUNXI
- select ARCH_HAS_RESET_CONTROLLER
select ARM_GIC
- select RESET_CONTROLLER
endif
diff --git a/arch/arm/mach-tegra/cpuidle-tegra114.c b/arch/arm/mach-tegra/cpuidle-tegra114.c
index f2b586d7b15d..155807fa6fdd 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra114.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra114.c
@@ -15,7 +15,7 @@
*/
#include <asm/firmware.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
@@ -44,7 +44,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev,
tegra_set_cpu_in_lp2();
cpu_pm_enter();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+ tick_broadcast_enter();
call_firmware_op(prepare_idle);
@@ -52,7 +52,7 @@ static int tegra114_idle_power_down(struct cpuidle_device *dev,
if (call_firmware_op(do_idle, 0) == -ENOSYS)
cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+ tick_broadcast_exit();
cpu_pm_exit();
tegra_clear_cpu_in_lp2();
diff --git a/arch/arm/mach-tegra/cpuidle-tegra20.c b/arch/arm/mach-tegra/cpuidle-tegra20.c
index 4f25a7c7ca0f..48844ae6c3a1 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra20.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra20.c
@@ -20,7 +20,7 @@
*/
#include <linux/clk/tegra.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
@@ -136,11 +136,11 @@ static bool tegra20_cpu_cluster_power_down(struct cpuidle_device *dev,
if (tegra20_reset_cpu_1() || !tegra_cpu_rail_off_ready())
return false;
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+ tick_broadcast_enter();
tegra_idle_lp2_last();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+ tick_broadcast_exit();
if (cpu_online(1))
tegra20_wake_cpu1_from_reset();
@@ -153,13 +153,13 @@ static bool tegra20_idle_enter_lp2_cpu_1(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+ tick_broadcast_enter();
cpu_suspend(0, tegra20_sleep_cpu_secondary_finish);
tegra20_cpu_clear_resettable();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+ tick_broadcast_exit();
return true;
}
diff --git a/arch/arm/mach-tegra/cpuidle-tegra30.c b/arch/arm/mach-tegra/cpuidle-tegra30.c
index f8815ed65d9d..84d809a3cba3 100644
--- a/arch/arm/mach-tegra/cpuidle-tegra30.c
+++ b/arch/arm/mach-tegra/cpuidle-tegra30.c
@@ -20,7 +20,7 @@
*/
#include <linux/clk/tegra.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
#include <linux/cpuidle.h>
#include <linux/cpu_pm.h>
#include <linux/kernel.h>
@@ -76,11 +76,11 @@ static bool tegra30_cpu_cluster_power_down(struct cpuidle_device *dev,
return false;
}
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+ tick_broadcast_enter();
tegra_idle_lp2_last();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+ tick_broadcast_exit();
return true;
}
@@ -90,13 +90,13 @@ static bool tegra30_cpu_core_power_down(struct cpuidle_device *dev,
struct cpuidle_driver *drv,
int index)
{
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &dev->cpu);
+ tick_broadcast_enter();
smp_wmb();
cpu_suspend(0, tegra30_sleep_cpu_secondary_finish);
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &dev->cpu);
+ tick_broadcast_exit();
return true;
}
diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c
index c6c7696b8db9..8f15f70622a6 100644
--- a/arch/arm/mm/cache-l2x0.c
+++ b/arch/arm/mm/cache-l2x0.c
@@ -1131,23 +1131,22 @@ static void __init l2c310_of_parse(const struct device_node *np,
}
ret = l2x0_cache_size_of_parse(np, aux_val, aux_mask, &assoc, SZ_512K);
- if (ret)
- return;
-
- switch (assoc) {
- case 16:
- *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
- *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16;
- *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
- break;
- case 8:
- *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
- *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
- break;
- default:
- pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n",
- assoc);
- break;
+ if (!ret) {
+ switch (assoc) {
+ case 16:
+ *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+ *aux_val |= L310_AUX_CTRL_ASSOCIATIVITY_16;
+ *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+ break;
+ case 8:
+ *aux_val &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+ *aux_mask &= ~L2X0_AUX_CTRL_ASSOC_MASK;
+ break;
+ default:
+ pr_err("L2C-310 OF cache associativity %d invalid, only 8 or 16 permitted\n",
+ assoc);
+ break;
+ }
}
prefetch = l2x0_saved_regs.prefetch_ctrl;
diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c
index 170a116d1b29..c27447653903 100644
--- a/arch/arm/mm/dma-mapping.c
+++ b/arch/arm/mm/dma-mapping.c
@@ -171,7 +171,7 @@ static int __dma_supported(struct device *dev, u64 mask, bool warn)
*/
if (sizeof(mask) != sizeof(dma_addr_t) &&
mask > (dma_addr_t)~0 &&
- dma_to_pfn(dev, ~0) < max_pfn) {
+ dma_to_pfn(dev, ~0) < max_pfn - 1) {
if (warn) {
dev_warn(dev, "Coherent DMA mask %#llx is larger than dma_addr_t allows\n",
mask);
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index a982dc3190df..6333d9c17875 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -552,6 +552,7 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
pr_alert("Unhandled fault: %s (0x%03x) at 0x%08lx\n",
inf->name, fsr, addr);
+ show_pte(current->mm, addr);
info.si_signo = inf->sig;
info.si_errno = 0;
diff --git a/arch/arm/mm/pageattr.c b/arch/arm/mm/pageattr.c
index 004e35cdcfff..cf30daff8932 100644
--- a/arch/arm/mm/pageattr.c
+++ b/arch/arm/mm/pageattr.c
@@ -49,7 +49,10 @@ static int change_memory_common(unsigned long addr, int numpages,
WARN_ON_ONCE(1);
}
- if (!is_module_address(start) || !is_module_address(end - 1))
+ if (start < MODULES_VADDR || start >= MODULES_END)
+ return -EINVAL;
+
+ if (end < MODULES_VADDR || start >= MODULES_END)
return -EINVAL;
data.set_mask = set_mask;
diff --git a/arch/arm/plat-omap/counter_32k.c b/arch/arm/plat-omap/counter_32k.c
index 61b4d705c267..2438b96004c1 100644
--- a/arch/arm/plat-omap/counter_32k.c
+++ b/arch/arm/plat-omap/counter_32k.c
@@ -44,24 +44,20 @@ static u64 notrace omap_32k_read_sched_clock(void)
}
/**
- * omap_read_persistent_clock - Return time from a persistent clock.
+ * omap_read_persistent_clock64 - Return time from a persistent clock.
*
* Reads the time from a source which isn't disabled during PM, the
* 32k sync timer. Convert the cycles elapsed since last read into
- * nsecs and adds to a monotonically increasing timespec.
+ * nsecs and adds to a monotonically increasing timespec64.
*/
-static struct timespec persistent_ts;
+static struct timespec64 persistent_ts;
static cycles_t cycles;
static unsigned int persistent_mult, persistent_shift;
-static DEFINE_SPINLOCK(read_persistent_clock_lock);
-static void omap_read_persistent_clock(struct timespec *ts)
+static void omap_read_persistent_clock64(struct timespec64 *ts)
{
unsigned long long nsecs;
cycles_t last_cycles;
- unsigned long flags;
-
- spin_lock_irqsave(&read_persistent_clock_lock, flags);
last_cycles = cycles;
cycles = sync32k_cnt_reg ? readl_relaxed(sync32k_cnt_reg) : 0;
@@ -69,11 +65,9 @@ static void omap_read_persistent_clock(struct timespec *ts)
nsecs = clocksource_cyc2ns(cycles - last_cycles,
persistent_mult, persistent_shift);
- timespec_add_ns(&persistent_ts, nsecs);
+ timespec64_add_ns(&persistent_ts, nsecs);
*ts = persistent_ts;
-
- spin_unlock_irqrestore(&read_persistent_clock_lock, flags);
}
/**
@@ -103,7 +97,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
/*
* 120000 rough estimate from the calculations in
- * __clocksource_updatefreq_scale.
+ * __clocksource_update_freq_scale.
*/
clocks_calc_mult_shift(&persistent_mult, &persistent_shift,
32768, NSEC_PER_SEC, 120000);
@@ -116,7 +110,7 @@ int __init omap_init_clocksource_32k(void __iomem *vbase)
}
sched_clock_register(omap_32k_read_sched_clock, 32, 32768);
- register_persistent_clock(NULL, omap_read_persistent_clock);
+ register_persistent_clock(NULL, omap_read_persistent_clock64);
pr_info("OMAP clocksource: 32k_counter at 32768 Hz\n");
return 0;
diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index db10169a08de..8ca94d379bc3 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -799,6 +799,7 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
struct device *dev = &pdev->dev;
const struct of_device_id *match;
const struct dmtimer_platform_data *pdata;
+ int ret;
match = of_match_device(of_match_ptr(omap_timer_match), dev);
pdata = match ? match->data : dev->platform_data;
@@ -860,7 +861,12 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
}
if (!timer->reserved) {
- pm_runtime_get_sync(dev);
+ ret = pm_runtime_get_sync(dev);
+ if (ret < 0) {
+ dev_err(dev, "%s: pm_runtime_get_sync failed!\n",
+ __func__);
+ goto err_get_sync;
+ }
__omap_dm_timer_init_regs(timer);
pm_runtime_put(dev);
}
@@ -873,6 +879,11 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
dev_dbg(dev, "Device Probed.\n");
return 0;
+
+err_get_sync:
+ pm_runtime_put_noidle(dev);
+ pm_runtime_disable(dev);
+ return ret;
}
/**
@@ -899,6 +910,8 @@ static int omap_dm_timer_remove(struct platform_device *pdev)
}
spin_unlock_irqrestore(&dm_timer_lock, flags);
+ pm_runtime_disable(&pdev->dev);
+
return ret;
}
diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi
index f1ad9c2ab2e9..a857794432d6 100644
--- a/arch/arm64/boot/dts/apm/apm-storm.dtsi
+++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi
@@ -622,7 +622,7 @@
};
sgenet0: ethernet@1f210000 {
- compatible = "apm,xgene-enet";
+ compatible = "apm,xgene1-sgenet";
status = "disabled";
reg = <0x0 0x1f210000 0x0 0xd100>,
<0x0 0x1f200000 0x0 0Xc300>,
@@ -636,7 +636,7 @@
};
xgenet: ethernet@1f610000 {
- compatible = "apm,xgene-enet";
+ compatible = "apm,xgene1-xgenet";
status = "disabled";
reg = <0x0 0x1f610000 0x0 0xd100>,
<0x0 0x1f600000 0x0 0Xc300>,
diff --git a/arch/arm64/boot/dts/arm/juno-clocks.dtsi b/arch/arm64/boot/dts/arm/juno-clocks.dtsi
index ea2b5666a16f..c9b89efe0f56 100644
--- a/arch/arm64/boot/dts/arm/juno-clocks.dtsi
+++ b/arch/arm64/boot/dts/arm/juno-clocks.dtsi
@@ -8,7 +8,7 @@
*/
/* SoC fixed clocks */
- soc_uartclk: refclk72738khz {
+ soc_uartclk: refclk7273800hz {
compatible = "fixed-clock";
#clock-cells = <0>;
clock-frequency = <7273800>;
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index cb9593079f29..d8c25b7b18fb 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -246,14 +246,30 @@ static inline unsigned long __cmpxchg_mb(volatile void *ptr, unsigned long old,
__ret; \
})
-#define this_cpu_cmpxchg_1(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_2(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_4(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-#define this_cpu_cmpxchg_8(ptr, o, n) cmpxchg_local(raw_cpu_ptr(&(ptr)), o, n)
-
-#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
- cmpxchg_double_local(raw_cpu_ptr(&(ptr1)), raw_cpu_ptr(&(ptr2)), \
- o1, o2, n1, n2)
+#define _protect_cmpxchg_local(pcp, o, n) \
+({ \
+ typeof(*raw_cpu_ptr(&(pcp))) __ret; \
+ preempt_disable(); \
+ __ret = cmpxchg_local(raw_cpu_ptr(&(pcp)), o, n); \
+ preempt_enable(); \
+ __ret; \
+})
+
+#define this_cpu_cmpxchg_1(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_2(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_4(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+#define this_cpu_cmpxchg_8(ptr, o, n) _protect_cmpxchg_local(ptr, o, n)
+
+#define this_cpu_cmpxchg_double_8(ptr1, ptr2, o1, o2, n1, n2) \
+({ \
+ int __ret; \
+ preempt_disable(); \
+ __ret = cmpxchg_double_local( raw_cpu_ptr(&(ptr1)), \
+ raw_cpu_ptr(&(ptr2)), \
+ o1, o2, n1, n2); \
+ preempt_enable(); \
+ __ret; \
+})
#define cmpxchg64(ptr,o,n) cmpxchg((ptr),(o),(n))
#define cmpxchg64_local(ptr,o,n) cmpxchg_local((ptr),(o),(n))
diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h
index 92bbae381598..70522450ca23 100644
--- a/arch/arm64/include/asm/esr.h
+++ b/arch/arm64/include/asm/esr.h
@@ -90,6 +90,7 @@
#define ESR_ELx_FSC (0x3F)
#define ESR_ELx_FSC_TYPE (0x3C)
#define ESR_ELx_FSC_EXTABT (0x10)
+#define ESR_ELx_FSC_ACCESS (0x08)
#define ESR_ELx_FSC_FAULT (0x04)
#define ESR_ELx_FSC_PERM (0x0C)
#define ESR_ELx_CV (UL(1) << 24)
diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h
index 076a1c714049..c0e5165c2f76 100644
--- a/arch/arm64/include/asm/jump_label.h
+++ b/arch/arm64/include/asm/jump_label.h
@@ -18,11 +18,12 @@
*/
#ifndef __ASM_JUMP_LABEL_H
#define __ASM_JUMP_LABEL_H
+
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
#include <asm/insn.h>
-#ifdef __KERNEL__
-
#define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE
static __always_inline bool arch_static_branch(struct static_key *key)
@@ -39,8 +40,6 @@ l_yes:
return true;
}
-#endif /* __KERNEL__ */
-
typedef u64 jump_label_t;
struct jump_entry {
@@ -49,4 +48,5 @@ struct jump_entry {
jump_label_t key;
};
+#endif /* __ASSEMBLY__ */
#endif /* __ASM_JUMP_LABEL_H */
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 94674eb7e7bb..ac6fafb95fe7 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -129,6 +129,9 @@
* 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are
* not known to exist and will break with this configuration.
*
+ * VTCR_EL2.PS is extracted from ID_AA64MMFR0_EL1.PARange at boot time
+ * (see hyp-init.S).
+ *
* Note that when using 4K pages, we concatenate two first level page tables
* together.
*
@@ -138,7 +141,6 @@
#ifdef CONFIG_ARM64_64K_PAGES
/*
* Stage2 translation configuration:
- * 40bits output (PS = 2)
* 40bits input (T0SZ = 24)
* 64kB pages (TG0 = 1)
* 2 level page tables (SL = 1)
@@ -150,7 +152,6 @@
#else
/*
* Stage2 translation configuration:
- * 40bits output (PS = 2)
* 40bits input (T0SZ = 24)
* 4kB pages (TG0 = 0)
* 3 level page tables (SL = 1)
@@ -187,6 +188,7 @@
/* For compatibility with fault code shared with 32-bit */
#define FSC_FAULT ESR_ELx_FSC_FAULT
+#define FSC_ACCESS ESR_ELx_FSC_ACCESS
#define FSC_PERM ESR_ELx_FSC_PERM
/* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 8ac3c70fe3c6..f0f58c9beec0 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -28,6 +28,8 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
+#define __KVM_HAVE_ARCH_INTC_INITIALIZED
+
#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
#else
@@ -177,19 +179,10 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
int kvm_unmap_hva_range(struct kvm *kvm,
unsigned long start, unsigned long end);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
+int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
/* We do not have shadow page tables, hence the empty hooks */
-static inline int kvm_age_hva(struct kvm *kvm, unsigned long start,
- unsigned long end)
-{
- return 0;
-}
-
-static inline int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-{
- return 0;
-}
-
static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
unsigned long address)
{
diff --git a/arch/arm64/include/asm/kvm_mmio.h b/arch/arm64/include/asm/kvm_mmio.h
index 9f52beb7cb13..889c908ee631 100644
--- a/arch/arm64/include/asm/kvm_mmio.h
+++ b/arch/arm64/include/asm/kvm_mmio.h
@@ -31,28 +31,6 @@ struct kvm_decode {
bool sign_extend;
};
-/*
- * The in-kernel MMIO emulation code wants to use a copy of run->mmio,
- * which is an anonymous type. Use our own type instead.
- */
-struct kvm_exit_mmio {
- phys_addr_t phys_addr;
- u8 data[8];
- u32 len;
- bool is_write;
- void *private;
-};
-
-static inline void kvm_prepare_mmio(struct kvm_run *run,
- struct kvm_exit_mmio *mmio)
-{
- run->mmio.phys_addr = mmio->phys_addr;
- run->mmio.len = mmio->len;
- run->mmio.is_write = mmio->is_write;
- memcpy(run->mmio.data, mmio->data, mmio->len);
- run->exit_reason = KVM_EXIT_MMIO;
-}
-
int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
phys_addr_t fault_ipa);
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 6458b5373142..bbfb600fa822 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -158,6 +158,8 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
#define PTRS_PER_S2_PGD (1 << PTRS_PER_S2_PGD_SHIFT)
#define S2_PGD_ORDER get_order(PTRS_PER_S2_PGD * sizeof(pgd_t))
+#define kvm_pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_S2_PGD - 1))
+
/*
* If we are concatenating first level stage-2 page tables, we would have less
* than or equal to 16 pointers in the fake PGD, because that's what the
@@ -171,43 +173,6 @@ static inline bool kvm_s2pmd_readonly(pmd_t *pmd)
#define KVM_PREALLOC_LEVEL (0)
#endif
-/**
- * kvm_prealloc_hwpgd - allocate inital table for VTTBR
- * @kvm: The KVM struct pointer for the VM.
- * @pgd: The kernel pseudo pgd
- *
- * When the kernel uses more levels of page tables than the guest, we allocate
- * a fake PGD and pre-populate it to point to the next-level page table, which
- * will be the real initial page table pointed to by the VTTBR.
- *
- * When KVM_PREALLOC_LEVEL==2, we allocate a single page for the PMD and
- * the kernel will use folded pud. When KVM_PREALLOC_LEVEL==1, we
- * allocate 2 consecutive PUD pages.
- */
-static inline int kvm_prealloc_hwpgd(struct kvm *kvm, pgd_t *pgd)
-{
- unsigned int i;
- unsigned long hwpgd;
-
- if (KVM_PREALLOC_LEVEL == 0)
- return 0;
-
- hwpgd = __get_free_pages(GFP_KERNEL | __GFP_ZERO, PTRS_PER_S2_PGD_SHIFT);
- if (!hwpgd)
- return -ENOMEM;
-
- for (i = 0; i < PTRS_PER_S2_PGD; i++) {
- if (KVM_PREALLOC_LEVEL == 1)
- pgd_populate(NULL, pgd + i,
- (pud_t *)hwpgd + i * PTRS_PER_PUD);
- else if (KVM_PREALLOC_LEVEL == 2)
- pud_populate(NULL, pud_offset(pgd, 0) + i,
- (pmd_t *)hwpgd + i * PTRS_PER_PMD);
- }
-
- return 0;
-}
-
static inline void *kvm_get_hwpgd(struct kvm *kvm)
{
pgd_t *pgd = kvm->arch.pgd;
@@ -224,12 +189,11 @@ static inline void *kvm_get_hwpgd(struct kvm *kvm)
return pmd_offset(pud, 0);
}
-static inline void kvm_free_hwpgd(struct kvm *kvm)
+static inline unsigned int kvm_get_hwpgd_size(void)
{
- if (KVM_PREALLOC_LEVEL > 0) {
- unsigned long hwpgd = (unsigned long)kvm_get_hwpgd(kvm);
- free_pages(hwpgd, PTRS_PER_S2_PGD_SHIFT);
- }
+ if (KVM_PREALLOC_LEVEL > 0)
+ return PTRS_PER_S2_PGD * PAGE_SIZE;
+ return PTRS_PER_S2_PGD * sizeof(pgd_t);
}
static inline bool kvm_page_empty(void *ptr)
diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h
index a9eee33dfa62..101a42bde728 100644
--- a/arch/arm64/include/asm/mmu_context.h
+++ b/arch/arm64/include/asm/mmu_context.h
@@ -151,6 +151,15 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
unsigned int cpu = smp_processor_id();
+ /*
+ * init_mm.pgd does not contain any user mappings and it is always
+ * active for kernel addresses in TTBR1. Just set the reserved TTBR0.
+ */
+ if (next == &init_mm) {
+ cpu_set_reserved_ttbr0();
+ return;
+ }
+
if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next)) || prev != next)
check_and_switch_context(next, tsk);
}
diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h
index 09da25bc596f..4fde8c1df97f 100644
--- a/arch/arm64/include/asm/percpu.h
+++ b/arch/arm64/include/asm/percpu.h
@@ -204,25 +204,47 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val,
return ret;
}
+#define _percpu_read(pcp) \
+({ \
+ typeof(pcp) __retval; \
+ preempt_disable(); \
+ __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \
+ sizeof(pcp)); \
+ preempt_enable(); \
+ __retval; \
+})
+
+#define _percpu_write(pcp, val) \
+do { \
+ preempt_disable(); \
+ __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \
+ sizeof(pcp)); \
+ preempt_enable(); \
+} while(0) \
+
+#define _pcp_protect(operation, pcp, val) \
+({ \
+ typeof(pcp) __retval; \
+ preempt_disable(); \
+ __retval = (typeof(pcp))operation(raw_cpu_ptr(&(pcp)), \
+ (val), sizeof(pcp)); \
+ preempt_enable(); \
+ __retval; \
+})
+
#define _percpu_add(pcp, val) \
- __percpu_add(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
+ _pcp_protect(__percpu_add, pcp, val)
-#define _percpu_add_return(pcp, val) (typeof(pcp)) (_percpu_add(pcp, val))
+#define _percpu_add_return(pcp, val) _percpu_add(pcp, val)
#define _percpu_and(pcp, val) \
- __percpu_and(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
+ _pcp_protect(__percpu_and, pcp, val)
#define _percpu_or(pcp, val) \
- __percpu_or(raw_cpu_ptr(&(pcp)), val, sizeof(pcp))
-
-#define _percpu_read(pcp) (typeof(pcp)) \
- (__percpu_read(raw_cpu_ptr(&(pcp)), sizeof(pcp)))
-
-#define _percpu_write(pcp, val) \
- __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp))
+ _pcp_protect(__percpu_or, pcp, val)
#define _percpu_xchg(pcp, val) (typeof(pcp)) \
- (__percpu_xchg(raw_cpu_ptr(&(pcp)), (unsigned long)(val), sizeof(pcp)))
+ _pcp_protect(__percpu_xchg, pcp, (unsigned long)(val))
#define this_cpu_add_1(pcp, val) _percpu_add(pcp, val)
#define this_cpu_add_2(pcp, val) _percpu_add(pcp, val)
diff --git a/arch/arm64/include/asm/proc-fns.h b/arch/arm64/include/asm/proc-fns.h
index 9a8fd84f8fb2..941c375616e2 100644
--- a/arch/arm64/include/asm/proc-fns.h
+++ b/arch/arm64/include/asm/proc-fns.h
@@ -39,7 +39,11 @@ extern u64 cpu_do_resume(phys_addr_t ptr, u64 idmap_ttbr);
#include <asm/memory.h>
-#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm)
+#define cpu_switch_mm(pgd,mm) \
+do { \
+ BUG_ON(pgd == swapper_pg_dir); \
+ cpu_do_switch_mm(virt_to_phys(pgd),mm); \
+} while (0)
#define cpu_get_pgd() \
({ \
diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h
index c028fe37456f..53d9c354219f 100644
--- a/arch/arm64/include/asm/tlb.h
+++ b/arch/arm64/include/asm/tlb.h
@@ -48,6 +48,7 @@ static inline void tlb_flush(struct mmu_gather *tlb)
static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
unsigned long addr)
{
+ __flush_tlb_pgtable(tlb->mm, addr);
pgtable_page_dtor(pte);
tlb_remove_entry(tlb, pte);
}
@@ -56,6 +57,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte,
static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
unsigned long addr)
{
+ __flush_tlb_pgtable(tlb->mm, addr);
tlb_remove_entry(tlb, virt_to_page(pmdp));
}
#endif
@@ -64,6 +66,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp,
static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp,
unsigned long addr)
{
+ __flush_tlb_pgtable(tlb->mm, addr);
tlb_remove_entry(tlb, virt_to_page(pudp));
}
#endif
diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index 4abe9b945f77..c3bb05b98616 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -144,6 +144,19 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
}
/*
+ * Used to invalidate the TLB (walk caches) corresponding to intermediate page
+ * table levels (pgd/pud/pmd).
+ */
+static inline void __flush_tlb_pgtable(struct mm_struct *mm,
+ unsigned long uaddr)
+{
+ unsigned long addr = uaddr >> 12 | ((unsigned long)ASID(mm) << 48);
+
+ dsb(ishst);
+ asm("tlbi vae1is, %0" : : "r" (addr));
+ dsb(ish);
+}
+/*
* On AArch64, the cache coherency is handled via the set_pte_at() function.
*/
static inline void update_mmu_cache(struct vm_area_struct *vma,
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 3ef77a466018..c154c0b7eb60 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -191,6 +191,9 @@ struct kvm_arch_memory_slot {
/* Highest supported SPI, from VGIC_NR_IRQS */
#define KVM_ARM_IRQ_GIC_MAX 127
+/* One single KVM irqchip, ie. the VGIC */
+#define KVM_NR_IRQCHIPS 1
+
/* PSCI interface */
#define KVM_PSCI_FN_BASE 0x95c1ba5e
#define KVM_PSCI_FN(n) (KVM_PSCI_FN_BASE + (n))
diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c
index b42c7b480e1e..ab21e0d58278 100644
--- a/arch/arm64/kernel/efi.c
+++ b/arch/arm64/kernel/efi.c
@@ -337,7 +337,11 @@ core_initcall(arm64_dmi_init);
static void efi_set_pgd(struct mm_struct *mm)
{
- cpu_switch_mm(mm->pgd, mm);
+ if (mm == &init_mm)
+ cpu_set_reserved_ttbr0();
+ else
+ cpu_switch_mm(mm->pgd, mm);
+
flush_tlb_all();
if (icache_is_aivivt())
__flush_icache_all();
@@ -354,3 +358,12 @@ void efi_virtmap_unload(void)
efi_set_pgd(current->active_mm);
preempt_enable();
}
+
+/*
+ * UpdateCapsule() depends on the system being shutdown via
+ * ResetSystem().
+ */
+bool efi_poweroff_required(void)
+{
+ return efi_enabled(EFI_RUNTIME_SERVICES);
+}
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 8ce88e08c030..07f930540f4a 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -585,8 +585,8 @@ ENDPROC(set_cpu_boot_mode_flag)
* zeroing of .bss would clobber it.
*/
.pushsection .data..cacheline_aligned
-ENTRY(__boot_cpu_mode)
.align L1_CACHE_SHIFT
+ENTRY(__boot_cpu_mode)
.long BOOT_CPU_MODE_EL2
.long 0
.popsection
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index fde9923af859..c6b1f3b96f45 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -21,6 +21,7 @@
#include <stdarg.h>
#include <linux/compat.h>
+#include <linux/efi.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/kernel.h>
@@ -150,6 +151,13 @@ void machine_restart(char *cmd)
local_irq_disable();
smp_send_stop();
+ /*
+ * UpdateCapsule() depends on the system being reset via
+ * ResetSystem().
+ */
+ if (efi_enabled(EFI_RUNTIME_SERVICES))
+ efi_reboot(reboot_mode, NULL);
+
/* Now call the architecture specific reboot code. */
if (arm_pm_restart)
arm_pm_restart(reboot_mode, cmd);
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 32aeea083d93..ec37ab3f524f 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -200,7 +200,7 @@ up_fail:
void update_vsyscall(struct timekeeper *tk)
{
struct timespec xtime_coarse;
- u32 use_syscall = strcmp(tk->tkr.clock->name, "arch_sys_counter");
+ u32 use_syscall = strcmp(tk->tkr_mono.clock->name, "arch_sys_counter");
++vdso_data->tb_seq_count;
smp_wmb();
@@ -213,11 +213,11 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec;
if (!use_syscall) {
- vdso_data->cs_cycle_last = tk->tkr.cycle_last;
+ vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
- vdso_data->cs_mult = tk->tkr.mult;
- vdso_data->cs_shift = tk->tkr.shift;
+ vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
+ vdso_data->cs_mult = tk->tkr_mono.mult;
+ vdso_data->cs_shift = tk->tkr_mono.shift;
}
smp_wmb();
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index f5590c81d95f..5105e297ed5f 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -18,6 +18,7 @@ if VIRTUALIZATION
config KVM
bool "Kernel-based Virtual Machine (KVM) support"
+ depends on OF
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select ANON_INODES
@@ -25,10 +26,10 @@ config KVM
select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
select KVM_ARM_HOST
- select KVM_ARM_VGIC
- select KVM_ARM_TIMER
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select SRCU
+ select HAVE_KVM_EVENTFD
+ select HAVE_KVM_IRQFD
---help---
Support hosting virtualized guest machines.
@@ -50,17 +51,4 @@ config KVM_ARM_MAX_VCPUS
large, so only choose a reasonable number that you expect to
actually use.
-config KVM_ARM_VGIC
- bool
- depends on KVM_ARM_HOST && OF
- select HAVE_KVM_IRQCHIP
- ---help---
- Adds support for a hardware assisted, in-kernel GIC emulation.
-
-config KVM_ARM_TIMER
- bool
- depends on KVM_ARM_VGIC
- ---help---
- Adds support for the Architected Timers in virtual machines.
-
endif # VIRTUALIZATION
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 4e6e09ee4033..d5904f876cdb 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module
#
-ccflags-y += -Ivirt/kvm -Iarch/arm64/kvm
+ccflags-y += -Iarch/arm64/kvm
CFLAGS_arm.o := -I.
CFLAGS_mmu.o := -I.
@@ -11,7 +11,7 @@ ARM=../../../arch/arm/kvm
obj-$(CONFIG_KVM_ARM_HOST) += kvm.o
-kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o
kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
@@ -19,11 +19,11 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o sys_regs_generic_v8.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2-emul.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v2-switch.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v3-emul.o
-kvm-$(CONFIG_KVM_ARM_VGIC) += vgic-v3-switch.o
-kvm-$(CONFIG_KVM_ARM_TIMER) += $(KVM)/arm/arch_timer.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v2-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o
+kvm-$(CONFIG_KVM_ARM_HOST) += vgic-v3-switch.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 58e0c2bdde04..ef7d112f5ce0 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -51,7 +51,7 @@ static int __init early_coherent_pool(char *p)
}
early_param("coherent_pool", early_coherent_pool);
-static void *__alloc_from_pool(size_t size, struct page **ret_page)
+static void *__alloc_from_pool(size_t size, struct page **ret_page, gfp_t flags)
{
unsigned long val;
void *ptr = NULL;
@@ -67,6 +67,8 @@ static void *__alloc_from_pool(size_t size, struct page **ret_page)
*ret_page = phys_to_page(phys);
ptr = (void *)val;
+ if (flags & __GFP_ZERO)
+ memset(ptr, 0, size);
}
return ptr;
@@ -101,6 +103,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
flags |= GFP_DMA;
if (IS_ENABLED(CONFIG_DMA_CMA) && (flags & __GFP_WAIT)) {
struct page *page;
+ void *addr;
size = PAGE_ALIGN(size);
page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
@@ -109,7 +112,10 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size,
return NULL;
*dma_handle = phys_to_dma(dev, page_to_phys(page));
- return page_address(page);
+ addr = page_address(page);
+ if (flags & __GFP_ZERO)
+ memset(addr, 0, size);
+ return addr;
} else {
return swiotlb_alloc_coherent(dev, size, dma_handle, flags);
}
@@ -146,7 +152,7 @@ static void *__dma_alloc(struct device *dev, size_t size,
if (!coherent && !(flags & __GFP_WAIT)) {
struct page *page = NULL;
- void *addr = __alloc_from_pool(size, &page);
+ void *addr = __alloc_from_pool(size, &page, flags);
if (addr)
*dma_handle = phys_to_dma(dev, page_to_phys(page));
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index bb0ea94c4ba1..1d3ec3ddd84b 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -51,7 +51,10 @@ static int change_memory_common(unsigned long addr, int numpages,
WARN_ON_ONCE(1);
}
- if (!is_module_address(start) || !is_module_address(end - 1))
+ if (start < MODULES_VADDR || start >= MODULES_END)
+ return -EINVAL;
+
+ if (end < MODULES_VADDR || end >= MODULES_END)
return -EINVAL;
data.set_mask = set_mask;
diff --git a/arch/c6x/include/asm/pgtable.h b/arch/c6x/include/asm/pgtable.h
index 78d4483ba40c..ec4db6df5e0d 100644
--- a/arch/c6x/include/asm/pgtable.h
+++ b/arch/c6x/include/asm/pgtable.h
@@ -67,6 +67,11 @@ extern unsigned long empty_zero_page;
*/
#define pgtable_cache_init() do { } while (0)
+/*
+ * c6x is !MMU, so define the simpliest implementation
+ */
+#define pgprot_writecombine pgprot_noncached
+
#include <asm-generic/pgtable.h>
#endif /* _ASM_C6X_PGTABLE_H */
diff --git a/arch/metag/include/asm/io.h b/arch/metag/include/asm/io.h
index 9359e5048442..d5779b0ec573 100644
--- a/arch/metag/include/asm/io.h
+++ b/arch/metag/include/asm/io.h
@@ -2,6 +2,7 @@
#define _ASM_METAG_IO_H
#include <linux/types.h>
+#include <asm/pgtable-bits.h>
#define IO_SPACE_LIMIT 0
diff --git a/arch/metag/include/asm/pgtable-bits.h b/arch/metag/include/asm/pgtable-bits.h
new file mode 100644
index 000000000000..25ba6729f496
--- /dev/null
+++ b/arch/metag/include/asm/pgtable-bits.h
@@ -0,0 +1,104 @@
+/*
+ * Meta page table definitions.
+ */
+
+#ifndef _METAG_PGTABLE_BITS_H
+#define _METAG_PGTABLE_BITS_H
+
+#include <asm/metag_mem.h>
+
+/*
+ * Definitions for MMU descriptors
+ *
+ * These are the hardware bits in the MMCU pte entries.
+ * Derived from the Meta toolkit headers.
+ */
+#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT
+#define _PAGE_WRITE MMCU_ENTRY_WR_BIT
+#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT
+/* Write combine bit - this can cause writes to occur out of order */
+#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT
+/* Sys coherent bit - this bit is never used by Linux */
+#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT
+#define _PAGE_ALWAYS_ZERO_1 0x020
+#define _PAGE_CACHE_CTRL0 0x040
+#define _PAGE_CACHE_CTRL1 0x080
+#define _PAGE_ALWAYS_ZERO_2 0x100
+#define _PAGE_ALWAYS_ZERO_3 0x200
+#define _PAGE_ALWAYS_ZERO_4 0x400
+#define _PAGE_ALWAYS_ZERO_5 0x800
+
+/* These are software bits that we stuff into the gaps in the hardware
+ * pte entries that are not used. Note, these DO get stored in the actual
+ * hardware, but the hardware just does not use them.
+ */
+#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1
+#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2
+
+/* Pages owned, and protected by, the kernel. */
+#define _PAGE_KERNEL _PAGE_PRIV
+
+/* No cacheing of this page */
+#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
+/* burst cacheing - good for data streaming */
+#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
+/* One cache way per thread */
+#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
+/* Full on cacheing */
+#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
+
+#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
+
+/* which bits are used for cache control ... */
+#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
+ _PAGE_WR_COMBINE)
+
+/* This is a mask of the bits that pte_modify is allowed to change. */
+#define _PAGE_CHG_MASK (PAGE_MASK)
+
+#define _PAGE_SZ_SHIFT 1
+#define _PAGE_SZ_4K (0x0)
+#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT)
+#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT)
+
+#if defined(CONFIG_PAGE_SIZE_4K)
+#define _PAGE_SZ (_PAGE_SZ_4K)
+#elif defined(CONFIG_PAGE_SIZE_8K)
+#define _PAGE_SZ (_PAGE_SZ_8K)
+#elif defined(CONFIG_PAGE_SIZE_16K)
+#define _PAGE_SZ (_PAGE_SZ_16K)
+#endif
+#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT)
+
+#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
+# define _PAGE_SZHUGE (_PAGE_SZ_8K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
+# define _PAGE_SZHUGE (_PAGE_SZ_16K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
+# define _PAGE_SZHUGE (_PAGE_SZ_32K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
+# define _PAGE_SZHUGE (_PAGE_SZ_64K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
+# define _PAGE_SZHUGE (_PAGE_SZ_128K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
+# define _PAGE_SZHUGE (_PAGE_SZ_256K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
+# define _PAGE_SZHUGE (_PAGE_SZ_512K)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
+# define _PAGE_SZHUGE (_PAGE_SZ_1M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
+# define _PAGE_SZHUGE (_PAGE_SZ_2M)
+#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
+# define _PAGE_SZHUGE (_PAGE_SZ_4M)
+#endif
+
+#endif /* _METAG_PGTABLE_BITS_H */
diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h
index d0604c0a8702..ffa3a3a2ecad 100644
--- a/arch/metag/include/asm/pgtable.h
+++ b/arch/metag/include/asm/pgtable.h
@@ -5,6 +5,7 @@
#ifndef _METAG_PGTABLE_H
#define _METAG_PGTABLE_H
+#include <asm/pgtable-bits.h>
#include <asm-generic/pgtable-nopmd.h>
/* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */
@@ -21,100 +22,6 @@
#endif
/*
- * Definitions for MMU descriptors
- *
- * These are the hardware bits in the MMCU pte entries.
- * Derived from the Meta toolkit headers.
- */
-#define _PAGE_PRESENT MMCU_ENTRY_VAL_BIT
-#define _PAGE_WRITE MMCU_ENTRY_WR_BIT
-#define _PAGE_PRIV MMCU_ENTRY_PRIV_BIT
-/* Write combine bit - this can cause writes to occur out of order */
-#define _PAGE_WR_COMBINE MMCU_ENTRY_WRC_BIT
-/* Sys coherent bit - this bit is never used by Linux */
-#define _PAGE_SYS_COHERENT MMCU_ENTRY_SYS_BIT
-#define _PAGE_ALWAYS_ZERO_1 0x020
-#define _PAGE_CACHE_CTRL0 0x040
-#define _PAGE_CACHE_CTRL1 0x080
-#define _PAGE_ALWAYS_ZERO_2 0x100
-#define _PAGE_ALWAYS_ZERO_3 0x200
-#define _PAGE_ALWAYS_ZERO_4 0x400
-#define _PAGE_ALWAYS_ZERO_5 0x800
-
-/* These are software bits that we stuff into the gaps in the hardware
- * pte entries that are not used. Note, these DO get stored in the actual
- * hardware, but the hardware just does not use them.
- */
-#define _PAGE_ACCESSED _PAGE_ALWAYS_ZERO_1
-#define _PAGE_DIRTY _PAGE_ALWAYS_ZERO_2
-
-/* Pages owned, and protected by, the kernel. */
-#define _PAGE_KERNEL _PAGE_PRIV
-
-/* No cacheing of this page */
-#define _PAGE_CACHE_WIN0 (MMCU_CWIN_UNCACHED << MMCU_ENTRY_CWIN_S)
-/* burst cacheing - good for data streaming */
-#define _PAGE_CACHE_WIN1 (MMCU_CWIN_BURST << MMCU_ENTRY_CWIN_S)
-/* One cache way per thread */
-#define _PAGE_CACHE_WIN2 (MMCU_CWIN_C1SET << MMCU_ENTRY_CWIN_S)
-/* Full on cacheing */
-#define _PAGE_CACHE_WIN3 (MMCU_CWIN_CACHED << MMCU_ENTRY_CWIN_S)
-
-#define _PAGE_CACHEABLE (_PAGE_CACHE_WIN3 | _PAGE_WR_COMBINE)
-
-/* which bits are used for cache control ... */
-#define _PAGE_CACHE_MASK (_PAGE_CACHE_CTRL0 | _PAGE_CACHE_CTRL1 | \
- _PAGE_WR_COMBINE)
-
-/* This is a mask of the bits that pte_modify is allowed to change. */
-#define _PAGE_CHG_MASK (PAGE_MASK)
-
-#define _PAGE_SZ_SHIFT 1
-#define _PAGE_SZ_4K (0x0)
-#define _PAGE_SZ_8K (0x1 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_16K (0x2 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_32K (0x3 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_64K (0x4 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_128K (0x5 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_256K (0x6 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_512K (0x7 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_1M (0x8 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_2M (0x9 << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_4M (0xa << _PAGE_SZ_SHIFT)
-#define _PAGE_SZ_MASK (0xf << _PAGE_SZ_SHIFT)
-
-#if defined(CONFIG_PAGE_SIZE_4K)
-#define _PAGE_SZ (_PAGE_SZ_4K)
-#elif defined(CONFIG_PAGE_SIZE_8K)
-#define _PAGE_SZ (_PAGE_SZ_8K)
-#elif defined(CONFIG_PAGE_SIZE_16K)
-#define _PAGE_SZ (_PAGE_SZ_16K)
-#endif
-#define _PAGE_TABLE (_PAGE_SZ | _PAGE_PRESENT)
-
-#if defined(CONFIG_HUGETLB_PAGE_SIZE_8K)
-# define _PAGE_SZHUGE (_PAGE_SZ_8K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_16K)
-# define _PAGE_SZHUGE (_PAGE_SZ_16K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_32K)
-# define _PAGE_SZHUGE (_PAGE_SZ_32K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_64K)
-# define _PAGE_SZHUGE (_PAGE_SZ_64K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_128K)
-# define _PAGE_SZHUGE (_PAGE_SZ_128K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_256K)
-# define _PAGE_SZHUGE (_PAGE_SZ_256K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_512K)
-# define _PAGE_SZHUGE (_PAGE_SZ_512K)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_1M)
-# define _PAGE_SZHUGE (_PAGE_SZ_1M)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_2M)
-# define _PAGE_SZHUGE (_PAGE_SZ_2M)
-#elif defined(CONFIG_HUGETLB_PAGE_SIZE_4M)
-# define _PAGE_SZHUGE (_PAGE_SZ_4M)
-#endif
-
-/*
* The Linux memory management assumes a three-level page table setup. On
* Meta, we use that, but "fold" the mid level into the top-level page
* table.
diff --git a/arch/metag/include/asm/processor.h b/arch/metag/include/asm/processor.h
index 881071c07942..13272fd5a5ba 100644
--- a/arch/metag/include/asm/processor.h
+++ b/arch/metag/include/asm/processor.h
@@ -149,8 +149,8 @@ extern void exit_thread(void);
unsigned long get_wchan(struct task_struct *p);
-#define KSTK_EIP(tsk) ((tsk)->thread.kernel_context->CurrPC)
-#define KSTK_ESP(tsk) ((tsk)->thread.kernel_context->AX[0].U0)
+#define KSTK_EIP(tsk) (task_pt_regs(tsk)->ctx.CurrPC)
+#define KSTK_ESP(tsk) (task_pt_regs(tsk)->ctx.AX[0].U0)
#define user_stack_pointer(regs) ((regs)->ctx.AX[0].U0)
diff --git a/arch/microblaze/kernel/entry.S b/arch/microblaze/kernel/entry.S
index 0536bc021cc6..ef548510b951 100644
--- a/arch/microblaze/kernel/entry.S
+++ b/arch/microblaze/kernel/entry.S
@@ -348,8 +348,9 @@ C_ENTRY(_user_exception):
* The LP register should point to the location where the called function
* should return. [note that MAKE_SYS_CALL uses label 1] */
/* See if the system call number is valid */
+ blti r12, 5f
addi r11, r12, -__NR_syscalls;
- bgei r11,5f;
+ bgei r11, 5f;
/* Figure out which function to use for this system call. */
/* Note Microblaze barrel shift is optional, so don't rely on it */
add r12, r12, r12; /* convert num -> ptr */
@@ -375,7 +376,7 @@ C_ENTRY(_user_exception):
/* The syscall number is invalid, return an error. */
5:
- rtsd r15, 8; /* looks like a normal subroutine return */
+ braid ret_from_trap
addi r3, r0, -ENOSYS;
/* Entry point used to return from a syscall/trap */
@@ -411,7 +412,7 @@ C_ENTRY(ret_from_trap):
bri 1b
/* Maybe handle a signal */
-5:
+5:
andi r11, r19, _TIF_SIGPENDING | _TIF_NOTIFY_RESUME;
beqi r11, 4f; /* Signals to handle, handle them */
diff --git a/arch/mips/include/asm/asmmacro-32.h b/arch/mips/include/asm/asmmacro-32.h
index cdac7b3eeaf7..80386470d3a4 100644
--- a/arch/mips/include/asm/asmmacro-32.h
+++ b/arch/mips/include/asm/asmmacro-32.h
@@ -16,38 +16,38 @@
.set push
SET_HARDFLOAT
cfc1 \tmp, fcr31
- swc1 $f0, THREAD_FPR0_LS64(\thread)
- swc1 $f1, THREAD_FPR1_LS64(\thread)
- swc1 $f2, THREAD_FPR2_LS64(\thread)
- swc1 $f3, THREAD_FPR3_LS64(\thread)
- swc1 $f4, THREAD_FPR4_LS64(\thread)
- swc1 $f5, THREAD_FPR5_LS64(\thread)
- swc1 $f6, THREAD_FPR6_LS64(\thread)
- swc1 $f7, THREAD_FPR7_LS64(\thread)
- swc1 $f8, THREAD_FPR8_LS64(\thread)
- swc1 $f9, THREAD_FPR9_LS64(\thread)
- swc1 $f10, THREAD_FPR10_LS64(\thread)
- swc1 $f11, THREAD_FPR11_LS64(\thread)
- swc1 $f12, THREAD_FPR12_LS64(\thread)
- swc1 $f13, THREAD_FPR13_LS64(\thread)
- swc1 $f14, THREAD_FPR14_LS64(\thread)
- swc1 $f15, THREAD_FPR15_LS64(\thread)
- swc1 $f16, THREAD_FPR16_LS64(\thread)
- swc1 $f17, THREAD_FPR17_LS64(\thread)
- swc1 $f18, THREAD_FPR18_LS64(\thread)
- swc1 $f19, THREAD_FPR19_LS64(\thread)
- swc1 $f20, THREAD_FPR20_LS64(\thread)
- swc1 $f21, THREAD_FPR21_LS64(\thread)
- swc1 $f22, THREAD_FPR22_LS64(\thread)
- swc1 $f23, THREAD_FPR23_LS64(\thread)
- swc1 $f24, THREAD_FPR24_LS64(\thread)
- swc1 $f25, THREAD_FPR25_LS64(\thread)
- swc1 $f26, THREAD_FPR26_LS64(\thread)
- swc1 $f27, THREAD_FPR27_LS64(\thread)
- swc1 $f28, THREAD_FPR28_LS64(\thread)
- swc1 $f29, THREAD_FPR29_LS64(\thread)
- swc1 $f30, THREAD_FPR30_LS64(\thread)
- swc1 $f31, THREAD_FPR31_LS64(\thread)
+ swc1 $f0, THREAD_FPR0(\thread)
+ swc1 $f1, THREAD_FPR1(\thread)
+ swc1 $f2, THREAD_FPR2(\thread)
+ swc1 $f3, THREAD_FPR3(\thread)
+ swc1 $f4, THREAD_FPR4(\thread)
+ swc1 $f5, THREAD_FPR5(\thread)
+ swc1 $f6, THREAD_FPR6(\thread)
+ swc1 $f7, THREAD_FPR7(\thread)
+ swc1 $f8, THREAD_FPR8(\thread)
+ swc1 $f9, THREAD_FPR9(\thread)
+ swc1 $f10, THREAD_FPR10(\thread)
+ swc1 $f11, THREAD_FPR11(\thread)
+ swc1 $f12, THREAD_FPR12(\thread)
+ swc1 $f13, THREAD_FPR13(\thread)
+ swc1 $f14, THREAD_FPR14(\thread)
+ swc1 $f15, THREAD_FPR15(\thread)
+ swc1 $f16, THREAD_FPR16(\thread)
+ swc1 $f17, THREAD_FPR17(\thread)
+ swc1 $f18, THREAD_FPR18(\thread)
+ swc1 $f19, THREAD_FPR19(\thread)
+ swc1 $f20, THREAD_FPR20(\thread)
+ swc1 $f21, THREAD_FPR21(\thread)
+ swc1 $f22, THREAD_FPR22(\thread)
+ swc1 $f23, THREAD_FPR23(\thread)
+ swc1 $f24, THREAD_FPR24(\thread)
+ swc1 $f25, THREAD_FPR25(\thread)
+ swc1 $f26, THREAD_FPR26(\thread)
+ swc1 $f27, THREAD_FPR27(\thread)
+ swc1 $f28, THREAD_FPR28(\thread)
+ swc1 $f29, THREAD_FPR29(\thread)
+ swc1 $f30, THREAD_FPR30(\thread)
+ swc1 $f31, THREAD_FPR31(\thread)
sw \tmp, THREAD_FCR31(\thread)
.set pop
.endm
@@ -56,38 +56,38 @@
.set push
SET_HARDFLOAT
lw \tmp, THREAD_FCR31(\thread)
- lwc1 $f0, THREAD_FPR0_LS64(\thread)
- lwc1 $f1, THREAD_FPR1_LS64(\thread)
- lwc1 $f2, THREAD_FPR2_LS64(\thread)
- lwc1 $f3, THREAD_FPR3_LS64(\thread)
- lwc1 $f4, THREAD_FPR4_LS64(\thread)
- lwc1 $f5, THREAD_FPR5_LS64(\thread)
- lwc1 $f6, THREAD_FPR6_LS64(\thread)
- lwc1 $f7, THREAD_FPR7_LS64(\thread)
- lwc1 $f8, THREAD_FPR8_LS64(\thread)
- lwc1 $f9, THREAD_FPR9_LS64(\thread)
- lwc1 $f10, THREAD_FPR10_LS64(\thread)
- lwc1 $f11, THREAD_FPR11_LS64(\thread)
- lwc1 $f12, THREAD_FPR12_LS64(\thread)
- lwc1 $f13, THREAD_FPR13_LS64(\thread)
- lwc1 $f14, THREAD_FPR14_LS64(\thread)
- lwc1 $f15, THREAD_FPR15_LS64(\thread)
- lwc1 $f16, THREAD_FPR16_LS64(\thread)
- lwc1 $f17, THREAD_FPR17_LS64(\thread)
- lwc1 $f18, THREAD_FPR18_LS64(\thread)
- lwc1 $f19, THREAD_FPR19_LS64(\thread)
- lwc1 $f20, THREAD_FPR20_LS64(\thread)
- lwc1 $f21, THREAD_FPR21_LS64(\thread)
- lwc1 $f22, THREAD_FPR22_LS64(\thread)
- lwc1 $f23, THREAD_FPR23_LS64(\thread)
- lwc1 $f24, THREAD_FPR24_LS64(\thread)
- lwc1 $f25, THREAD_FPR25_LS64(\thread)
- lwc1 $f26, THREAD_FPR26_LS64(\thread)
- lwc1 $f27, THREAD_FPR27_LS64(\thread)
- lwc1 $f28, THREAD_FPR28_LS64(\thread)
- lwc1 $f29, THREAD_FPR29_LS64(\thread)
- lwc1 $f30, THREAD_FPR30_LS64(\thread)
- lwc1 $f31, THREAD_FPR31_LS64(\thread)
+ lwc1 $f0, THREAD_FPR0(\thread)
+ lwc1 $f1, THREAD_FPR1(\thread)
+ lwc1 $f2, THREAD_FPR2(\thread)
+ lwc1 $f3, THREAD_FPR3(\thread)
+ lwc1 $f4, THREAD_FPR4(\thread)
+ lwc1 $f5, THREAD_FPR5(\thread)
+ lwc1 $f6, THREAD_FPR6(\thread)
+ lwc1 $f7, THREAD_FPR7(\thread)
+ lwc1 $f8, THREAD_FPR8(\thread)
+ lwc1 $f9, THREAD_FPR9(\thread)
+ lwc1 $f10, THREAD_FPR10(\thread)
+ lwc1 $f11, THREAD_FPR11(\thread)
+ lwc1 $f12, THREAD_FPR12(\thread)
+ lwc1 $f13, THREAD_FPR13(\thread)
+ lwc1 $f14, THREAD_FPR14(\thread)
+ lwc1 $f15, THREAD_FPR15(\thread)
+ lwc1 $f16, THREAD_FPR16(\thread)
+ lwc1 $f17, THREAD_FPR17(\thread)
+ lwc1 $f18, THREAD_FPR18(\thread)
+ lwc1 $f19, THREAD_FPR19(\thread)
+ lwc1 $f20, THREAD_FPR20(\thread)
+ lwc1 $f21, THREAD_FPR21(\thread)
+ lwc1 $f22, THREAD_FPR22(\thread)
+ lwc1 $f23, THREAD_FPR23(\thread)
+ lwc1 $f24, THREAD_FPR24(\thread)
+ lwc1 $f25, THREAD_FPR25(\thread)
+ lwc1 $f26, THREAD_FPR26(\thread)
+ lwc1 $f27, THREAD_FPR27(\thread)
+ lwc1 $f28, THREAD_FPR28(\thread)
+ lwc1 $f29, THREAD_FPR29(\thread)
+ lwc1 $f30, THREAD_FPR30(\thread)
+ lwc1 $f31, THREAD_FPR31(\thread)
ctc1 \tmp, fcr31
.set pop
.endm
diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h
index 0cae4595e985..6156ac8c4cfb 100644
--- a/arch/mips/include/asm/asmmacro.h
+++ b/arch/mips/include/asm/asmmacro.h
@@ -60,22 +60,22 @@
.set push
SET_HARDFLOAT
cfc1 \tmp, fcr31
- sdc1 $f0, THREAD_FPR0_LS64(\thread)
- sdc1 $f2, THREAD_FPR2_LS64(\thread)
- sdc1 $f4, THREAD_FPR4_LS64(\thread)
- sdc1 $f6, THREAD_FPR6_LS64(\thread)
- sdc1 $f8, THREAD_FPR8_LS64(\thread)
- sdc1 $f10, THREAD_FPR10_LS64(\thread)
- sdc1 $f12, THREAD_FPR12_LS64(\thread)
- sdc1 $f14, THREAD_FPR14_LS64(\thread)
- sdc1 $f16, THREAD_FPR16_LS64(\thread)
- sdc1 $f18, THREAD_FPR18_LS64(\thread)
- sdc1 $f20, THREAD_FPR20_LS64(\thread)
- sdc1 $f22, THREAD_FPR22_LS64(\thread)
- sdc1 $f24, THREAD_FPR24_LS64(\thread)
- sdc1 $f26, THREAD_FPR26_LS64(\thread)
- sdc1 $f28, THREAD_FPR28_LS64(\thread)
- sdc1 $f30, THREAD_FPR30_LS64(\thread)
+ sdc1 $f0, THREAD_FPR0(\thread)
+ sdc1 $f2, THREAD_FPR2(\thread)
+ sdc1 $f4, THREAD_FPR4(\thread)
+ sdc1 $f6, THREAD_FPR6(\thread)
+ sdc1 $f8, THREAD_FPR8(\thread)
+ sdc1 $f10, THREAD_FPR10(\thread)
+ sdc1 $f12, THREAD_FPR12(\thread)
+ sdc1 $f14, THREAD_FPR14(\thread)
+ sdc1 $f16, THREAD_FPR16(\thread)
+ sdc1 $f18, THREAD_FPR18(\thread)
+ sdc1 $f20, THREAD_FPR20(\thread)
+ sdc1 $f22, THREAD_FPR22(\thread)
+ sdc1 $f24, THREAD_FPR24(\thread)
+ sdc1 $f26, THREAD_FPR26(\thread)
+ sdc1 $f28, THREAD_FPR28(\thread)
+ sdc1 $f30, THREAD_FPR30(\thread)
sw \tmp, THREAD_FCR31(\thread)
.set pop
.endm
@@ -84,22 +84,22 @@
.set push
.set mips64r2
SET_HARDFLOAT
- sdc1 $f1, THREAD_FPR1_LS64(\thread)
- sdc1 $f3, THREAD_FPR3_LS64(\thread)
- sdc1 $f5, THREAD_FPR5_LS64(\thread)
- sdc1 $f7, THREAD_FPR7_LS64(\thread)
- sdc1 $f9, THREAD_FPR9_LS64(\thread)
- sdc1 $f11, THREAD_FPR11_LS64(\thread)
- sdc1 $f13, THREAD_FPR13_LS64(\thread)
- sdc1 $f15, THREAD_FPR15_LS64(\thread)
- sdc1 $f17, THREAD_FPR17_LS64(\thread)
- sdc1 $f19, THREAD_FPR19_LS64(\thread)
- sdc1 $f21, THREAD_FPR21_LS64(\thread)
- sdc1 $f23, THREAD_FPR23_LS64(\thread)
- sdc1 $f25, THREAD_FPR25_LS64(\thread)
- sdc1 $f27, THREAD_FPR27_LS64(\thread)
- sdc1 $f29, THREAD_FPR29_LS64(\thread)
- sdc1 $f31, THREAD_FPR31_LS64(\thread)
+ sdc1 $f1, THREAD_FPR1(\thread)
+ sdc1 $f3, THREAD_FPR3(\thread)
+ sdc1 $f5, THREAD_FPR5(\thread)
+ sdc1 $f7, THREAD_FPR7(\thread)
+ sdc1 $f9, THREAD_FPR9(\thread)
+ sdc1 $f11, THREAD_FPR11(\thread)
+ sdc1 $f13, THREAD_FPR13(\thread)
+ sdc1 $f15, THREAD_FPR15(\thread)
+ sdc1 $f17, THREAD_FPR17(\thread)
+ sdc1 $f19, THREAD_FPR19(\thread)
+ sdc1 $f21, THREAD_FPR21(\thread)
+ sdc1 $f23, THREAD_FPR23(\thread)
+ sdc1 $f25, THREAD_FPR25(\thread)
+ sdc1 $f27, THREAD_FPR27(\thread)
+ sdc1 $f29, THREAD_FPR29(\thread)
+ sdc1 $f31, THREAD_FPR31(\thread)
.set pop
.endm
@@ -118,22 +118,22 @@
.set push
SET_HARDFLOAT
lw \tmp, THREAD_FCR31(\thread)
- ldc1 $f0, THREAD_FPR0_LS64(\thread)
- ldc1 $f2, THREAD_FPR2_LS64(\thread)
- ldc1 $f4, THREAD_FPR4_LS64(\thread)
- ldc1 $f6, THREAD_FPR6_LS64(\thread)
- ldc1 $f8, THREAD_FPR8_LS64(\thread)
- ldc1 $f10, THREAD_FPR10_LS64(\thread)
- ldc1 $f12, THREAD_FPR12_LS64(\thread)
- ldc1 $f14, THREAD_FPR14_LS64(\thread)
- ldc1 $f16, THREAD_FPR16_LS64(\thread)
- ldc1 $f18, THREAD_FPR18_LS64(\thread)
- ldc1 $f20, THREAD_FPR20_LS64(\thread)
- ldc1 $f22, THREAD_FPR22_LS64(\thread)
- ldc1 $f24, THREAD_FPR24_LS64(\thread)
- ldc1 $f26, THREAD_FPR26_LS64(\thread)
- ldc1 $f28, THREAD_FPR28_LS64(\thread)
- ldc1 $f30, THREAD_FPR30_LS64(\thread)
+ ldc1 $f0, THREAD_FPR0(\thread)
+ ldc1 $f2, THREAD_FPR2(\thread)
+ ldc1 $f4, THREAD_FPR4(\thread)
+ ldc1 $f6, THREAD_FPR6(\thread)
+ ldc1 $f8, THREAD_FPR8(\thread)
+ ldc1 $f10, THREAD_FPR10(\thread)
+ ldc1 $f12, THREAD_FPR12(\thread)
+ ldc1 $f14, THREAD_FPR14(\thread)
+ ldc1 $f16, THREAD_FPR16(\thread)
+ ldc1 $f18, THREAD_FPR18(\thread)
+ ldc1 $f20, THREAD_FPR20(\thread)
+ ldc1 $f22, THREAD_FPR22(\thread)
+ ldc1 $f24, THREAD_FPR24(\thread)
+ ldc1 $f26, THREAD_FPR26(\thread)
+ ldc1 $f28, THREAD_FPR28(\thread)
+ ldc1 $f30, THREAD_FPR30(\thread)
ctc1 \tmp, fcr31
.endm
@@ -141,22 +141,22 @@
.set push
.set mips64r2
SET_HARDFLOAT
- ldc1 $f1, THREAD_FPR1_LS64(\thread)
- ldc1 $f3, THREAD_FPR3_LS64(\thread)
- ldc1 $f5, THREAD_FPR5_LS64(\thread)
- ldc1 $f7, THREAD_FPR7_LS64(\thread)
- ldc1 $f9, THREAD_FPR9_LS64(\thread)
- ldc1 $f11, THREAD_FPR11_LS64(\thread)
- ldc1 $f13, THREAD_FPR13_LS64(\thread)
- ldc1 $f15, THREAD_FPR15_LS64(\thread)
- ldc1 $f17, THREAD_FPR17_LS64(\thread)
- ldc1 $f19, THREAD_FPR19_LS64(\thread)
- ldc1 $f21, THREAD_FPR21_LS64(\thread)
- ldc1 $f23, THREAD_FPR23_LS64(\thread)
- ldc1 $f25, THREAD_FPR25_LS64(\thread)
- ldc1 $f27, THREAD_FPR27_LS64(\thread)
- ldc1 $f29, THREAD_FPR29_LS64(\thread)
- ldc1 $f31, THREAD_FPR31_LS64(\thread)
+ ldc1 $f1, THREAD_FPR1(\thread)
+ ldc1 $f3, THREAD_FPR3(\thread)
+ ldc1 $f5, THREAD_FPR5(\thread)
+ ldc1 $f7, THREAD_FPR7(\thread)
+ ldc1 $f9, THREAD_FPR9(\thread)
+ ldc1 $f11, THREAD_FPR11(\thread)
+ ldc1 $f13, THREAD_FPR13(\thread)
+ ldc1 $f15, THREAD_FPR15(\thread)
+ ldc1 $f17, THREAD_FPR17(\thread)
+ ldc1 $f19, THREAD_FPR19(\thread)
+ ldc1 $f21, THREAD_FPR21(\thread)
+ ldc1 $f23, THREAD_FPR23(\thread)
+ ldc1 $f25, THREAD_FPR25(\thread)
+ ldc1 $f27, THREAD_FPR27(\thread)
+ ldc1 $f29, THREAD_FPR29(\thread)
+ ldc1 $f31, THREAD_FPR31(\thread)
.set pop
.endm
@@ -211,6 +211,22 @@
.endm
#ifdef TOOLCHAIN_SUPPORTS_MSA
+ .macro _cfcmsa rd, cs
+ .set push
+ .set mips32r2
+ .set msa
+ cfcmsa \rd, $\cs
+ .set pop
+ .endm
+
+ .macro _ctcmsa cd, rs
+ .set push
+ .set mips32r2
+ .set msa
+ ctcmsa $\cd, \rs
+ .set pop
+ .endm
+
.macro ld_d wd, off, base
.set push
.set mips32r2
@@ -227,35 +243,35 @@
.set pop
.endm
- .macro copy_u_w rd, ws, n
+ .macro copy_u_w ws, n
.set push
.set mips32r2
.set msa
- copy_u.w \rd, $w\ws[\n]
+ copy_u.w $1, $w\ws[\n]
.set pop
.endm
- .macro copy_u_d rd, ws, n
+ .macro copy_u_d ws, n
.set push
.set mips64r2
.set msa
- copy_u.d \rd, $w\ws[\n]
+ copy_u.d $1, $w\ws[\n]
.set pop
.endm
- .macro insert_w wd, n, rs
+ .macro insert_w wd, n
.set push
.set mips32r2
.set msa
- insert.w $w\wd[\n], \rs
+ insert.w $w\wd[\n], $1
.set pop
.endm
- .macro insert_d wd, n, rs
+ .macro insert_d wd, n
.set push
.set mips64r2
.set msa
- insert.d $w\wd[\n], \rs
+ insert.d $w\wd[\n], $1
.set pop
.endm
#else
@@ -283,7 +299,7 @@
/*
* Temporary until all toolchains in use include MSA support.
*/
- .macro cfcmsa rd, cs
+ .macro _cfcmsa rd, cs
.set push
.set noat
SET_HARDFLOAT
@@ -293,7 +309,7 @@
.set pop
.endm
- .macro ctcmsa cd, rs
+ .macro _ctcmsa cd, rs
.set push
.set noat
SET_HARDFLOAT
@@ -320,44 +336,36 @@
.set pop
.endm
- .macro copy_u_w rd, ws, n
+ .macro copy_u_w ws, n
.set push
.set noat
SET_HARDFLOAT
.insn
.word COPY_UW_MSA_INSN | (\n << 16) | (\ws << 11)
- /* move triggers an assembler bug... */
- or \rd, $1, zero
.set pop
.endm
- .macro copy_u_d rd, ws, n
+ .macro copy_u_d ws, n
.set push
.set noat
SET_HARDFLOAT
.insn
.word COPY_UD_MSA_INSN | (\n << 16) | (\ws << 11)
- /* move triggers an assembler bug... */
- or \rd, $1, zero
.set pop
.endm
- .macro insert_w wd, n, rs
+ .macro insert_w wd, n
.set push
.set noat
SET_HARDFLOAT
- /* move triggers an assembler bug... */
- or $1, \rs, zero
.word INSERT_W_MSA_INSN | (\n << 16) | (\wd << 6)
.set pop
.endm
- .macro insert_d wd, n, rs
+ .macro insert_d wd, n
.set push
.set noat
SET_HARDFLOAT
- /* move triggers an assembler bug... */
- or $1, \rs, zero
.word INSERT_D_MSA_INSN | (\n << 16) | (\wd << 6)
.set pop
.endm
@@ -399,7 +407,7 @@
.set push
.set noat
SET_HARDFLOAT
- cfcmsa $1, MSA_CSR
+ _cfcmsa $1, MSA_CSR
sw $1, THREAD_MSA_CSR(\thread)
.set pop
.endm
@@ -409,7 +417,7 @@
.set noat
SET_HARDFLOAT
lw $1, THREAD_MSA_CSR(\thread)
- ctcmsa MSA_CSR, $1
+ _ctcmsa MSA_CSR, $1
.set pop
ld_d 0, THREAD_FPR0, \thread
ld_d 1, THREAD_FPR1, \thread
@@ -452,9 +460,6 @@
insert_w \wd, 2
insert_w \wd, 3
#endif
- .if 31-\wd
- msa_init_upper (\wd+1)
- .endif
.endm
.macro msa_init_all_upper
@@ -463,6 +468,37 @@
SET_HARDFLOAT
not $1, zero
msa_init_upper 0
+ msa_init_upper 1
+ msa_init_upper 2
+ msa_init_upper 3
+ msa_init_upper 4
+ msa_init_upper 5
+ msa_init_upper 6
+ msa_init_upper 7
+ msa_init_upper 8
+ msa_init_upper 9
+ msa_init_upper 10
+ msa_init_upper 11
+ msa_init_upper 12
+ msa_init_upper 13
+ msa_init_upper 14
+ msa_init_upper 15
+ msa_init_upper 16
+ msa_init_upper 17
+ msa_init_upper 18
+ msa_init_upper 19
+ msa_init_upper 20
+ msa_init_upper 21
+ msa_init_upper 22
+ msa_init_upper 23
+ msa_init_upper 24
+ msa_init_upper 25
+ msa_init_upper 26
+ msa_init_upper 27
+ msa_init_upper 28
+ msa_init_upper 29
+ msa_init_upper 30
+ msa_init_upper 31
.set pop
.endm
diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h
index dd083e999b08..b104ad9d655f 100644
--- a/arch/mips/include/asm/fpu.h
+++ b/arch/mips/include/asm/fpu.h
@@ -48,6 +48,12 @@ enum fpu_mode {
#define FPU_FR_MASK 0x1
};
+#define __disable_fpu() \
+do { \
+ clear_c0_status(ST0_CU1); \
+ disable_fpu_hazard(); \
+} while (0)
+
static inline int __enable_fpu(enum fpu_mode mode)
{
int fr;
@@ -86,7 +92,12 @@ fr_common:
enable_fpu_hazard();
/* check FR has the desired value */
- return (!!(read_c0_status() & ST0_FR) == !!fr) ? 0 : SIGFPE;
+ if (!!(read_c0_status() & ST0_FR) == !!fr)
+ return 0;
+
+ /* unsupported FR value */
+ __disable_fpu();
+ return SIGFPE;
default:
BUG();
@@ -95,12 +106,6 @@ fr_common:
return SIGFPE;
}
-#define __disable_fpu() \
-do { \
- clear_c0_status(ST0_CU1); \
- disable_fpu_hazard(); \
-} while (0)
-
#define clear_fpu_owner() clear_thread_flag(TIF_USEDFPU)
static inline int __is_fpu_owner(void)
@@ -170,6 +175,7 @@ static inline void lose_fpu(int save)
}
disable_msa();
clear_thread_flag(TIF_USEDMSA);
+ __disable_fpu();
} else if (is_fpu_owner()) {
if (save)
_save_fp(current);
diff --git a/arch/mips/include/asm/jump_label.h b/arch/mips/include/asm/jump_label.h
index fdbff44e5482..608aa57799c8 100644
--- a/arch/mips/include/asm/jump_label.h
+++ b/arch/mips/include/asm/jump_label.h
@@ -8,9 +8,9 @@
#ifndef _ASM_MIPS_JUMP_LABEL_H
#define _ASM_MIPS_JUMP_LABEL_H
-#include <linux/types.h>
+#ifndef __ASSEMBLY__
-#ifdef __KERNEL__
+#include <linux/types.h>
#define JUMP_LABEL_NOP_SIZE 4
@@ -39,8 +39,6 @@ l_yes:
return true;
}
-#endif /* __KERNEL__ */
-
#ifdef CONFIG_64BIT
typedef u64 jump_label_t;
#else
@@ -53,4 +51,5 @@ struct jump_entry {
jump_label_t key;
};
+#endif /* __ASSEMBLY__ */
#endif /* _ASM_MIPS_JUMP_LABEL_H */
diff --git a/arch/mips/include/asm/kdebug.h b/arch/mips/include/asm/kdebug.h
index 6a9af5fcb5d7..cba22ab7ad4d 100644
--- a/arch/mips/include/asm/kdebug.h
+++ b/arch/mips/include/asm/kdebug.h
@@ -10,7 +10,8 @@ enum die_val {
DIE_RI,
DIE_PAGE_FAULT,
DIE_BREAK,
- DIE_SSTEPBP
+ DIE_SSTEPBP,
+ DIE_MSAFP
};
#endif /* _ASM_MIPS_KDEBUG_H */
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index ac4fc716062b..4c25823563fe 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -21,10 +21,10 @@
/* MIPS KVM register ids */
#define MIPS_CP0_32(_R, _S) \
- (KVM_REG_MIPS | KVM_REG_SIZE_U32 | 0x10000 | (8 * (_R) + (_S)))
+ (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U32 | (8 * (_R) + (_S)))
#define MIPS_CP0_64(_R, _S) \
- (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0x10000 | (8 * (_R) + (_S)))
+ (KVM_REG_MIPS_CP0 | KVM_REG_SIZE_U64 | (8 * (_R) + (_S)))
#define KVM_REG_MIPS_CP0_INDEX MIPS_CP0_32(0, 0)
#define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0)
@@ -42,11 +42,14 @@
#define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0)
#define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0)
#define KVM_REG_MIPS_CP0_EPC MIPS_CP0_64(14, 0)
+#define KVM_REG_MIPS_CP0_PRID MIPS_CP0_32(15, 0)
#define KVM_REG_MIPS_CP0_EBASE MIPS_CP0_64(15, 1)
#define KVM_REG_MIPS_CP0_CONFIG MIPS_CP0_32(16, 0)
#define KVM_REG_MIPS_CP0_CONFIG1 MIPS_CP0_32(16, 1)
#define KVM_REG_MIPS_CP0_CONFIG2 MIPS_CP0_32(16, 2)
#define KVM_REG_MIPS_CP0_CONFIG3 MIPS_CP0_32(16, 3)
+#define KVM_REG_MIPS_CP0_CONFIG4 MIPS_CP0_32(16, 4)
+#define KVM_REG_MIPS_CP0_CONFIG5 MIPS_CP0_32(16, 5)
#define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7)
#define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0)
#define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0)
@@ -119,6 +122,10 @@ struct kvm_vcpu_stat {
u32 syscall_exits;
u32 resvd_inst_exits;
u32 break_inst_exits;
+ u32 trap_inst_exits;
+ u32 msa_fpe_exits;
+ u32 fpe_exits;
+ u32 msa_disabled_exits;
u32 flush_dcache_exits;
u32 halt_successful_poll;
u32 halt_wakeup;
@@ -138,6 +145,10 @@ enum kvm_mips_exit_types {
SYSCALL_EXITS,
RESVD_INST_EXITS,
BREAK_INST_EXITS,
+ TRAP_INST_EXITS,
+ MSA_FPE_EXITS,
+ FPE_EXITS,
+ MSA_DISABLED_EXITS,
FLUSH_DCACHE_EXITS,
MAX_KVM_MIPS_EXIT_TYPES
};
@@ -206,6 +217,8 @@ struct mips_coproc {
#define MIPS_CP0_CONFIG1_SEL 1
#define MIPS_CP0_CONFIG2_SEL 2
#define MIPS_CP0_CONFIG3_SEL 3
+#define MIPS_CP0_CONFIG4_SEL 4
+#define MIPS_CP0_CONFIG5_SEL 5
/* Config0 register bits */
#define CP0C0_M 31
@@ -262,31 +275,6 @@ struct mips_coproc {
#define CP0C3_SM 1
#define CP0C3_TL 0
-/* Have config1, Cacheable, noncoherent, write-back, write allocate*/
-#define MIPS_CONFIG0 \
- ((1 << CP0C0_M) | (0x3 << CP0C0_K0))
-
-/* Have config2, no coprocessor2 attached, no MDMX support attached,
- no performance counters, watch registers present,
- no code compression, EJTAG present, no FPU, no watch registers */
-#define MIPS_CONFIG1 \
-((1 << CP0C1_M) | \
- (0 << CP0C1_C2) | (0 << CP0C1_MD) | (0 << CP0C1_PC) | \
- (0 << CP0C1_WR) | (0 << CP0C1_CA) | (1 << CP0C1_EP) | \
- (0 << CP0C1_FP))
-
-/* Have config3, no tertiary/secondary caches implemented */
-#define MIPS_CONFIG2 \
-((1 << CP0C2_M))
-
-/* No config4, no DSP ASE, no large physaddr (PABITS),
- no external interrupt controller, no vectored interrupts,
- no 1kb pages, no SmartMIPS ASE, no trace logic */
-#define MIPS_CONFIG3 \
-((0 << CP0C3_M) | (0 << CP0C3_DSPP) | (0 << CP0C3_LPA) | \
- (0 << CP0C3_VEIC) | (0 << CP0C3_VInt) | (0 << CP0C3_SP) | \
- (0 << CP0C3_SM) | (0 << CP0C3_TL))
-
/* MMU types, the first four entries have the same layout as the
CP0C0_MT field. */
enum mips_mmu_types {
@@ -321,7 +309,9 @@ enum mips_mmu_types {
*/
#define T_TRAP 13 /* Trap instruction */
#define T_VCEI 14 /* Virtual coherency exception */
+#define T_MSAFPE 14 /* MSA floating point exception */
#define T_FPE 15 /* Floating point exception */
+#define T_MSADIS 21 /* MSA disabled exception */
#define T_WATCH 23 /* Watch address reference */
#define T_VCED 31 /* Virtual coherency data */
@@ -374,6 +364,9 @@ struct kvm_mips_tlb {
long tlb_lo1;
};
+#define KVM_MIPS_FPU_FPU 0x1
+#define KVM_MIPS_FPU_MSA 0x2
+
#define KVM_MIPS_GUEST_TLB_SIZE 64
struct kvm_vcpu_arch {
void *host_ebase, *guest_ebase;
@@ -395,6 +388,8 @@ struct kvm_vcpu_arch {
/* FPU State */
struct mips_fpu_struct fpu;
+ /* Which FPU state is loaded (KVM_MIPS_FPU_*) */
+ unsigned int fpu_inuse;
/* COP0 State */
struct mips_coproc *cop0;
@@ -441,6 +436,9 @@ struct kvm_vcpu_arch {
/* WAIT executed */
int wait;
+
+ u8 fpu_enabled;
+ u8 msa_enabled;
};
@@ -482,11 +480,15 @@ struct kvm_vcpu_arch {
#define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1])
#define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2])
#define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3])
+#define kvm_read_c0_guest_config4(cop0) (cop0->reg[MIPS_CP0_CONFIG][4])
+#define kvm_read_c0_guest_config5(cop0) (cop0->reg[MIPS_CP0_CONFIG][5])
#define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7])
#define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val))
#define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val))
#define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val))
#define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val))
+#define kvm_write_c0_guest_config4(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][4] = (val))
+#define kvm_write_c0_guest_config5(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][5] = (val))
#define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val))
#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0])
#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
@@ -567,6 +569,31 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \
}
+/* Helpers */
+
+static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu)
+{
+ return (!__builtin_constant_p(cpu_has_fpu) || cpu_has_fpu) &&
+ vcpu->fpu_enabled;
+}
+
+static inline bool kvm_mips_guest_has_fpu(struct kvm_vcpu_arch *vcpu)
+{
+ return kvm_mips_guest_can_have_fpu(vcpu) &&
+ kvm_read_c0_guest_config1(vcpu->cop0) & MIPS_CONF1_FP;
+}
+
+static inline bool kvm_mips_guest_can_have_msa(struct kvm_vcpu_arch *vcpu)
+{
+ return (!__builtin_constant_p(cpu_has_msa) || cpu_has_msa) &&
+ vcpu->msa_enabled;
+}
+
+static inline bool kvm_mips_guest_has_msa(struct kvm_vcpu_arch *vcpu)
+{
+ return kvm_mips_guest_can_have_msa(vcpu) &&
+ kvm_read_c0_guest_config3(vcpu->cop0) & MIPS_CONF3_MSA;
+}
struct kvm_mips_callbacks {
int (*handle_cop_unusable)(struct kvm_vcpu *vcpu);
@@ -578,6 +605,10 @@ struct kvm_mips_callbacks {
int (*handle_syscall)(struct kvm_vcpu *vcpu);
int (*handle_res_inst)(struct kvm_vcpu *vcpu);
int (*handle_break)(struct kvm_vcpu *vcpu);
+ int (*handle_trap)(struct kvm_vcpu *vcpu);
+ int (*handle_msa_fpe)(struct kvm_vcpu *vcpu);
+ int (*handle_fpe)(struct kvm_vcpu *vcpu);
+ int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
int (*vm_init)(struct kvm *kvm);
int (*vcpu_init)(struct kvm_vcpu *vcpu);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
@@ -596,6 +627,8 @@ struct kvm_mips_callbacks {
const struct kvm_one_reg *reg, s64 *v);
int (*set_one_reg)(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg, s64 v);
+ int (*vcpu_get_regs)(struct kvm_vcpu *vcpu);
+ int (*vcpu_set_regs)(struct kvm_vcpu *vcpu);
};
extern struct kvm_mips_callbacks *kvm_mips_callbacks;
int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks);
@@ -606,6 +639,19 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu);
/* Trampoline ASM routine to start running in "Guest" context */
extern int __kvm_mips_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu);
+/* FPU/MSA context management */
+void __kvm_save_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fpu(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_fcsr(struct kvm_vcpu_arch *vcpu);
+void __kvm_save_msa(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msa(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msa_upper(struct kvm_vcpu_arch *vcpu);
+void __kvm_restore_msacsr(struct kvm_vcpu_arch *vcpu);
+void kvm_own_fpu(struct kvm_vcpu *vcpu);
+void kvm_own_msa(struct kvm_vcpu *vcpu);
+void kvm_drop_fpu(struct kvm_vcpu *vcpu);
+void kvm_lose_fpu(struct kvm_vcpu *vcpu);
+
/* TLB handling */
uint32_t kvm_get_kernel_asid(struct kvm_vcpu *vcpu);
@@ -711,6 +757,26 @@ extern enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
+extern enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
+ uint32_t *opc,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
+ uint32_t *opc,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
+ uint32_t *opc,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu);
+
+extern enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
+ uint32_t *opc,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu);
+
extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run);
@@ -749,6 +815,11 @@ enum emulation_result kvm_mips_emulate_load(uint32_t inst,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu);
+unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu);
+
/* Dynamic binary translation */
extern int kvm_mips_trans_cache_index(uint32_t inst, uint32_t *opc,
struct kvm_vcpu *vcpu);
diff --git a/arch/mips/include/asm/processor.h b/arch/mips/include/asm/processor.h
index b5dcbee01fd7..9b3b48e21c22 100644
--- a/arch/mips/include/asm/processor.h
+++ b/arch/mips/include/asm/processor.h
@@ -105,7 +105,7 @@ union fpureg {
#ifdef CONFIG_CPU_LITTLE_ENDIAN
# define FPR_IDX(width, idx) (idx)
#else
-# define FPR_IDX(width, idx) ((FPU_REG_WIDTH / (width)) - 1 - (idx))
+# define FPR_IDX(width, idx) ((idx) ^ ((64 / (width)) - 1))
#endif
#define BUILD_FPR_ACCESS(width) \
diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h
index 2c04b6d9ff85..6985eb59b085 100644
--- a/arch/mips/include/uapi/asm/kvm.h
+++ b/arch/mips/include/uapi/asm/kvm.h
@@ -36,77 +36,85 @@ struct kvm_regs {
/*
* for KVM_GET_FPU and KVM_SET_FPU
- *
- * If Status[FR] is zero (32-bit FPU), the upper 32-bits of the FPRs
- * are zero filled.
*/
struct kvm_fpu {
- __u64 fpr[32];
- __u32 fir;
- __u32 fccr;
- __u32 fexr;
- __u32 fenr;
- __u32 fcsr;
- __u32 pad;
};
/*
- * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access CP0
+ * For MIPS, we use KVM_SET_ONE_REG and KVM_GET_ONE_REG to access various
* registers. The id field is broken down as follows:
*
- * bits[2..0] - Register 'sel' index.
- * bits[7..3] - Register 'rd' index.
- * bits[15..8] - Must be zero.
- * bits[31..16] - 1 -> CP0 registers.
- * bits[51..32] - Must be zero.
* bits[63..52] - As per linux/kvm.h
+ * bits[51..32] - Must be zero.
+ * bits[31..16] - Register set.
+ *
+ * Register set = 0: GP registers from kvm_regs (see definitions below).
+ *
+ * Register set = 1: CP0 registers.
+ * bits[15..8] - Must be zero.
+ * bits[7..3] - Register 'rd' index.
+ * bits[2..0] - Register 'sel' index.
+ *
+ * Register set = 2: KVM specific registers (see definitions below).
+ *
+ * Register set = 3: FPU / MSA registers (see definitions below).
*
* Other sets registers may be added in the future. Each set would
* have its own identifier in bits[31..16].
- *
- * The registers defined in struct kvm_regs are also accessible, the
- * id values for these are below.
*/
-#define KVM_REG_MIPS_R0 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 0)
-#define KVM_REG_MIPS_R1 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 1)
-#define KVM_REG_MIPS_R2 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 2)
-#define KVM_REG_MIPS_R3 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 3)
-#define KVM_REG_MIPS_R4 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 4)
-#define KVM_REG_MIPS_R5 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 5)
-#define KVM_REG_MIPS_R6 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 6)
-#define KVM_REG_MIPS_R7 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 7)
-#define KVM_REG_MIPS_R8 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 8)
-#define KVM_REG_MIPS_R9 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 9)
-#define KVM_REG_MIPS_R10 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 10)
-#define KVM_REG_MIPS_R11 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 11)
-#define KVM_REG_MIPS_R12 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 12)
-#define KVM_REG_MIPS_R13 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 13)
-#define KVM_REG_MIPS_R14 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 14)
-#define KVM_REG_MIPS_R15 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 15)
-#define KVM_REG_MIPS_R16 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 16)
-#define KVM_REG_MIPS_R17 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 17)
-#define KVM_REG_MIPS_R18 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 18)
-#define KVM_REG_MIPS_R19 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 19)
-#define KVM_REG_MIPS_R20 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 20)
-#define KVM_REG_MIPS_R21 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 21)
-#define KVM_REG_MIPS_R22 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 22)
-#define KVM_REG_MIPS_R23 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 23)
-#define KVM_REG_MIPS_R24 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 24)
-#define KVM_REG_MIPS_R25 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 25)
-#define KVM_REG_MIPS_R26 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 26)
-#define KVM_REG_MIPS_R27 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 27)
-#define KVM_REG_MIPS_R28 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 28)
-#define KVM_REG_MIPS_R29 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 29)
-#define KVM_REG_MIPS_R30 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 30)
-#define KVM_REG_MIPS_R31 (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 31)
-
-#define KVM_REG_MIPS_HI (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 32)
-#define KVM_REG_MIPS_LO (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 33)
-#define KVM_REG_MIPS_PC (KVM_REG_MIPS | KVM_REG_SIZE_U64 | 34)
-
-/* KVM specific control registers */
+#define KVM_REG_MIPS_GP (KVM_REG_MIPS | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_CP0 (KVM_REG_MIPS | 0x0000000000010000ULL)
+#define KVM_REG_MIPS_KVM (KVM_REG_MIPS | 0x0000000000020000ULL)
+#define KVM_REG_MIPS_FPU (KVM_REG_MIPS | 0x0000000000030000ULL)
+
+
+/*
+ * KVM_REG_MIPS_GP - General purpose registers from kvm_regs.
+ */
+
+#define KVM_REG_MIPS_R0 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 0)
+#define KVM_REG_MIPS_R1 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 1)
+#define KVM_REG_MIPS_R2 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 2)
+#define KVM_REG_MIPS_R3 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 3)
+#define KVM_REG_MIPS_R4 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 4)
+#define KVM_REG_MIPS_R5 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 5)
+#define KVM_REG_MIPS_R6 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 6)
+#define KVM_REG_MIPS_R7 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 7)
+#define KVM_REG_MIPS_R8 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 8)
+#define KVM_REG_MIPS_R9 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 9)
+#define KVM_REG_MIPS_R10 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 10)
+#define KVM_REG_MIPS_R11 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 11)
+#define KVM_REG_MIPS_R12 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 12)
+#define KVM_REG_MIPS_R13 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 13)
+#define KVM_REG_MIPS_R14 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 14)
+#define KVM_REG_MIPS_R15 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 15)
+#define KVM_REG_MIPS_R16 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 16)
+#define KVM_REG_MIPS_R17 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 17)
+#define KVM_REG_MIPS_R18 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 18)
+#define KVM_REG_MIPS_R19 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 19)
+#define KVM_REG_MIPS_R20 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 20)
+#define KVM_REG_MIPS_R21 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 21)
+#define KVM_REG_MIPS_R22 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 22)
+#define KVM_REG_MIPS_R23 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 23)
+#define KVM_REG_MIPS_R24 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 24)
+#define KVM_REG_MIPS_R25 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 25)
+#define KVM_REG_MIPS_R26 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 26)
+#define KVM_REG_MIPS_R27 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 27)
+#define KVM_REG_MIPS_R28 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 28)
+#define KVM_REG_MIPS_R29 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 29)
+#define KVM_REG_MIPS_R30 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 30)
+#define KVM_REG_MIPS_R31 (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 31)
+
+#define KVM_REG_MIPS_HI (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 32)
+#define KVM_REG_MIPS_LO (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 33)
+#define KVM_REG_MIPS_PC (KVM_REG_MIPS_GP | KVM_REG_SIZE_U64 | 34)
+
+
+/*
+ * KVM_REG_MIPS_KVM - KVM specific control registers.
+ */
/*
* CP0_Count control
@@ -118,8 +126,7 @@ struct kvm_fpu {
* safely without losing time or guest timer interrupts.
* Other: Reserved, do not change.
*/
-#define KVM_REG_MIPS_COUNT_CTL (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
- 0x20000 | 0)
+#define KVM_REG_MIPS_COUNT_CTL (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 0)
#define KVM_REG_MIPS_COUNT_CTL_DC 0x00000001
/*
@@ -131,15 +138,46 @@ struct kvm_fpu {
* emulated.
* Modifications to times in the future are rejected.
*/
-#define KVM_REG_MIPS_COUNT_RESUME (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
- 0x20000 | 1)
+#define KVM_REG_MIPS_COUNT_RESUME (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 1)
/*
* CP0_Count rate in Hz
* Specifies the rate of the CP0_Count timer in Hz. Modifications occur without
* discontinuities in CP0_Count.
*/
-#define KVM_REG_MIPS_COUNT_HZ (KVM_REG_MIPS | KVM_REG_SIZE_U64 | \
- 0x20000 | 2)
+#define KVM_REG_MIPS_COUNT_HZ (KVM_REG_MIPS_KVM | KVM_REG_SIZE_U64 | 2)
+
+
+/*
+ * KVM_REG_MIPS_FPU - Floating Point and MIPS SIMD Architecture (MSA) registers.
+ *
+ * bits[15..8] - Register subset (see definitions below).
+ * bits[7..5] - Must be zero.
+ * bits[4..0] - Register number within register subset.
+ */
+
+#define KVM_REG_MIPS_FPR (KVM_REG_MIPS_FPU | 0x0000000000000000ULL)
+#define KVM_REG_MIPS_FCR (KVM_REG_MIPS_FPU | 0x0000000000000100ULL)
+#define KVM_REG_MIPS_MSACR (KVM_REG_MIPS_FPU | 0x0000000000000200ULL)
+
+/*
+ * KVM_REG_MIPS_FPR - Floating point / Vector registers.
+ */
+#define KVM_REG_MIPS_FPR_32(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U32 | (n))
+#define KVM_REG_MIPS_FPR_64(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U64 | (n))
+#define KVM_REG_MIPS_VEC_128(n) (KVM_REG_MIPS_FPR | KVM_REG_SIZE_U128 | (n))
+
+/*
+ * KVM_REG_MIPS_FCR - Floating point control registers.
+ */
+#define KVM_REG_MIPS_FCR_IR (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 0)
+#define KVM_REG_MIPS_FCR_CSR (KVM_REG_MIPS_FCR | KVM_REG_SIZE_U32 | 31)
+
+/*
+ * KVM_REG_MIPS_MSACR - MIPS SIMD Architecture (MSA) control registers.
+ */
+#define KVM_REG_MIPS_MSA_IR (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 | 0)
+#define KVM_REG_MIPS_MSA_CSR (KVM_REG_MIPS_MSACR | KVM_REG_SIZE_U32 | 1)
+
/*
* KVM MIPS specific structures and definitions
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 750d67ac41e9..e59fd7cfac9e 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -167,72 +167,6 @@ void output_thread_fpu_defines(void)
OFFSET(THREAD_FPR30, task_struct, thread.fpu.fpr[30]);
OFFSET(THREAD_FPR31, task_struct, thread.fpu.fpr[31]);
- /* the least significant 64 bits of each FP register */
- OFFSET(THREAD_FPR0_LS64, task_struct,
- thread.fpu.fpr[0].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR1_LS64, task_struct,
- thread.fpu.fpr[1].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR2_LS64, task_struct,
- thread.fpu.fpr[2].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR3_LS64, task_struct,
- thread.fpu.fpr[3].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR4_LS64, task_struct,
- thread.fpu.fpr[4].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR5_LS64, task_struct,
- thread.fpu.fpr[5].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR6_LS64, task_struct,
- thread.fpu.fpr[6].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR7_LS64, task_struct,
- thread.fpu.fpr[7].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR8_LS64, task_struct,
- thread.fpu.fpr[8].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR9_LS64, task_struct,
- thread.fpu.fpr[9].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR10_LS64, task_struct,
- thread.fpu.fpr[10].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR11_LS64, task_struct,
- thread.fpu.fpr[11].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR12_LS64, task_struct,
- thread.fpu.fpr[12].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR13_LS64, task_struct,
- thread.fpu.fpr[13].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR14_LS64, task_struct,
- thread.fpu.fpr[14].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR15_LS64, task_struct,
- thread.fpu.fpr[15].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR16_LS64, task_struct,
- thread.fpu.fpr[16].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR17_LS64, task_struct,
- thread.fpu.fpr[17].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR18_LS64, task_struct,
- thread.fpu.fpr[18].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR19_LS64, task_struct,
- thread.fpu.fpr[19].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR20_LS64, task_struct,
- thread.fpu.fpr[20].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR21_LS64, task_struct,
- thread.fpu.fpr[21].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR22_LS64, task_struct,
- thread.fpu.fpr[22].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR23_LS64, task_struct,
- thread.fpu.fpr[23].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR24_LS64, task_struct,
- thread.fpu.fpr[24].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR25_LS64, task_struct,
- thread.fpu.fpr[25].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR26_LS64, task_struct,
- thread.fpu.fpr[26].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR27_LS64, task_struct,
- thread.fpu.fpr[27].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR28_LS64, task_struct,
- thread.fpu.fpr[28].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR29_LS64, task_struct,
- thread.fpu.fpr[29].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR30_LS64, task_struct,
- thread.fpu.fpr[30].val64[FPR_IDX(64, 0)]);
- OFFSET(THREAD_FPR31_LS64, task_struct,
- thread.fpu.fpr[31].val64[FPR_IDX(64, 0)]);
-
OFFSET(THREAD_FCR31, task_struct, thread.fpu.fcr31);
OFFSET(THREAD_MSA_CSR, task_struct, thread.fpu.msacsr);
BLANK();
@@ -470,6 +404,45 @@ void output_kvm_defines(void)
OFFSET(VCPU_LO, kvm_vcpu_arch, lo);
OFFSET(VCPU_HI, kvm_vcpu_arch, hi);
OFFSET(VCPU_PC, kvm_vcpu_arch, pc);
+ BLANK();
+
+ OFFSET(VCPU_FPR0, kvm_vcpu_arch, fpu.fpr[0]);
+ OFFSET(VCPU_FPR1, kvm_vcpu_arch, fpu.fpr[1]);
+ OFFSET(VCPU_FPR2, kvm_vcpu_arch, fpu.fpr[2]);
+ OFFSET(VCPU_FPR3, kvm_vcpu_arch, fpu.fpr[3]);
+ OFFSET(VCPU_FPR4, kvm_vcpu_arch, fpu.fpr[4]);
+ OFFSET(VCPU_FPR5, kvm_vcpu_arch, fpu.fpr[5]);
+ OFFSET(VCPU_FPR6, kvm_vcpu_arch, fpu.fpr[6]);
+ OFFSET(VCPU_FPR7, kvm_vcpu_arch, fpu.fpr[7]);
+ OFFSET(VCPU_FPR8, kvm_vcpu_arch, fpu.fpr[8]);
+ OFFSET(VCPU_FPR9, kvm_vcpu_arch, fpu.fpr[9]);
+ OFFSET(VCPU_FPR10, kvm_vcpu_arch, fpu.fpr[10]);
+ OFFSET(VCPU_FPR11, kvm_vcpu_arch, fpu.fpr[11]);
+ OFFSET(VCPU_FPR12, kvm_vcpu_arch, fpu.fpr[12]);
+ OFFSET(VCPU_FPR13, kvm_vcpu_arch, fpu.fpr[13]);
+ OFFSET(VCPU_FPR14, kvm_vcpu_arch, fpu.fpr[14]);
+ OFFSET(VCPU_FPR15, kvm_vcpu_arch, fpu.fpr[15]);
+ OFFSET(VCPU_FPR16, kvm_vcpu_arch, fpu.fpr[16]);
+ OFFSET(VCPU_FPR17, kvm_vcpu_arch, fpu.fpr[17]);
+ OFFSET(VCPU_FPR18, kvm_vcpu_arch, fpu.fpr[18]);
+ OFFSET(VCPU_FPR19, kvm_vcpu_arch, fpu.fpr[19]);
+ OFFSET(VCPU_FPR20, kvm_vcpu_arch, fpu.fpr[20]);
+ OFFSET(VCPU_FPR21, kvm_vcpu_arch, fpu.fpr[21]);
+ OFFSET(VCPU_FPR22, kvm_vcpu_arch, fpu.fpr[22]);
+ OFFSET(VCPU_FPR23, kvm_vcpu_arch, fpu.fpr[23]);
+ OFFSET(VCPU_FPR24, kvm_vcpu_arch, fpu.fpr[24]);
+ OFFSET(VCPU_FPR25, kvm_vcpu_arch, fpu.fpr[25]);
+ OFFSET(VCPU_FPR26, kvm_vcpu_arch, fpu.fpr[26]);
+ OFFSET(VCPU_FPR27, kvm_vcpu_arch, fpu.fpr[27]);
+ OFFSET(VCPU_FPR28, kvm_vcpu_arch, fpu.fpr[28]);
+ OFFSET(VCPU_FPR29, kvm_vcpu_arch, fpu.fpr[29]);
+ OFFSET(VCPU_FPR30, kvm_vcpu_arch, fpu.fpr[30]);
+ OFFSET(VCPU_FPR31, kvm_vcpu_arch, fpu.fpr[31]);
+
+ OFFSET(VCPU_FCR31, kvm_vcpu_arch, fpu.fcr31);
+ OFFSET(VCPU_MSA_CSR, kvm_vcpu_arch, fpu.msacsr);
+ BLANK();
+
OFFSET(VCPU_COP0, kvm_vcpu_arch, cop0);
OFFSET(VCPU_GUEST_KERNEL_ASID, kvm_vcpu_arch, guest_kernel_asid);
OFFSET(VCPU_GUEST_USER_ASID, kvm_vcpu_arch, guest_user_asid);
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 2ebaabe3af15..af42e7003f12 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -360,12 +360,15 @@ NESTED(nmi_handler, PT_SIZE, sp)
.set mips1
SET_HARDFLOAT
cfc1 a1, fcr31
- li a2, ~(0x3f << 12)
- and a2, a1
- ctc1 a2, fcr31
.set pop
- TRACE_IRQS_ON
- STI
+ CLI
+ TRACE_IRQS_OFF
+ .endm
+
+ .macro __build_clear_msa_fpe
+ _cfcmsa a1, MSA_CSR
+ CLI
+ TRACE_IRQS_OFF
.endm
.macro __build_clear_ade
@@ -426,7 +429,7 @@ NESTED(nmi_handler, PT_SIZE, sp)
BUILD_HANDLER cpu cpu sti silent /* #11 */
BUILD_HANDLER ov ov sti silent /* #12 */
BUILD_HANDLER tr tr sti silent /* #13 */
- BUILD_HANDLER msa_fpe msa_fpe sti silent /* #14 */
+ BUILD_HANDLER msa_fpe msa_fpe msa_fpe silent /* #14 */
BUILD_HANDLER fpe fpe fpe silent /* #15 */
BUILD_HANDLER ftlb ftlb none silent /* #16 */
BUILD_HANDLER msa msa sti silent /* #21 */
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 510452812594..7da6e324dd35 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -46,6 +46,26 @@
#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
+static void init_fp_ctx(struct task_struct *target)
+{
+ /* If FP has been used then the target already has context */
+ if (tsk_used_math(target))
+ return;
+
+ /* Begin with data registers set to all 1s... */
+ memset(&target->thread.fpu.fpr, ~0, sizeof(target->thread.fpu.fpr));
+
+ /* ...and FCSR zeroed */
+ target->thread.fpu.fcr31 = 0;
+
+ /*
+ * Record that the target has "used" math, such that the context
+ * just initialised, and any modifications made by the caller,
+ * aren't discarded.
+ */
+ set_stopped_child_used_math(target);
+}
+
/*
* Called by kernel/ptrace.c when detaching..
*
@@ -142,6 +162,7 @@ int ptrace_setfpregs(struct task_struct *child, __u32 __user *data)
if (!access_ok(VERIFY_READ, data, 33 * 8))
return -EIO;
+ init_fp_ctx(child);
fregs = get_fpu_regs(child);
for (i = 0; i < 32; i++) {
@@ -439,6 +460,8 @@ static int fpr_set(struct task_struct *target,
/* XXX fcr31 */
+ init_fp_ctx(target);
+
if (sizeof(target->thread.fpu.fpr[i]) == sizeof(elf_fpreg_t))
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu,
@@ -660,12 +683,7 @@ long arch_ptrace(struct task_struct *child, long request,
case FPR_BASE ... FPR_BASE + 31: {
union fpureg *fregs = get_fpu_regs(child);
- if (!tsk_used_math(child)) {
- /* FP not yet used */
- memset(&child->thread.fpu, ~0,
- sizeof(child->thread.fpu));
- child->thread.fpu.fcr31 = 0;
- }
+ init_fp_ctx(child);
#ifdef CONFIG_32BIT
if (test_thread_flag(TIF_32BIT_FPREGS)) {
/*
diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S
index 676c5030a953..1d88af26ba82 100644
--- a/arch/mips/kernel/r4k_fpu.S
+++ b/arch/mips/kernel/r4k_fpu.S
@@ -34,7 +34,6 @@
.endm
.set noreorder
- .set MIPS_ISA_ARCH_LEVEL_RAW
LEAF(_save_fp_context)
.set push
@@ -103,6 +102,7 @@ LEAF(_save_fp_context)
/* Save 32-bit process floating point context */
LEAF(_save_fp_context32)
.set push
+ .set MIPS_ISA_ARCH_LEVEL_RAW
SET_HARDFLOAT
cfc1 t1, fcr31
diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c
index 33984c04b60b..5b4d711f878d 100644
--- a/arch/mips/kernel/traps.c
+++ b/arch/mips/kernel/traps.c
@@ -701,6 +701,13 @@ asmlinkage void do_ov(struct pt_regs *regs)
int process_fpemu_return(int sig, void __user *fault_addr)
{
+ /*
+ * We can't allow the emulated instruction to leave any of the cause
+ * bits set in FCSR. If they were then the kernel would take an FP
+ * exception when restoring FP context.
+ */
+ current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+
if (sig == SIGSEGV || sig == SIGBUS) {
struct siginfo si = {0};
si.si_addr = fault_addr;
@@ -781,6 +788,11 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
if (notify_die(DIE_FP, "FP exception", regs, 0, regs_to_trapnr(regs),
SIGFPE) == NOTIFY_STOP)
goto out;
+
+ /* Clear FCSR.Cause before enabling interrupts */
+ write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X);
+ local_irq_enable();
+
die_if_kernel("FP exception in kernel code", regs);
if (fcr31 & FPU_CSR_UNI_X) {
@@ -804,18 +816,12 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31)
sig = fpu_emulator_cop1Handler(regs, &current->thread.fpu, 1,
&fault_addr);
- /*
- * We can't allow the emulated instruction to leave any of
- * the cause bit set in $fcr31.
- */
- current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X;
+ /* If something went wrong, signal */
+ process_fpemu_return(sig, fault_addr);
/* Restore the hardware register state */
own_fpu(1); /* Using the FPU again. */
- /* If something went wrong, signal */
- process_fpemu_return(sig, fault_addr);
-
goto out;
} else if (fcr31 & FPU_CSR_INV_X)
info.si_code = FPE_FLTINV;
@@ -1392,13 +1398,22 @@ out:
exception_exit(prev_state);
}
-asmlinkage void do_msa_fpe(struct pt_regs *regs)
+asmlinkage void do_msa_fpe(struct pt_regs *regs, unsigned int msacsr)
{
enum ctx_state prev_state;
prev_state = exception_enter();
+ if (notify_die(DIE_MSAFP, "MSA FP exception", regs, 0,
+ regs_to_trapnr(regs), SIGFPE) == NOTIFY_STOP)
+ goto out;
+
+ /* Clear MSACSR.Cause before enabling interrupts */
+ write_msa_csr(msacsr & ~MSA_CSR_CAUSEF);
+ local_irq_enable();
+
die_if_kernel("do_msa_fpe invoked from kernel context!", regs);
force_sig(SIGFPE, current);
+out:
exception_exit(prev_state);
}
diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index 401fe027c261..637ebbebd549 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -1,13 +1,15 @@
# Makefile for KVM support for MIPS
#
-common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
+common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o)
EXTRA_CFLAGS += -Ivirt/kvm -Iarch/mips/kvm
-kvm-objs := $(common-objs) mips.o emulate.o locore.o \
+common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
+
+kvm-objs := $(common-objs-y) mips.o emulate.o locore.o \
interrupt.o stats.o commpage.o \
- dyntrans.o trap_emul.o
+ dyntrans.o trap_emul.o fpu.o
obj-$(CONFIG_KVM) += kvm.o
obj-y += callback.o tlb.o
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index fb3e8dfd1ff6..6230f376a44e 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -884,6 +884,84 @@ enum emulation_result kvm_mips_emul_tlbp(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
+/**
+ * kvm_mips_config1_wrmask() - Find mask of writable bits in guest Config1
+ * @vcpu: Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config1 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu)
+{
+ unsigned int mask = 0;
+
+ /* Permit FPU to be present if FPU is supported */
+ if (kvm_mips_guest_can_have_fpu(&vcpu->arch))
+ mask |= MIPS_CONF1_FP;
+
+ return mask;
+}
+
+/**
+ * kvm_mips_config3_wrmask() - Find mask of writable bits in guest Config3
+ * @vcpu: Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config3 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu)
+{
+ /* Config4 is optional */
+ unsigned int mask = MIPS_CONF_M;
+
+ /* Permit MSA to be present if MSA is supported */
+ if (kvm_mips_guest_can_have_msa(&vcpu->arch))
+ mask |= MIPS_CONF3_MSA;
+
+ return mask;
+}
+
+/**
+ * kvm_mips_config4_wrmask() - Find mask of writable bits in guest Config4
+ * @vcpu: Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config4 CP0
+ * register, by userland (currently read-only to the guest).
+ */
+unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu)
+{
+ /* Config5 is optional */
+ return MIPS_CONF_M;
+}
+
+/**
+ * kvm_mips_config5_wrmask() - Find mask of writable bits in guest Config5
+ * @vcpu: Virtual CPU.
+ *
+ * Finds the mask of bits which are writable in the guest's Config5 CP0
+ * register, by the guest itself.
+ */
+unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu)
+{
+ unsigned int mask = 0;
+
+ /* Permit MSAEn changes if MSA supported and enabled */
+ if (kvm_mips_guest_has_msa(&vcpu->arch))
+ mask |= MIPS_CONF5_MSAEN;
+
+ /*
+ * Permit guest FPU mode changes if FPU is enabled and the relevant
+ * feature exists according to FIR register.
+ */
+ if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
+ if (cpu_has_fre)
+ mask |= MIPS_CONF5_FRE;
+ /* We don't support UFR or UFE */
+ }
+
+ return mask;
+}
+
enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
uint32_t cause, struct kvm_run *run,
struct kvm_vcpu *vcpu)
@@ -1021,18 +1099,114 @@ enum emulation_result kvm_mips_emulate_CP0(uint32_t inst, uint32_t *opc,
kvm_mips_write_compare(vcpu,
vcpu->arch.gprs[rt]);
} else if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
- kvm_write_c0_guest_status(cop0,
- vcpu->arch.gprs[rt]);
+ unsigned int old_val, val, change;
+
+ old_val = kvm_read_c0_guest_status(cop0);
+ val = vcpu->arch.gprs[rt];
+ change = val ^ old_val;
+
+ /* Make sure that the NMI bit is never set */
+ val &= ~ST0_NMI;
+
+ /*
+ * Don't allow CU1 or FR to be set unless FPU
+ * capability enabled and exists in guest
+ * configuration.
+ */
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ val &= ~(ST0_CU1 | ST0_FR);
+
+ /*
+ * Also don't allow FR to be set if host doesn't
+ * support it.
+ */
+ if (!(current_cpu_data.fpu_id & MIPS_FPIR_F64))
+ val &= ~ST0_FR;
+
+
+ /* Handle changes in FPU mode */
+ preempt_disable();
+
+ /*
+ * FPU and Vector register state is made
+ * UNPREDICTABLE by a change of FR, so don't
+ * even bother saving it.
+ */
+ if (change & ST0_FR)
+ kvm_drop_fpu(vcpu);
+
+ /*
+ * If MSA state is already live, it is undefined
+ * how it interacts with FR=0 FPU state, and we
+ * don't want to hit reserved instruction
+ * exceptions trying to save the MSA state later
+ * when CU=1 && FR=1, so play it safe and save
+ * it first.
+ */
+ if (change & ST0_CU1 && !(val & ST0_FR) &&
+ vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+ kvm_lose_fpu(vcpu);
+
/*
- * Make sure that CU1 and NMI bits are
- * never set
+ * Propagate CU1 (FPU enable) changes
+ * immediately if the FPU context is already
+ * loaded. When disabling we leave the context
+ * loaded so it can be quickly enabled again in
+ * the near future.
*/
- kvm_clear_c0_guest_status(cop0,
- (ST0_CU1 | ST0_NMI));
+ if (change & ST0_CU1 &&
+ vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+ change_c0_status(ST0_CU1, val);
+
+ preempt_enable();
+
+ kvm_write_c0_guest_status(cop0, val);
#ifdef CONFIG_KVM_MIPS_DYN_TRANS
- kvm_mips_trans_mtc0(inst, opc, vcpu);
+ /*
+ * If FPU present, we need CU1/FR bits to take
+ * effect fairly soon.
+ */
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ kvm_mips_trans_mtc0(inst, opc, vcpu);
#endif
+ } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) {
+ unsigned int old_val, val, change, wrmask;
+
+ old_val = kvm_read_c0_guest_config5(cop0);
+ val = vcpu->arch.gprs[rt];
+
+ /* Only a few bits are writable in Config5 */
+ wrmask = kvm_mips_config5_wrmask(vcpu);
+ change = (val ^ old_val) & wrmask;
+ val = old_val ^ change;
+
+
+ /* Handle changes in FPU/MSA modes */
+ preempt_disable();
+
+ /*
+ * Propagate FRE changes immediately if the FPU
+ * context is already loaded.
+ */
+ if (change & MIPS_CONF5_FRE &&
+ vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+ change_c0_config5(MIPS_CONF5_FRE, val);
+
+ /*
+ * Propagate MSAEn changes immediately if the
+ * MSA context is already loaded. When disabling
+ * we leave the context loaded so it can be
+ * quickly enabled again in the near future.
+ */
+ if (change & MIPS_CONF5_MSAEN &&
+ vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+ change_c0_config5(MIPS_CONF5_MSAEN,
+ val);
+
+ preempt_enable();
+
+ kvm_write_c0_guest_config5(cop0, val);
} else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
uint32_t old_cause, new_cause;
@@ -1970,6 +2144,146 @@ enum emulation_result kvm_mips_emulate_bp_exc(unsigned long cause,
return er;
}
+enum emulation_result kvm_mips_emulate_trap_exc(unsigned long cause,
+ uint32_t *opc,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ struct kvm_vcpu_arch *arch = &vcpu->arch;
+ enum emulation_result er = EMULATE_DONE;
+
+ if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+ /* save old pc */
+ kvm_write_c0_guest_epc(cop0, arch->pc);
+ kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+ if (cause & CAUSEF_BD)
+ kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+ else
+ kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+ kvm_debug("Delivering TRAP @ pc %#lx\n", arch->pc);
+
+ kvm_change_c0_guest_cause(cop0, (0xff),
+ (T_TRAP << CAUSEB_EXCCODE));
+
+ /* Set PC to the exception entry point */
+ arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+ } else {
+ kvm_err("Trying to deliver TRAP when EXL is already set\n");
+ er = EMULATE_FAIL;
+ }
+
+ return er;
+}
+
+enum emulation_result kvm_mips_emulate_msafpe_exc(unsigned long cause,
+ uint32_t *opc,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ struct kvm_vcpu_arch *arch = &vcpu->arch;
+ enum emulation_result er = EMULATE_DONE;
+
+ if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+ /* save old pc */
+ kvm_write_c0_guest_epc(cop0, arch->pc);
+ kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+ if (cause & CAUSEF_BD)
+ kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+ else
+ kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+ kvm_debug("Delivering MSAFPE @ pc %#lx\n", arch->pc);
+
+ kvm_change_c0_guest_cause(cop0, (0xff),
+ (T_MSAFPE << CAUSEB_EXCCODE));
+
+ /* Set PC to the exception entry point */
+ arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+ } else {
+ kvm_err("Trying to deliver MSAFPE when EXL is already set\n");
+ er = EMULATE_FAIL;
+ }
+
+ return er;
+}
+
+enum emulation_result kvm_mips_emulate_fpe_exc(unsigned long cause,
+ uint32_t *opc,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ struct kvm_vcpu_arch *arch = &vcpu->arch;
+ enum emulation_result er = EMULATE_DONE;
+
+ if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+ /* save old pc */
+ kvm_write_c0_guest_epc(cop0, arch->pc);
+ kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+ if (cause & CAUSEF_BD)
+ kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+ else
+ kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+ kvm_debug("Delivering FPE @ pc %#lx\n", arch->pc);
+
+ kvm_change_c0_guest_cause(cop0, (0xff),
+ (T_FPE << CAUSEB_EXCCODE));
+
+ /* Set PC to the exception entry point */
+ arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+ } else {
+ kvm_err("Trying to deliver FPE when EXL is already set\n");
+ er = EMULATE_FAIL;
+ }
+
+ return er;
+}
+
+enum emulation_result kvm_mips_emulate_msadis_exc(unsigned long cause,
+ uint32_t *opc,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ struct kvm_vcpu_arch *arch = &vcpu->arch;
+ enum emulation_result er = EMULATE_DONE;
+
+ if ((kvm_read_c0_guest_status(cop0) & ST0_EXL) == 0) {
+ /* save old pc */
+ kvm_write_c0_guest_epc(cop0, arch->pc);
+ kvm_set_c0_guest_status(cop0, ST0_EXL);
+
+ if (cause & CAUSEF_BD)
+ kvm_set_c0_guest_cause(cop0, CAUSEF_BD);
+ else
+ kvm_clear_c0_guest_cause(cop0, CAUSEF_BD);
+
+ kvm_debug("Delivering MSADIS @ pc %#lx\n", arch->pc);
+
+ kvm_change_c0_guest_cause(cop0, (0xff),
+ (T_MSADIS << CAUSEB_EXCCODE));
+
+ /* Set PC to the exception entry point */
+ arch->pc = KVM_GUEST_KSEG0 + 0x180;
+
+ } else {
+ kvm_err("Trying to deliver MSADIS when EXL is already set\n");
+ er = EMULATE_FAIL;
+ }
+
+ return er;
+}
+
/* ll/sc, rdhwr, sync emulation */
#define OPCODE 0xfc000000
@@ -2176,6 +2490,10 @@ enum emulation_result kvm_mips_check_privilege(unsigned long cause,
case T_SYSCALL:
case T_BREAK:
case T_RES_INST:
+ case T_TRAP:
+ case T_MSAFPE:
+ case T_FPE:
+ case T_MSADIS:
break;
case T_COP_UNUSABLE:
diff --git a/arch/mips/kvm/fpu.S b/arch/mips/kvm/fpu.S
new file mode 100644
index 000000000000..531fbf5131c0
--- /dev/null
+++ b/arch/mips/kvm/fpu.S
@@ -0,0 +1,122 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * FPU context handling code for KVM.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/fpregdef.h>
+#include <asm/mipsregs.h>
+#include <asm/regdef.h>
+
+ .set noreorder
+ .set noat
+
+LEAF(__kvm_save_fpu)
+ .set push
+ .set mips64r2
+ SET_HARDFLOAT
+ mfc0 t0, CP0_STATUS
+ sll t0, t0, 5 # is Status.FR set?
+ bgez t0, 1f # no: skip odd doubles
+ nop
+ sdc1 $f1, VCPU_FPR1(a0)
+ sdc1 $f3, VCPU_FPR3(a0)
+ sdc1 $f5, VCPU_FPR5(a0)
+ sdc1 $f7, VCPU_FPR7(a0)
+ sdc1 $f9, VCPU_FPR9(a0)
+ sdc1 $f11, VCPU_FPR11(a0)
+ sdc1 $f13, VCPU_FPR13(a0)
+ sdc1 $f15, VCPU_FPR15(a0)
+ sdc1 $f17, VCPU_FPR17(a0)
+ sdc1 $f19, VCPU_FPR19(a0)
+ sdc1 $f21, VCPU_FPR21(a0)
+ sdc1 $f23, VCPU_FPR23(a0)
+ sdc1 $f25, VCPU_FPR25(a0)
+ sdc1 $f27, VCPU_FPR27(a0)
+ sdc1 $f29, VCPU_FPR29(a0)
+ sdc1 $f31, VCPU_FPR31(a0)
+1: sdc1 $f0, VCPU_FPR0(a0)
+ sdc1 $f2, VCPU_FPR2(a0)
+ sdc1 $f4, VCPU_FPR4(a0)
+ sdc1 $f6, VCPU_FPR6(a0)
+ sdc1 $f8, VCPU_FPR8(a0)
+ sdc1 $f10, VCPU_FPR10(a0)
+ sdc1 $f12, VCPU_FPR12(a0)
+ sdc1 $f14, VCPU_FPR14(a0)
+ sdc1 $f16, VCPU_FPR16(a0)
+ sdc1 $f18, VCPU_FPR18(a0)
+ sdc1 $f20, VCPU_FPR20(a0)
+ sdc1 $f22, VCPU_FPR22(a0)
+ sdc1 $f24, VCPU_FPR24(a0)
+ sdc1 $f26, VCPU_FPR26(a0)
+ sdc1 $f28, VCPU_FPR28(a0)
+ jr ra
+ sdc1 $f30, VCPU_FPR30(a0)
+ .set pop
+ END(__kvm_save_fpu)
+
+LEAF(__kvm_restore_fpu)
+ .set push
+ .set mips64r2
+ SET_HARDFLOAT
+ mfc0 t0, CP0_STATUS
+ sll t0, t0, 5 # is Status.FR set?
+ bgez t0, 1f # no: skip odd doubles
+ nop
+ ldc1 $f1, VCPU_FPR1(a0)
+ ldc1 $f3, VCPU_FPR3(a0)
+ ldc1 $f5, VCPU_FPR5(a0)
+ ldc1 $f7, VCPU_FPR7(a0)
+ ldc1 $f9, VCPU_FPR9(a0)
+ ldc1 $f11, VCPU_FPR11(a0)
+ ldc1 $f13, VCPU_FPR13(a0)
+ ldc1 $f15, VCPU_FPR15(a0)
+ ldc1 $f17, VCPU_FPR17(a0)
+ ldc1 $f19, VCPU_FPR19(a0)
+ ldc1 $f21, VCPU_FPR21(a0)
+ ldc1 $f23, VCPU_FPR23(a0)
+ ldc1 $f25, VCPU_FPR25(a0)
+ ldc1 $f27, VCPU_FPR27(a0)
+ ldc1 $f29, VCPU_FPR29(a0)
+ ldc1 $f31, VCPU_FPR31(a0)
+1: ldc1 $f0, VCPU_FPR0(a0)
+ ldc1 $f2, VCPU_FPR2(a0)
+ ldc1 $f4, VCPU_FPR4(a0)
+ ldc1 $f6, VCPU_FPR6(a0)
+ ldc1 $f8, VCPU_FPR8(a0)
+ ldc1 $f10, VCPU_FPR10(a0)
+ ldc1 $f12, VCPU_FPR12(a0)
+ ldc1 $f14, VCPU_FPR14(a0)
+ ldc1 $f16, VCPU_FPR16(a0)
+ ldc1 $f18, VCPU_FPR18(a0)
+ ldc1 $f20, VCPU_FPR20(a0)
+ ldc1 $f22, VCPU_FPR22(a0)
+ ldc1 $f24, VCPU_FPR24(a0)
+ ldc1 $f26, VCPU_FPR26(a0)
+ ldc1 $f28, VCPU_FPR28(a0)
+ jr ra
+ ldc1 $f30, VCPU_FPR30(a0)
+ .set pop
+ END(__kvm_restore_fpu)
+
+LEAF(__kvm_restore_fcsr)
+ .set push
+ SET_HARDFLOAT
+ lw t0, VCPU_FCR31(a0)
+ /*
+ * The ctc1 must stay at this offset in __kvm_restore_fcsr.
+ * See kvm_mips_csr_die_notify() which handles t0 containing a value
+ * which triggers an FP Exception, which must be stepped over and
+ * ignored since the set cause bits must remain there for the guest.
+ */
+ ctc1 t0, fcr31
+ jr ra
+ nop
+ .set pop
+ END(__kvm_restore_fcsr)
diff --git a/arch/mips/kvm/locore.S b/arch/mips/kvm/locore.S
index 4a68b176d6e4..c567240386a0 100644
--- a/arch/mips/kvm/locore.S
+++ b/arch/mips/kvm/locore.S
@@ -36,6 +36,8 @@
#define PT_HOST_USERLOCAL PT_EPC
#define CP0_DDATA_LO $28,3
+#define CP0_CONFIG3 $16,3
+#define CP0_CONFIG5 $16,5
#define CP0_EBASE $15,1
#define CP0_INTCTL $12,1
@@ -353,6 +355,42 @@ NESTED (MIPSX(GuestException), CALLFRAME_SIZ, ra)
LONG_L k0, VCPU_HOST_EBASE(k1)
mtc0 k0,CP0_EBASE
+ /*
+ * If FPU is enabled, save FCR31 and clear it so that later ctc1's don't
+ * trigger FPE for pending exceptions.
+ */
+ .set at
+ and v1, v0, ST0_CU1
+ beqz v1, 1f
+ nop
+ .set push
+ SET_HARDFLOAT
+ cfc1 t0, fcr31
+ sw t0, VCPU_FCR31(k1)
+ ctc1 zero,fcr31
+ .set pop
+ .set noat
+1:
+
+#ifdef CONFIG_CPU_HAS_MSA
+ /*
+ * If MSA is enabled, save MSACSR and clear it so that later
+ * instructions don't trigger MSAFPE for pending exceptions.
+ */
+ mfc0 t0, CP0_CONFIG3
+ ext t0, t0, 28, 1 /* MIPS_CONF3_MSAP */
+ beqz t0, 1f
+ nop
+ mfc0 t0, CP0_CONFIG5
+ ext t0, t0, 27, 1 /* MIPS_CONF5_MSAEN */
+ beqz t0, 1f
+ nop
+ _cfcmsa t0, MSA_CSR
+ sw t0, VCPU_MSA_CSR(k1)
+ _ctcmsa MSA_CSR, zero
+1:
+#endif
+
/* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
.set at
and v0, v0, ~(ST0_EXL | KSU_USER | ST0_IE)
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index c9eccf5df912..bb68e8d520e8 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -11,6 +11,7 @@
#include <linux/errno.h>
#include <linux/err.h>
+#include <linux/kdebug.h>
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/fs.h>
@@ -48,6 +49,10 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "syscall", VCPU_STAT(syscall_exits), KVM_STAT_VCPU },
{ "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU },
{ "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU },
+ { "trap_inst", VCPU_STAT(trap_inst_exits), KVM_STAT_VCPU },
+ { "msa_fpe", VCPU_STAT(msa_fpe_exits), KVM_STAT_VCPU },
+ { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU },
+ { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU },
{ "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
{ "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU },
@@ -504,10 +509,13 @@ static u64 kvm_mips_get_one_regs[] = {
KVM_REG_MIPS_CP0_STATUS,
KVM_REG_MIPS_CP0_CAUSE,
KVM_REG_MIPS_CP0_EPC,
+ KVM_REG_MIPS_CP0_PRID,
KVM_REG_MIPS_CP0_CONFIG,
KVM_REG_MIPS_CP0_CONFIG1,
KVM_REG_MIPS_CP0_CONFIG2,
KVM_REG_MIPS_CP0_CONFIG3,
+ KVM_REG_MIPS_CP0_CONFIG4,
+ KVM_REG_MIPS_CP0_CONFIG5,
KVM_REG_MIPS_CP0_CONFIG7,
KVM_REG_MIPS_CP0_ERROREPC,
@@ -520,10 +528,14 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
+ struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
int ret;
s64 v;
+ s64 vs[2];
+ unsigned int idx;
switch (reg->id) {
+ /* General purpose registers */
case KVM_REG_MIPS_R0 ... KVM_REG_MIPS_R31:
v = (long)vcpu->arch.gprs[reg->id - KVM_REG_MIPS_R0];
break;
@@ -537,6 +549,67 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
v = (long)vcpu->arch.pc;
break;
+ /* Floating point registers */
+ case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31):
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ return -EINVAL;
+ idx = reg->id - KVM_REG_MIPS_FPR_32(0);
+ /* Odd singles in top of even double when FR=0 */
+ if (kvm_read_c0_guest_status(cop0) & ST0_FR)
+ v = get_fpr32(&fpu->fpr[idx], 0);
+ else
+ v = get_fpr32(&fpu->fpr[idx & ~1], idx & 1);
+ break;
+ case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31):
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ return -EINVAL;
+ idx = reg->id - KVM_REG_MIPS_FPR_64(0);
+ /* Can't access odd doubles in FR=0 mode */
+ if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR))
+ return -EINVAL;
+ v = get_fpr64(&fpu->fpr[idx], 0);
+ break;
+ case KVM_REG_MIPS_FCR_IR:
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ return -EINVAL;
+ v = boot_cpu_data.fpu_id;
+ break;
+ case KVM_REG_MIPS_FCR_CSR:
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ return -EINVAL;
+ v = fpu->fcr31;
+ break;
+
+ /* MIPS SIMD Architecture (MSA) registers */
+ case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31):
+ if (!kvm_mips_guest_has_msa(&vcpu->arch))
+ return -EINVAL;
+ /* Can't access MSA registers in FR=0 mode */
+ if (!(kvm_read_c0_guest_status(cop0) & ST0_FR))
+ return -EINVAL;
+ idx = reg->id - KVM_REG_MIPS_VEC_128(0);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+ /* least significant byte first */
+ vs[0] = get_fpr64(&fpu->fpr[idx], 0);
+ vs[1] = get_fpr64(&fpu->fpr[idx], 1);
+#else
+ /* most significant byte first */
+ vs[0] = get_fpr64(&fpu->fpr[idx], 1);
+ vs[1] = get_fpr64(&fpu->fpr[idx], 0);
+#endif
+ break;
+ case KVM_REG_MIPS_MSA_IR:
+ if (!kvm_mips_guest_has_msa(&vcpu->arch))
+ return -EINVAL;
+ v = boot_cpu_data.msa_id;
+ break;
+ case KVM_REG_MIPS_MSA_CSR:
+ if (!kvm_mips_guest_has_msa(&vcpu->arch))
+ return -EINVAL;
+ v = fpu->msacsr;
+ break;
+
+ /* Co-processor 0 registers */
case KVM_REG_MIPS_CP0_INDEX:
v = (long)kvm_read_c0_guest_index(cop0);
break;
@@ -573,8 +646,8 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
case KVM_REG_MIPS_CP0_EPC:
v = (long)kvm_read_c0_guest_epc(cop0);
break;
- case KVM_REG_MIPS_CP0_ERROREPC:
- v = (long)kvm_read_c0_guest_errorepc(cop0);
+ case KVM_REG_MIPS_CP0_PRID:
+ v = (long)kvm_read_c0_guest_prid(cop0);
break;
case KVM_REG_MIPS_CP0_CONFIG:
v = (long)kvm_read_c0_guest_config(cop0);
@@ -588,9 +661,18 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
case KVM_REG_MIPS_CP0_CONFIG3:
v = (long)kvm_read_c0_guest_config3(cop0);
break;
+ case KVM_REG_MIPS_CP0_CONFIG4:
+ v = (long)kvm_read_c0_guest_config4(cop0);
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG5:
+ v = (long)kvm_read_c0_guest_config5(cop0);
+ break;
case KVM_REG_MIPS_CP0_CONFIG7:
v = (long)kvm_read_c0_guest_config7(cop0);
break;
+ case KVM_REG_MIPS_CP0_ERROREPC:
+ v = (long)kvm_read_c0_guest_errorepc(cop0);
+ break;
/* registers to be handled specially */
case KVM_REG_MIPS_CP0_COUNT:
case KVM_REG_MIPS_COUNT_CTL:
@@ -612,6 +694,10 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu,
u32 v32 = (u32)v;
return put_user(v32, uaddr32);
+ } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
+ void __user *uaddr = (void __user *)(long)reg->addr;
+
+ return copy_to_user(uaddr, vs, 16);
} else {
return -EINVAL;
}
@@ -621,7 +707,10 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
const struct kvm_one_reg *reg)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
- u64 v;
+ struct mips_fpu_struct *fpu = &vcpu->arch.fpu;
+ s64 v;
+ s64 vs[2];
+ unsigned int idx;
if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U64) {
u64 __user *uaddr64 = (u64 __user *)(long)reg->addr;
@@ -635,11 +724,16 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
if (get_user(v32, uaddr32) != 0)
return -EFAULT;
v = (s64)v32;
+ } else if ((reg->id & KVM_REG_SIZE_MASK) == KVM_REG_SIZE_U128) {
+ void __user *uaddr = (void __user *)(long)reg->addr;
+
+ return copy_from_user(vs, uaddr, 16);
} else {
return -EINVAL;
}
switch (reg->id) {
+ /* General purpose registers */
case KVM_REG_MIPS_R0:
/* Silently ignore requests to set $0 */
break;
@@ -656,6 +750,64 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
vcpu->arch.pc = v;
break;
+ /* Floating point registers */
+ case KVM_REG_MIPS_FPR_32(0) ... KVM_REG_MIPS_FPR_32(31):
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ return -EINVAL;
+ idx = reg->id - KVM_REG_MIPS_FPR_32(0);
+ /* Odd singles in top of even double when FR=0 */
+ if (kvm_read_c0_guest_status(cop0) & ST0_FR)
+ set_fpr32(&fpu->fpr[idx], 0, v);
+ else
+ set_fpr32(&fpu->fpr[idx & ~1], idx & 1, v);
+ break;
+ case KVM_REG_MIPS_FPR_64(0) ... KVM_REG_MIPS_FPR_64(31):
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ return -EINVAL;
+ idx = reg->id - KVM_REG_MIPS_FPR_64(0);
+ /* Can't access odd doubles in FR=0 mode */
+ if (idx & 1 && !(kvm_read_c0_guest_status(cop0) & ST0_FR))
+ return -EINVAL;
+ set_fpr64(&fpu->fpr[idx], 0, v);
+ break;
+ case KVM_REG_MIPS_FCR_IR:
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ return -EINVAL;
+ /* Read-only */
+ break;
+ case KVM_REG_MIPS_FCR_CSR:
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ return -EINVAL;
+ fpu->fcr31 = v;
+ break;
+
+ /* MIPS SIMD Architecture (MSA) registers */
+ case KVM_REG_MIPS_VEC_128(0) ... KVM_REG_MIPS_VEC_128(31):
+ if (!kvm_mips_guest_has_msa(&vcpu->arch))
+ return -EINVAL;
+ idx = reg->id - KVM_REG_MIPS_VEC_128(0);
+#ifdef CONFIG_CPU_LITTLE_ENDIAN
+ /* least significant byte first */
+ set_fpr64(&fpu->fpr[idx], 0, vs[0]);
+ set_fpr64(&fpu->fpr[idx], 1, vs[1]);
+#else
+ /* most significant byte first */
+ set_fpr64(&fpu->fpr[idx], 1, vs[0]);
+ set_fpr64(&fpu->fpr[idx], 0, vs[1]);
+#endif
+ break;
+ case KVM_REG_MIPS_MSA_IR:
+ if (!kvm_mips_guest_has_msa(&vcpu->arch))
+ return -EINVAL;
+ /* Read-only */
+ break;
+ case KVM_REG_MIPS_MSA_CSR:
+ if (!kvm_mips_guest_has_msa(&vcpu->arch))
+ return -EINVAL;
+ fpu->msacsr = v;
+ break;
+
+ /* Co-processor 0 registers */
case KVM_REG_MIPS_CP0_INDEX:
kvm_write_c0_guest_index(cop0, v);
break;
@@ -686,6 +838,9 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
case KVM_REG_MIPS_CP0_EPC:
kvm_write_c0_guest_epc(cop0, v);
break;
+ case KVM_REG_MIPS_CP0_PRID:
+ kvm_write_c0_guest_prid(cop0, v);
+ break;
case KVM_REG_MIPS_CP0_ERROREPC:
kvm_write_c0_guest_errorepc(cop0, v);
break;
@@ -693,6 +848,12 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
case KVM_REG_MIPS_CP0_COUNT:
case KVM_REG_MIPS_CP0_COMPARE:
case KVM_REG_MIPS_CP0_CAUSE:
+ case KVM_REG_MIPS_CP0_CONFIG:
+ case KVM_REG_MIPS_CP0_CONFIG1:
+ case KVM_REG_MIPS_CP0_CONFIG2:
+ case KVM_REG_MIPS_CP0_CONFIG3:
+ case KVM_REG_MIPS_CP0_CONFIG4:
+ case KVM_REG_MIPS_CP0_CONFIG5:
case KVM_REG_MIPS_COUNT_CTL:
case KVM_REG_MIPS_COUNT_RESUME:
case KVM_REG_MIPS_COUNT_HZ:
@@ -703,6 +864,33 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
return 0;
}
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+ struct kvm_enable_cap *cap)
+{
+ int r = 0;
+
+ if (!kvm_vm_ioctl_check_extension(vcpu->kvm, cap->cap))
+ return -EINVAL;
+ if (cap->flags)
+ return -EINVAL;
+ if (cap->args[0])
+ return -EINVAL;
+
+ switch (cap->cap) {
+ case KVM_CAP_MIPS_FPU:
+ vcpu->arch.fpu_enabled = true;
+ break;
+ case KVM_CAP_MIPS_MSA:
+ vcpu->arch.msa_enabled = true;
+ break;
+ default:
+ r = -EINVAL;
+ break;
+ }
+
+ return r;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
unsigned long arg)
{
@@ -760,6 +948,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
break;
}
+ case KVM_ENABLE_CAP: {
+ struct kvm_enable_cap cap;
+
+ r = -EFAULT;
+ if (copy_from_user(&cap, argp, sizeof(cap)))
+ goto out;
+ r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+ break;
+ }
default:
r = -ENOIOCTLCMD;
}
@@ -868,11 +1065,30 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
switch (ext) {
case KVM_CAP_ONE_REG:
+ case KVM_CAP_ENABLE_CAP:
r = 1;
break;
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
+ case KVM_CAP_MIPS_FPU:
+ r = !!cpu_has_fpu;
+ break;
+ case KVM_CAP_MIPS_MSA:
+ /*
+ * We don't support MSA vector partitioning yet:
+ * 1) It would require explicit support which can't be tested
+ * yet due to lack of support in current hardware.
+ * 2) It extends the state that would need to be saved/restored
+ * by e.g. QEMU for migration.
+ *
+ * When vector partitioning hardware becomes available, support
+ * could be added by requiring a flag when enabling
+ * KVM_CAP_MIPS_MSA capability to indicate that userland knows
+ * to save/restore the appropriate extra state.
+ */
+ r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF);
+ break;
default:
r = 0;
break;
@@ -1119,6 +1335,30 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
ret = kvm_mips_callbacks->handle_break(vcpu);
break;
+ case T_TRAP:
+ ++vcpu->stat.trap_inst_exits;
+ trace_kvm_exit(vcpu, TRAP_INST_EXITS);
+ ret = kvm_mips_callbacks->handle_trap(vcpu);
+ break;
+
+ case T_MSAFPE:
+ ++vcpu->stat.msa_fpe_exits;
+ trace_kvm_exit(vcpu, MSA_FPE_EXITS);
+ ret = kvm_mips_callbacks->handle_msa_fpe(vcpu);
+ break;
+
+ case T_FPE:
+ ++vcpu->stat.fpe_exits;
+ trace_kvm_exit(vcpu, FPE_EXITS);
+ ret = kvm_mips_callbacks->handle_fpe(vcpu);
+ break;
+
+ case T_MSADIS:
+ ++vcpu->stat.msa_disabled_exits;
+ trace_kvm_exit(vcpu, MSA_DISABLED_EXITS);
+ ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
+ break;
+
default:
kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n",
exccode, opc, kvm_get_inst(opc, vcpu), badvaddr,
@@ -1146,12 +1386,233 @@ skip_emul:
}
}
+ if (ret == RESUME_GUEST) {
+ /*
+ * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context
+ * is live), restore FCR31 / MSACSR.
+ *
+ * This should be before returning to the guest exception
+ * vector, as it may well cause an [MSA] FP exception if there
+ * are pending exception bits unmasked. (see
+ * kvm_mips_csr_die_notifier() for how that is handled).
+ */
+ if (kvm_mips_guest_has_fpu(&vcpu->arch) &&
+ read_c0_status() & ST0_CU1)
+ __kvm_restore_fcsr(&vcpu->arch);
+
+ if (kvm_mips_guest_has_msa(&vcpu->arch) &&
+ read_c0_config5() & MIPS_CONF5_MSAEN)
+ __kvm_restore_msacsr(&vcpu->arch);
+ }
+
/* Disable HTW before returning to guest or host */
htw_stop();
return ret;
}
+/* Enable FPU for guest and restore context */
+void kvm_own_fpu(struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ unsigned int sr, cfg5;
+
+ preempt_disable();
+
+ sr = kvm_read_c0_guest_status(cop0);
+
+ /*
+ * If MSA state is already live, it is undefined how it interacts with
+ * FR=0 FPU state, and we don't want to hit reserved instruction
+ * exceptions trying to save the MSA state later when CU=1 && FR=1, so
+ * play it safe and save it first.
+ *
+ * In theory we shouldn't ever hit this case since kvm_lose_fpu() should
+ * get called when guest CU1 is set, however we can't trust the guest
+ * not to clobber the status register directly via the commpage.
+ */
+ if (cpu_has_msa && sr & ST0_CU1 && !(sr & ST0_FR) &&
+ vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA)
+ kvm_lose_fpu(vcpu);
+
+ /*
+ * Enable FPU for guest
+ * We set FR and FRE according to guest context
+ */
+ change_c0_status(ST0_CU1 | ST0_FR, sr);
+ if (cpu_has_fre) {
+ cfg5 = kvm_read_c0_guest_config5(cop0);
+ change_c0_config5(MIPS_CONF5_FRE, cfg5);
+ }
+ enable_fpu_hazard();
+
+ /* If guest FPU state not active, restore it now */
+ if (!(vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)) {
+ __kvm_restore_fpu(&vcpu->arch);
+ vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+ }
+
+ preempt_enable();
+}
+
+#ifdef CONFIG_CPU_HAS_MSA
+/* Enable MSA for guest and restore context */
+void kvm_own_msa(struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ unsigned int sr, cfg5;
+
+ preempt_disable();
+
+ /*
+ * Enable FPU if enabled in guest, since we're restoring FPU context
+ * anyway. We set FR and FRE according to guest context.
+ */
+ if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
+ sr = kvm_read_c0_guest_status(cop0);
+
+ /*
+ * If FR=0 FPU state is already live, it is undefined how it
+ * interacts with MSA state, so play it safe and save it first.
+ */
+ if (!(sr & ST0_FR) &&
+ (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU |
+ KVM_MIPS_FPU_MSA)) == KVM_MIPS_FPU_FPU)
+ kvm_lose_fpu(vcpu);
+
+ change_c0_status(ST0_CU1 | ST0_FR, sr);
+ if (sr & ST0_CU1 && cpu_has_fre) {
+ cfg5 = kvm_read_c0_guest_config5(cop0);
+ change_c0_config5(MIPS_CONF5_FRE, cfg5);
+ }
+ }
+
+ /* Enable MSA for guest */
+ set_c0_config5(MIPS_CONF5_MSAEN);
+ enable_fpu_hazard();
+
+ switch (vcpu->arch.fpu_inuse & (KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA)) {
+ case KVM_MIPS_FPU_FPU:
+ /*
+ * Guest FPU state already loaded, only restore upper MSA state
+ */
+ __kvm_restore_msa_upper(&vcpu->arch);
+ vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+ break;
+ case 0:
+ /* Neither FPU or MSA already active, restore full MSA state */
+ __kvm_restore_msa(&vcpu->arch);
+ vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_MSA;
+ if (kvm_mips_guest_has_fpu(&vcpu->arch))
+ vcpu->arch.fpu_inuse |= KVM_MIPS_FPU_FPU;
+ break;
+ default:
+ break;
+ }
+
+ preempt_enable();
+}
+#endif
+
+/* Drop FPU & MSA without saving it */
+void kvm_drop_fpu(struct kvm_vcpu *vcpu)
+{
+ preempt_disable();
+ if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+ disable_msa();
+ vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_MSA;
+ }
+ if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+ clear_c0_status(ST0_CU1 | ST0_FR);
+ vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+ }
+ preempt_enable();
+}
+
+/* Save and disable FPU & MSA */
+void kvm_lose_fpu(struct kvm_vcpu *vcpu)
+{
+ /*
+ * FPU & MSA get disabled in root context (hardware) when it is disabled
+ * in guest context (software), but the register state in the hardware
+ * may still be in use. This is why we explicitly re-enable the hardware
+ * before saving.
+ */
+
+ preempt_disable();
+ if (cpu_has_msa && vcpu->arch.fpu_inuse & KVM_MIPS_FPU_MSA) {
+ set_c0_config5(MIPS_CONF5_MSAEN);
+ enable_fpu_hazard();
+
+ __kvm_save_msa(&vcpu->arch);
+
+ /* Disable MSA & FPU */
+ disable_msa();
+ if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU)
+ clear_c0_status(ST0_CU1 | ST0_FR);
+ vcpu->arch.fpu_inuse &= ~(KVM_MIPS_FPU_FPU | KVM_MIPS_FPU_MSA);
+ } else if (vcpu->arch.fpu_inuse & KVM_MIPS_FPU_FPU) {
+ set_c0_status(ST0_CU1);
+ enable_fpu_hazard();
+
+ __kvm_save_fpu(&vcpu->arch);
+ vcpu->arch.fpu_inuse &= ~KVM_MIPS_FPU_FPU;
+
+ /* Disable FPU */
+ clear_c0_status(ST0_CU1 | ST0_FR);
+ }
+ preempt_enable();
+}
+
+/*
+ * Step over a specific ctc1 to FCSR and a specific ctcmsa to MSACSR which are
+ * used to restore guest FCSR/MSACSR state and may trigger a "harmless" FP/MSAFP
+ * exception if cause bits are set in the value being written.
+ */
+static int kvm_mips_csr_die_notify(struct notifier_block *self,
+ unsigned long cmd, void *ptr)
+{
+ struct die_args *args = (struct die_args *)ptr;
+ struct pt_regs *regs = args->regs;
+ unsigned long pc;
+
+ /* Only interested in FPE and MSAFPE */
+ if (cmd != DIE_FP && cmd != DIE_MSAFP)
+ return NOTIFY_DONE;
+
+ /* Return immediately if guest context isn't active */
+ if (!(current->flags & PF_VCPU))
+ return NOTIFY_DONE;
+
+ /* Should never get here from user mode */
+ BUG_ON(user_mode(regs));
+
+ pc = instruction_pointer(regs);
+ switch (cmd) {
+ case DIE_FP:
+ /* match 2nd instruction in __kvm_restore_fcsr */
+ if (pc != (unsigned long)&__kvm_restore_fcsr + 4)
+ return NOTIFY_DONE;
+ break;
+ case DIE_MSAFP:
+ /* match 2nd/3rd instruction in __kvm_restore_msacsr */
+ if (!cpu_has_msa ||
+ pc < (unsigned long)&__kvm_restore_msacsr + 4 ||
+ pc > (unsigned long)&__kvm_restore_msacsr + 8)
+ return NOTIFY_DONE;
+ break;
+ }
+
+ /* Move PC forward a little and continue executing */
+ instruction_pointer(regs) += 4;
+
+ return NOTIFY_STOP;
+}
+
+static struct notifier_block kvm_mips_csr_die_notifier = {
+ .notifier_call = kvm_mips_csr_die_notify,
+};
+
int __init kvm_mips_init(void)
{
int ret;
@@ -1161,6 +1622,8 @@ int __init kvm_mips_init(void)
if (ret)
return ret;
+ register_die_notifier(&kvm_mips_csr_die_notifier);
+
/*
* On MIPS, kernel modules are executed from "mapped space", which
* requires TLBs. The TLB handling code is statically linked with
@@ -1173,7 +1636,6 @@ int __init kvm_mips_init(void)
kvm_mips_release_pfn_clean = kvm_release_pfn_clean;
kvm_mips_is_error_pfn = is_error_pfn;
- pr_info("KVM/MIPS Initialized\n");
return 0;
}
@@ -1185,7 +1647,7 @@ void __exit kvm_mips_exit(void)
kvm_mips_release_pfn_clean = NULL;
kvm_mips_is_error_pfn = NULL;
- pr_info("KVM/MIPS unloaded\n");
+ unregister_die_notifier(&kvm_mips_csr_die_notifier);
}
module_init(kvm_mips_init);
diff --git a/arch/mips/kvm/msa.S b/arch/mips/kvm/msa.S
new file mode 100644
index 000000000000..d02f0c6cc2cc
--- /dev/null
+++ b/arch/mips/kvm/msa.S
@@ -0,0 +1,161 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * MIPS SIMD Architecture (MSA) context handling code for KVM.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/asmmacro.h>
+#include <asm/regdef.h>
+
+ .set noreorder
+ .set noat
+
+LEAF(__kvm_save_msa)
+ st_d 0, VCPU_FPR0, a0
+ st_d 1, VCPU_FPR1, a0
+ st_d 2, VCPU_FPR2, a0
+ st_d 3, VCPU_FPR3, a0
+ st_d 4, VCPU_FPR4, a0
+ st_d 5, VCPU_FPR5, a0
+ st_d 6, VCPU_FPR6, a0
+ st_d 7, VCPU_FPR7, a0
+ st_d 8, VCPU_FPR8, a0
+ st_d 9, VCPU_FPR9, a0
+ st_d 10, VCPU_FPR10, a0
+ st_d 11, VCPU_FPR11, a0
+ st_d 12, VCPU_FPR12, a0
+ st_d 13, VCPU_FPR13, a0
+ st_d 14, VCPU_FPR14, a0
+ st_d 15, VCPU_FPR15, a0
+ st_d 16, VCPU_FPR16, a0
+ st_d 17, VCPU_FPR17, a0
+ st_d 18, VCPU_FPR18, a0
+ st_d 19, VCPU_FPR19, a0
+ st_d 20, VCPU_FPR20, a0
+ st_d 21, VCPU_FPR21, a0
+ st_d 22, VCPU_FPR22, a0
+ st_d 23, VCPU_FPR23, a0
+ st_d 24, VCPU_FPR24, a0
+ st_d 25, VCPU_FPR25, a0
+ st_d 26, VCPU_FPR26, a0
+ st_d 27, VCPU_FPR27, a0
+ st_d 28, VCPU_FPR28, a0
+ st_d 29, VCPU_FPR29, a0
+ st_d 30, VCPU_FPR30, a0
+ st_d 31, VCPU_FPR31, a0
+ jr ra
+ nop
+ END(__kvm_save_msa)
+
+LEAF(__kvm_restore_msa)
+ ld_d 0, VCPU_FPR0, a0
+ ld_d 1, VCPU_FPR1, a0
+ ld_d 2, VCPU_FPR2, a0
+ ld_d 3, VCPU_FPR3, a0
+ ld_d 4, VCPU_FPR4, a0
+ ld_d 5, VCPU_FPR5, a0
+ ld_d 6, VCPU_FPR6, a0
+ ld_d 7, VCPU_FPR7, a0
+ ld_d 8, VCPU_FPR8, a0
+ ld_d 9, VCPU_FPR9, a0
+ ld_d 10, VCPU_FPR10, a0
+ ld_d 11, VCPU_FPR11, a0
+ ld_d 12, VCPU_FPR12, a0
+ ld_d 13, VCPU_FPR13, a0
+ ld_d 14, VCPU_FPR14, a0
+ ld_d 15, VCPU_FPR15, a0
+ ld_d 16, VCPU_FPR16, a0
+ ld_d 17, VCPU_FPR17, a0
+ ld_d 18, VCPU_FPR18, a0
+ ld_d 19, VCPU_FPR19, a0
+ ld_d 20, VCPU_FPR20, a0
+ ld_d 21, VCPU_FPR21, a0
+ ld_d 22, VCPU_FPR22, a0
+ ld_d 23, VCPU_FPR23, a0
+ ld_d 24, VCPU_FPR24, a0
+ ld_d 25, VCPU_FPR25, a0
+ ld_d 26, VCPU_FPR26, a0
+ ld_d 27, VCPU_FPR27, a0
+ ld_d 28, VCPU_FPR28, a0
+ ld_d 29, VCPU_FPR29, a0
+ ld_d 30, VCPU_FPR30, a0
+ ld_d 31, VCPU_FPR31, a0
+ jr ra
+ nop
+ END(__kvm_restore_msa)
+
+ .macro kvm_restore_msa_upper wr, off, base
+ .set push
+ .set noat
+#ifdef CONFIG_64BIT
+ ld $1, \off(\base)
+ insert_d \wr, 1
+#elif defined(CONFIG_CPU_LITTLE_ENDIAN)
+ lw $1, \off(\base)
+ insert_w \wr, 2
+ lw $1, (\off+4)(\base)
+ insert_w \wr, 3
+#else /* CONFIG_CPU_BIG_ENDIAN */
+ lw $1, (\off+4)(\base)
+ insert_w \wr, 2
+ lw $1, \off(\base)
+ insert_w \wr, 3
+#endif
+ .set pop
+ .endm
+
+LEAF(__kvm_restore_msa_upper)
+ kvm_restore_msa_upper 0, VCPU_FPR0 +8, a0
+ kvm_restore_msa_upper 1, VCPU_FPR1 +8, a0
+ kvm_restore_msa_upper 2, VCPU_FPR2 +8, a0
+ kvm_restore_msa_upper 3, VCPU_FPR3 +8, a0
+ kvm_restore_msa_upper 4, VCPU_FPR4 +8, a0
+ kvm_restore_msa_upper 5, VCPU_FPR5 +8, a0
+ kvm_restore_msa_upper 6, VCPU_FPR6 +8, a0
+ kvm_restore_msa_upper 7, VCPU_FPR7 +8, a0
+ kvm_restore_msa_upper 8, VCPU_FPR8 +8, a0
+ kvm_restore_msa_upper 9, VCPU_FPR9 +8, a0
+ kvm_restore_msa_upper 10, VCPU_FPR10+8, a0
+ kvm_restore_msa_upper 11, VCPU_FPR11+8, a0
+ kvm_restore_msa_upper 12, VCPU_FPR12+8, a0
+ kvm_restore_msa_upper 13, VCPU_FPR13+8, a0
+ kvm_restore_msa_upper 14, VCPU_FPR14+8, a0
+ kvm_restore_msa_upper 15, VCPU_FPR15+8, a0
+ kvm_restore_msa_upper 16, VCPU_FPR16+8, a0
+ kvm_restore_msa_upper 17, VCPU_FPR17+8, a0
+ kvm_restore_msa_upper 18, VCPU_FPR18+8, a0
+ kvm_restore_msa_upper 19, VCPU_FPR19+8, a0
+ kvm_restore_msa_upper 20, VCPU_FPR20+8, a0
+ kvm_restore_msa_upper 21, VCPU_FPR21+8, a0
+ kvm_restore_msa_upper 22, VCPU_FPR22+8, a0
+ kvm_restore_msa_upper 23, VCPU_FPR23+8, a0
+ kvm_restore_msa_upper 24, VCPU_FPR24+8, a0
+ kvm_restore_msa_upper 25, VCPU_FPR25+8, a0
+ kvm_restore_msa_upper 26, VCPU_FPR26+8, a0
+ kvm_restore_msa_upper 27, VCPU_FPR27+8, a0
+ kvm_restore_msa_upper 28, VCPU_FPR28+8, a0
+ kvm_restore_msa_upper 29, VCPU_FPR29+8, a0
+ kvm_restore_msa_upper 30, VCPU_FPR30+8, a0
+ kvm_restore_msa_upper 31, VCPU_FPR31+8, a0
+ jr ra
+ nop
+ END(__kvm_restore_msa_upper)
+
+LEAF(__kvm_restore_msacsr)
+ lw t0, VCPU_MSA_CSR(a0)
+ /*
+ * The ctcmsa must stay at this offset in __kvm_restore_msacsr.
+ * See kvm_mips_csr_die_notify() which handles t0 containing a value
+ * which triggers an MSA FP Exception, which must be stepped over and
+ * ignored since the set cause bits must remain there for the guest.
+ */
+ _ctcmsa MSA_CSR, t0
+ jr ra
+ nop
+ END(__kvm_restore_msacsr)
diff --git a/arch/mips/kvm/stats.c b/arch/mips/kvm/stats.c
index a74d6024c5ad..888bb67070ac 100644
--- a/arch/mips/kvm/stats.c
+++ b/arch/mips/kvm/stats.c
@@ -25,6 +25,10 @@ char *kvm_mips_exit_types_str[MAX_KVM_MIPS_EXIT_TYPES] = {
"System Call",
"Reserved Inst",
"Break Inst",
+ "Trap Inst",
+ "MSA FPE",
+ "FPE",
+ "MSA Disabled",
"D-Cache Flushes",
};
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index bbcd82242059..aed0ac2a4972 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -216,6 +216,7 @@ int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi,
if (idx > current_cpu_data.tlbsize) {
kvm_err("%s: Invalid Index: %d\n", __func__, idx);
kvm_mips_dump_host_tlbs();
+ local_irq_restore(flags);
return -1;
}
@@ -732,6 +733,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
}
+ /* restore guest state to registers */
+ kvm_mips_callbacks->vcpu_set_regs(vcpu);
+
local_irq_restore(flags);
}
@@ -750,6 +754,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
vcpu->arch.preempt_entryhi = read_c0_entryhi();
vcpu->arch.last_sched_cpu = cpu;
+ /* save guest state in registers */
+ kvm_mips_callbacks->vcpu_get_regs(vcpu);
+
if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) &
ASID_VERSION_MASK)) {
kvm_debug("%s: Dropping MMU Context: %#lx\n", __func__,
diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h
index c1388d40663b..bd6437f67dc0 100644
--- a/arch/mips/kvm/trace.h
+++ b/arch/mips/kvm/trace.h
@@ -24,18 +24,18 @@ TRACE_EVENT(kvm_exit,
TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
TP_ARGS(vcpu, reason),
TP_STRUCT__entry(
- __field(struct kvm_vcpu *, vcpu)
+ __field(unsigned long, pc)
__field(unsigned int, reason)
),
TP_fast_assign(
- __entry->vcpu = vcpu;
+ __entry->pc = vcpu->arch.pc;
__entry->reason = reason;
),
TP_printk("[%s]PC: 0x%08lx",
kvm_mips_exit_types_str[__entry->reason],
- __entry->vcpu->arch.pc)
+ __entry->pc)
);
#endif /* _TRACE_KVM_H */
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index fd7257b70e65..d836ed5b0bc7 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -39,16 +39,30 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
struct kvm_run *run = vcpu->run;
uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
unsigned long cause = vcpu->arch.host_cp0_cause;
enum emulation_result er = EMULATE_DONE;
int ret = RESUME_GUEST;
- if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1)
- er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu);
- else
+ if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) {
+ /* FPU Unusable */
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch) ||
+ (kvm_read_c0_guest_status(cop0) & ST0_CU1) == 0) {
+ /*
+ * Unusable/no FPU in guest:
+ * deliver guest COP1 Unusable Exception
+ */
+ er = kvm_mips_emulate_fpu_exc(cause, opc, run, vcpu);
+ } else {
+ /* Restore FPU state */
+ kvm_own_fpu(vcpu);
+ er = EMULATE_DONE;
+ }
+ } else {
er = kvm_mips_emulate_inst(cause, opc, run, vcpu);
+ }
switch (er) {
case EMULATE_DONE:
@@ -330,6 +344,107 @@ static int kvm_trap_emul_handle_break(struct kvm_vcpu *vcpu)
return ret;
}
+static int kvm_trap_emul_handle_trap(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+ unsigned long cause = vcpu->arch.host_cp0_cause;
+ enum emulation_result er = EMULATE_DONE;
+ int ret = RESUME_GUEST;
+
+ er = kvm_mips_emulate_trap_exc(cause, opc, run, vcpu);
+ if (er == EMULATE_DONE) {
+ ret = RESUME_GUEST;
+ } else {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = RESUME_HOST;
+ }
+ return ret;
+}
+
+static int kvm_trap_emul_handle_msa_fpe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+ unsigned long cause = vcpu->arch.host_cp0_cause;
+ enum emulation_result er = EMULATE_DONE;
+ int ret = RESUME_GUEST;
+
+ er = kvm_mips_emulate_msafpe_exc(cause, opc, run, vcpu);
+ if (er == EMULATE_DONE) {
+ ret = RESUME_GUEST;
+ } else {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = RESUME_HOST;
+ }
+ return ret;
+}
+
+static int kvm_trap_emul_handle_fpe(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ uint32_t __user *opc = (uint32_t __user *)vcpu->arch.pc;
+ unsigned long cause = vcpu->arch.host_cp0_cause;
+ enum emulation_result er = EMULATE_DONE;
+ int ret = RESUME_GUEST;
+
+ er = kvm_mips_emulate_fpe_exc(cause, opc, run, vcpu);
+ if (er == EMULATE_DONE) {
+ ret = RESUME_GUEST;
+ } else {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = RESUME_HOST;
+ }
+ return ret;
+}
+
+/**
+ * kvm_trap_emul_handle_msa_disabled() - Guest used MSA while disabled in root.
+ * @vcpu: Virtual CPU context.
+ *
+ * Handle when the guest attempts to use MSA when it is disabled.
+ */
+static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ struct kvm_run *run = vcpu->run;
+ uint32_t __user *opc = (uint32_t __user *) vcpu->arch.pc;
+ unsigned long cause = vcpu->arch.host_cp0_cause;
+ enum emulation_result er = EMULATE_DONE;
+ int ret = RESUME_GUEST;
+
+ if (!kvm_mips_guest_has_msa(&vcpu->arch) ||
+ (kvm_read_c0_guest_status(cop0) & (ST0_CU1 | ST0_FR)) == ST0_CU1) {
+ /*
+ * No MSA in guest, or FPU enabled and not in FR=1 mode,
+ * guest reserved instruction exception
+ */
+ er = kvm_mips_emulate_ri_exc(cause, opc, run, vcpu);
+ } else if (!(kvm_read_c0_guest_config5(cop0) & MIPS_CONF5_MSAEN)) {
+ /* MSA disabled by guest, guest MSA disabled exception */
+ er = kvm_mips_emulate_msadis_exc(cause, opc, run, vcpu);
+ } else {
+ /* Restore MSA/FPU state */
+ kvm_own_msa(vcpu);
+ er = EMULATE_DONE;
+ }
+
+ switch (er) {
+ case EMULATE_DONE:
+ ret = RESUME_GUEST;
+ break;
+
+ case EMULATE_FAIL:
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = RESUME_HOST;
+ break;
+
+ default:
+ BUG();
+ }
+ return ret;
+}
+
static int kvm_trap_emul_vm_init(struct kvm *kvm)
{
return 0;
@@ -351,8 +466,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
* guest will come up as expected, for now we simulate a MIPS 24kc
*/
kvm_write_c0_guest_prid(cop0, 0x00019300);
- kvm_write_c0_guest_config(cop0,
- MIPS_CONFIG0 | (0x1 << CP0C0_AR) |
+ /* Have config1, Cacheable, noncoherent, write-back, write allocate */
+ kvm_write_c0_guest_config(cop0, MIPS_CONF_M | (0x3 << CP0C0_K0) |
+ (0x1 << CP0C0_AR) |
(MMU_TYPE_R4000 << CP0C0_MT));
/* Read the cache characteristics from the host Config1 Register */
@@ -368,10 +484,18 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
(1 << CP0C1_WR) | (1 << CP0C1_CA));
kvm_write_c0_guest_config1(cop0, config1);
- kvm_write_c0_guest_config2(cop0, MIPS_CONFIG2);
- /* MIPS_CONFIG2 | (read_c0_config2() & 0xfff) */
- kvm_write_c0_guest_config3(cop0, MIPS_CONFIG3 | (0 << CP0C3_VInt) |
- (1 << CP0C3_ULRI));
+ /* Have config3, no tertiary/secondary caches implemented */
+ kvm_write_c0_guest_config2(cop0, MIPS_CONF_M);
+ /* MIPS_CONF_M | (read_c0_config2() & 0xfff) */
+
+ /* Have config4, UserLocal */
+ kvm_write_c0_guest_config3(cop0, MIPS_CONF_M | MIPS_CONF3_ULRI);
+
+ /* Have config5 */
+ kvm_write_c0_guest_config4(cop0, MIPS_CONF_M);
+
+ /* No config6 */
+ kvm_write_c0_guest_config5(cop0, 0);
/* Set Wait IE/IXMT Ignore in Config7, IAR, AR */
kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10));
@@ -416,6 +540,7 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
int ret = 0;
+ unsigned int cur, change;
switch (reg->id) {
case KVM_REG_MIPS_CP0_COUNT:
@@ -444,6 +569,44 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
kvm_write_c0_guest_cause(cop0, v);
}
break;
+ case KVM_REG_MIPS_CP0_CONFIG:
+ /* read-only for now */
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG1:
+ cur = kvm_read_c0_guest_config1(cop0);
+ change = (cur ^ v) & kvm_mips_config1_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ kvm_write_c0_guest_config1(cop0, v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG2:
+ /* read-only for now */
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG3:
+ cur = kvm_read_c0_guest_config3(cop0);
+ change = (cur ^ v) & kvm_mips_config3_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ kvm_write_c0_guest_config3(cop0, v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG4:
+ cur = kvm_read_c0_guest_config4(cop0);
+ change = (cur ^ v) & kvm_mips_config4_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ kvm_write_c0_guest_config4(cop0, v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG5:
+ cur = kvm_read_c0_guest_config5(cop0);
+ change = (cur ^ v) & kvm_mips_config5_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ kvm_write_c0_guest_config5(cop0, v);
+ }
+ break;
case KVM_REG_MIPS_COUNT_CTL:
ret = kvm_mips_set_count_ctl(vcpu, v);
break;
@@ -459,6 +622,18 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
return ret;
}
+static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu)
+{
+ kvm_lose_fpu(vcpu);
+
+ return 0;
+}
+
+static int kvm_trap_emul_vcpu_set_regs(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
/* exit handlers */
.handle_cop_unusable = kvm_trap_emul_handle_cop_unusable,
@@ -470,6 +645,10 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
.handle_syscall = kvm_trap_emul_handle_syscall,
.handle_res_inst = kvm_trap_emul_handle_res_inst,
.handle_break = kvm_trap_emul_handle_break,
+ .handle_trap = kvm_trap_emul_handle_trap,
+ .handle_msa_fpe = kvm_trap_emul_handle_msa_fpe,
+ .handle_fpe = kvm_trap_emul_handle_fpe,
+ .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,
.vm_init = kvm_trap_emul_vm_init,
.vcpu_init = kvm_trap_emul_vcpu_init,
@@ -483,6 +662,8 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
.irq_clear = kvm_mips_irq_clear_cb,
.get_one_reg = kvm_trap_emul_get_one_reg,
.set_one_reg = kvm_trap_emul_set_one_reg,
+ .vcpu_get_regs = kvm_trap_emul_vcpu_get_regs,
+ .vcpu_set_regs = kvm_trap_emul_vcpu_set_regs,
};
int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c
index 3b7f65cc4218..cf9b4633257e 100644
--- a/arch/mips/lasat/sysctl.c
+++ b/arch/mips/lasat/sysctl.c
@@ -75,11 +75,11 @@ static int rtctmp;
int proc_dolasatrtc(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
- struct timespec ts;
+ struct timespec64 ts;
int r;
if (!write) {
- read_persistent_clock(&ts);
+ read_persistent_clock64(&ts);
rtctmp = ts.tv_sec;
/* check for time < 0 and set to 0 */
if (rtctmp < 0)
diff --git a/arch/nios2/include/asm/ptrace.h b/arch/nios2/include/asm/ptrace.h
index 20fb1cf2dab6..642462144872 100644
--- a/arch/nios2/include/asm/ptrace.h
+++ b/arch/nios2/include/asm/ptrace.h
@@ -15,7 +15,54 @@
#include <uapi/asm/ptrace.h>
+/* This struct defines the way the registers are stored on the
+ stack during a system call. */
+
#ifndef __ASSEMBLY__
+struct pt_regs {
+ unsigned long r8; /* r8-r15 Caller-saved GP registers */
+ unsigned long r9;
+ unsigned long r10;
+ unsigned long r11;
+ unsigned long r12;
+ unsigned long r13;
+ unsigned long r14;
+ unsigned long r15;
+ unsigned long r1; /* Assembler temporary */
+ unsigned long r2; /* Retval LS 32bits */
+ unsigned long r3; /* Retval MS 32bits */
+ unsigned long r4; /* r4-r7 Register arguments */
+ unsigned long r5;
+ unsigned long r6;
+ unsigned long r7;
+ unsigned long orig_r2; /* Copy of r2 ?? */
+ unsigned long ra; /* Return address */
+ unsigned long fp; /* Frame pointer */
+ unsigned long sp; /* Stack pointer */
+ unsigned long gp; /* Global pointer */
+ unsigned long estatus;
+ unsigned long ea; /* Exception return address (pc) */
+ unsigned long orig_r7;
+};
+
+/*
+ * This is the extended stack used by signal handlers and the context
+ * switcher: it's pushed after the normal "struct pt_regs".
+ */
+struct switch_stack {
+ unsigned long r16; /* r16-r23 Callee-saved GP registers */
+ unsigned long r17;
+ unsigned long r18;
+ unsigned long r19;
+ unsigned long r20;
+ unsigned long r21;
+ unsigned long r22;
+ unsigned long r23;
+ unsigned long fp;
+ unsigned long gp;
+ unsigned long ra;
+};
+
#define user_mode(regs) (((regs)->estatus & ESTATUS_EU))
#define instruction_pointer(regs) ((regs)->ra)
diff --git a/arch/nios2/include/asm/thread_info.h b/arch/nios2/include/asm/thread_info.h
index 1f266575beb5..a16e55cbd8ad 100644
--- a/arch/nios2/include/asm/thread_info.h
+++ b/arch/nios2/include/asm/thread_info.h
@@ -47,7 +47,6 @@ struct thread_info {
0-0x7FFFFFFF for user-thead
0-0xFFFFFFFF for kernel-thread
*/
- struct restart_block restart_block;
struct pt_regs *regs;
};
@@ -64,9 +63,6 @@ struct thread_info {
.cpu = 0, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
- .restart_block = { \
- .fn = do_no_restart_syscall, \
- }, \
}
#define init_thread_info (init_thread_union.thread_info)
diff --git a/arch/nios2/include/asm/ucontext.h b/arch/nios2/include/asm/ucontext.h
deleted file mode 100644
index 2c87614b0f6e..000000000000
--- a/arch/nios2/include/asm/ucontext.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Copyright (C) 2010 Tobias Klauser <tklauser@distanz.ch>
- * Copyright (C) 2004 Microtronix Datacom Ltd
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef _ASM_NIOS2_UCONTEXT_H
-#define _ASM_NIOS2_UCONTEXT_H
-
-typedef int greg_t;
-#define NGREG 32
-typedef greg_t gregset_t[NGREG];
-
-struct mcontext {
- int version;
- gregset_t gregs;
-};
-
-#define MCONTEXT_VERSION 2
-
-struct ucontext {
- unsigned long uc_flags;
- struct ucontext *uc_link;
- stack_t uc_stack;
- struct mcontext uc_mcontext;
- sigset_t uc_sigmask; /* mask last for extensibility */
-};
-
-#endif
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index 4f07ca3f8d10..e0bb972a50d7 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -1,4 +1,5 @@
include include/uapi/asm-generic/Kbuild.asm
header-y += elf.h
-header-y += ucontext.h
+
+generic-y += ucontext.h
diff --git a/arch/nios2/include/uapi/asm/elf.h b/arch/nios2/include/uapi/asm/elf.h
index a5b91ae5cf56..6f06d3b2949e 100644
--- a/arch/nios2/include/uapi/asm/elf.h
+++ b/arch/nios2/include/uapi/asm/elf.h
@@ -50,9 +50,7 @@
typedef unsigned long elf_greg_t;
-#define ELF_NGREG \
- ((sizeof(struct pt_regs) + sizeof(struct switch_stack)) / \
- sizeof(elf_greg_t))
+#define ELF_NGREG 49
typedef elf_greg_t elf_gregset_t[ELF_NGREG];
typedef unsigned long elf_fpregset_t;
diff --git a/arch/nios2/include/uapi/asm/ptrace.h b/arch/nios2/include/uapi/asm/ptrace.h
index e83a7c9d1c36..eff00e67c0a2 100644
--- a/arch/nios2/include/uapi/asm/ptrace.h
+++ b/arch/nios2/include/uapi/asm/ptrace.h
@@ -60,60 +60,21 @@
#define PTR_IPENDING 37
#define PTR_CPUID 38
#define PTR_CTL6 39
-#define PTR_CTL7 40
+#define PTR_EXCEPTION 40
#define PTR_PTEADDR 41
#define PTR_TLBACC 42
#define PTR_TLBMISC 43
+#define PTR_ECCINJ 44
+#define PTR_BADADDR 45
+#define PTR_CONFIG 46
+#define PTR_MPUBASE 47
+#define PTR_MPUACC 48
-#define NUM_PTRACE_REG (PTR_TLBMISC + 1)
+#define NUM_PTRACE_REG (PTR_MPUACC + 1)
-/* this struct defines the way the registers are stored on the
- stack during a system call.
-
- There is a fake_regs in setup.c that has to match pt_regs.*/
-
-struct pt_regs {
- unsigned long r8; /* r8-r15 Caller-saved GP registers */
- unsigned long r9;
- unsigned long r10;
- unsigned long r11;
- unsigned long r12;
- unsigned long r13;
- unsigned long r14;
- unsigned long r15;
- unsigned long r1; /* Assembler temporary */
- unsigned long r2; /* Retval LS 32bits */
- unsigned long r3; /* Retval MS 32bits */
- unsigned long r4; /* r4-r7 Register arguments */
- unsigned long r5;
- unsigned long r6;
- unsigned long r7;
- unsigned long orig_r2; /* Copy of r2 ?? */
- unsigned long ra; /* Return address */
- unsigned long fp; /* Frame pointer */
- unsigned long sp; /* Stack pointer */
- unsigned long gp; /* Global pointer */
- unsigned long estatus;
- unsigned long ea; /* Exception return address (pc) */
- unsigned long orig_r7;
-};
-
-/*
- * This is the extended stack used by signal handlers and the context
- * switcher: it's pushed after the normal "struct pt_regs".
- */
-struct switch_stack {
- unsigned long r16; /* r16-r23 Callee-saved GP registers */
- unsigned long r17;
- unsigned long r18;
- unsigned long r19;
- unsigned long r20;
- unsigned long r21;
- unsigned long r22;
- unsigned long r23;
- unsigned long fp;
- unsigned long gp;
- unsigned long ra;
+/* User structures for general purpose registers. */
+struct user_pt_regs {
+ __u32 regs[49];
};
#endif /* __ASSEMBLY__ */
diff --git a/arch/nios2/include/uapi/asm/sigcontext.h b/arch/nios2/include/uapi/asm/sigcontext.h
index 7b8bb41867d4..b67944a50927 100644
--- a/arch/nios2/include/uapi/asm/sigcontext.h
+++ b/arch/nios2/include/uapi/asm/sigcontext.h
@@ -15,14 +15,16 @@
* details.
*/
-#ifndef _ASM_NIOS2_SIGCONTEXT_H
-#define _ASM_NIOS2_SIGCONTEXT_H
+#ifndef _UAPI__ASM_SIGCONTEXT_H
+#define _UAPI__ASM_SIGCONTEXT_H
-#include <asm/ptrace.h>
+#include <linux/types.h>
+
+#define MCONTEXT_VERSION 2
struct sigcontext {
- struct pt_regs regs;
- unsigned long sc_mask; /* old sigmask */
+ int version;
+ unsigned long gregs[32];
};
#endif
diff --git a/arch/nios2/kernel/entry.S b/arch/nios2/kernel/entry.S
index 7729bd3f2e79..27b006c52e12 100644
--- a/arch/nios2/kernel/entry.S
+++ b/arch/nios2/kernel/entry.S
@@ -161,7 +161,7 @@ ENTRY(inthandler)
***********************************************************************
*/
ENTRY(handle_trap)
- ldw r24, -4(ea) /* instruction that caused the exception */
+ ldwio r24, -4(ea) /* instruction that caused the exception */
srli r24, r24, 4
andi r24, r24, 0x7c
movia r9,trap_table
diff --git a/arch/nios2/kernel/signal.c b/arch/nios2/kernel/signal.c
index 2d0ea25be171..20662b0f6c9e 100644
--- a/arch/nios2/kernel/signal.c
+++ b/arch/nios2/kernel/signal.c
@@ -39,11 +39,11 @@ static inline int rt_restore_ucontext(struct pt_regs *regs,
struct ucontext *uc, int *pr2)
{
int temp;
- greg_t *gregs = uc->uc_mcontext.gregs;
+ unsigned long *gregs = uc->uc_mcontext.gregs;
int err;
/* Always make any pending restarted system calls return -EINTR */
- current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ current->restart_block.fn = do_no_restart_syscall;
err = __get_user(temp, &uc->uc_mcontext.version);
if (temp != MCONTEXT_VERSION)
@@ -127,7 +127,7 @@ badframe:
static inline int rt_setup_ucontext(struct ucontext *uc, struct pt_regs *regs)
{
struct switch_stack *sw = (struct switch_stack *)regs - 1;
- greg_t *gregs = uc->uc_mcontext.gregs;
+ unsigned long *gregs = uc->uc_mcontext.gregs;
int err = 0;
err |= __put_user(MCONTEXT_VERSION, &uc->uc_mcontext.version);
diff --git a/arch/nios2/mm/cacheflush.c b/arch/nios2/mm/cacheflush.c
index 2ae482b42669..796642932e2e 100644
--- a/arch/nios2/mm/cacheflush.c
+++ b/arch/nios2/mm/cacheflush.c
@@ -23,9 +23,6 @@ static void __flush_dcache(unsigned long start, unsigned long end)
end += (cpuinfo.dcache_line_size - 1);
end &= ~(cpuinfo.dcache_line_size - 1);
- if (end > start + cpuinfo.dcache_size)
- end = start + cpuinfo.dcache_size;
-
for (addr = start; addr < end; addr += cpuinfo.dcache_line_size) {
__asm__ __volatile__ (" flushda 0(%0)\n"
: /* Outputs */
diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c
index 0d231adfe576..0c9b6afe69e9 100644
--- a/arch/nios2/mm/fault.c
+++ b/arch/nios2/mm/fault.c
@@ -126,7 +126,6 @@ good_area:
break;
}
-survive:
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
@@ -220,11 +219,6 @@ no_context:
*/
out_of_memory:
up_read(&mm->mmap_sem);
- if (is_global_init(tsk)) {
- yield();
- down_read(&mm->mmap_sem);
- goto survive;
- }
if (!user_mode(regs))
goto no_context;
pagefault_out_of_memory();
diff --git a/arch/parisc/include/asm/pgalloc.h b/arch/parisc/include/asm/pgalloc.h
index f213f5b4c423..d17437238a2c 100644
--- a/arch/parisc/include/asm/pgalloc.h
+++ b/arch/parisc/include/asm/pgalloc.h
@@ -26,7 +26,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
if (likely(pgd != NULL)) {
memset(pgd, 0, PAGE_SIZE<<PGD_ALLOC_ORDER);
-#ifdef CONFIG_64BIT
+#if PT_NLEVELS == 3
actual_pgd += PTRS_PER_PGD;
/* Populate first pmd with allocated memory. We mark it
* with PxD_FLAG_ATTACHED as a signal to the system that this
@@ -45,7 +45,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
{
-#ifdef CONFIG_64BIT
+#if PT_NLEVELS == 3
pgd -= PTRS_PER_PGD;
#endif
free_pages((unsigned long)pgd, PGD_ALLOC_ORDER);
@@ -72,12 +72,15 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
{
-#ifdef CONFIG_64BIT
if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
- /* This is the permanent pmd attached to the pgd;
- * cannot free it */
+ /*
+ * This is the permanent pmd attached to the pgd;
+ * cannot free it.
+ * Increment the counter to compensate for the decrement
+ * done by generic mm code.
+ */
+ mm_inc_nr_pmds(mm);
return;
-#endif
free_pages((unsigned long)pmd, PMD_ORDER);
}
@@ -99,7 +102,7 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
static inline void
pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte)
{
-#ifdef CONFIG_64BIT
+#if PT_NLEVELS == 3
/* preserve the gateway marker if this is the beginning of
* the permanent pmd */
if(pmd_flag(*pmd) & PxD_FLAG_ATTACHED)
diff --git a/arch/parisc/kernel/syscall_table.S b/arch/parisc/kernel/syscall_table.S
index 5a8997d63899..8eefb12d1d33 100644
--- a/arch/parisc/kernel/syscall_table.S
+++ b/arch/parisc/kernel/syscall_table.S
@@ -55,8 +55,8 @@
#define ENTRY_COMP(_name_) .word sys_##_name_
#endif
- ENTRY_SAME(restart_syscall) /* 0 */
- ENTRY_SAME(exit)
+90: ENTRY_SAME(restart_syscall) /* 0 */
+91: ENTRY_SAME(exit)
ENTRY_SAME(fork_wrapper)
ENTRY_SAME(read)
ENTRY_SAME(write)
@@ -439,7 +439,10 @@
ENTRY_SAME(bpf)
ENTRY_COMP(execveat)
- /* Nothing yet */
+
+.ifne (. - 90b) - (__NR_Linux_syscalls * (91b - 90b))
+.error "size of syscall table does not fit value of __NR_Linux_syscalls"
+.endif
#undef ENTRY_SAME
#undef ENTRY_DIFF
diff --git a/arch/powerpc/include/asm/cputhreads.h b/arch/powerpc/include/asm/cputhreads.h
index 2bf8e9307be9..4c8ad592ae33 100644
--- a/arch/powerpc/include/asm/cputhreads.h
+++ b/arch/powerpc/include/asm/cputhreads.h
@@ -55,7 +55,7 @@ static inline cpumask_t cpu_thread_mask_to_cores(const struct cpumask *threads)
static inline int cpu_nr_cores(void)
{
- return NR_CPUS >> threads_shift;
+ return nr_cpu_ids >> threads_shift;
}
static inline cpumask_t cpu_online_cores_map(void)
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 9cfa3706a1b8..f1ea5972f6ec 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -113,6 +113,7 @@ extern void iommu_register_group(struct iommu_table *tbl,
int pci_domain_number, unsigned long pe_num);
extern int iommu_add_device(struct device *dev);
extern void iommu_del_device(struct device *dev);
+extern int __init tce_iommu_bus_notifier_init(void);
#else
static inline void iommu_register_group(struct iommu_table *tbl,
int pci_domain_number,
@@ -128,6 +129,11 @@ static inline int iommu_add_device(struct device *dev)
static inline void iommu_del_device(struct device *dev)
{
}
+
+static inline int __init tce_iommu_bus_notifier_init(void)
+{
+ return 0;
+}
#endif /* !CONFIG_IOMMU_API */
static inline void set_iommu_table_base_and_group(struct device *dev,
diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h
new file mode 100644
index 000000000000..744fd54de374
--- /dev/null
+++ b/arch/powerpc/include/asm/irq_work.h
@@ -0,0 +1,9 @@
+#ifndef _ASM_POWERPC_IRQ_WORK_H
+#define _ASM_POWERPC_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+ return true;
+}
+
+#endif /* _ASM_POWERPC_IRQ_WORK_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 03cd858a401c..4cbe23af400a 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -153,6 +153,7 @@
#define PPC_INST_MFSPR_PVR_MASK 0xfc1fffff
#define PPC_INST_MFTMR 0x7c0002dc
#define PPC_INST_MSGSND 0x7c00019c
+#define PPC_INST_MSGCLR 0x7c0001dc
#define PPC_INST_MSGSNDP 0x7c00011c
#define PPC_INST_MTTMR 0x7c0003dc
#define PPC_INST_NOP 0x60000000
@@ -309,6 +310,8 @@
___PPC_RB(b) | __PPC_EH(eh))
#define PPC_MSGSND(b) stringify_in_c(.long PPC_INST_MSGSND | \
___PPC_RB(b))
+#define PPC_MSGCLR(b) stringify_in_c(.long PPC_INST_MSGCLR | \
+ ___PPC_RB(b))
#define PPC_MSGSNDP(b) stringify_in_c(.long PPC_INST_MSGSNDP | \
___PPC_RB(b))
#define PPC_POPCNTB(a, s) stringify_in_c(.long PPC_INST_POPCNTB | \
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 1c874fb533bb..af56b5c6c81a 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -608,13 +608,16 @@
#define SRR1_ISI_N_OR_G 0x10000000 /* ISI: Access is no-exec or G */
#define SRR1_ISI_PROT 0x08000000 /* ISI: Other protection fault */
#define SRR1_WAKEMASK 0x00380000 /* reason for wakeup */
+#define SRR1_WAKEMASK_P8 0x003c0000 /* reason for wakeup on POWER8 */
#define SRR1_WAKESYSERR 0x00300000 /* System error */
#define SRR1_WAKEEE 0x00200000 /* External interrupt */
#define SRR1_WAKEMT 0x00280000 /* mtctrl */
#define SRR1_WAKEHMI 0x00280000 /* Hypervisor maintenance */
#define SRR1_WAKEDEC 0x00180000 /* Decrementer interrupt */
+#define SRR1_WAKEDBELL 0x00140000 /* Privileged doorbell on P8 */
#define SRR1_WAKETHERM 0x00100000 /* Thermal management interrupt */
#define SRR1_WAKERESET 0x00100000 /* System reset */
+#define SRR1_WAKEHDBELL 0x000c0000 /* Hypervisor doorbell on P8 */
#define SRR1_WAKESTATE 0x00030000 /* Powersave exit mask [46:47] */
#define SRR1_WS_DEEPEST 0x00030000 /* Some resources not maintained,
* may not be recoverable */
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index f337666768a7..f83046878336 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -437,6 +437,26 @@ static struct cpu_spec __initdata cpu_specs[] = {
.machine_check_early = __machine_check_early_realmode_p8,
.platform = "power8",
},
+ { /* Power8NVL */
+ .pvr_mask = 0xffff0000,
+ .pvr_value = 0x004c0000,
+ .cpu_name = "POWER8NVL (raw)",
+ .cpu_features = CPU_FTRS_POWER8,
+ .cpu_user_features = COMMON_USER_POWER8,
+ .cpu_user_features2 = COMMON_USER2_POWER8,
+ .mmu_features = MMU_FTRS_POWER8,
+ .icache_bsize = 128,
+ .dcache_bsize = 128,
+ .num_pmcs = 6,
+ .pmc_type = PPC_PMC_IBM,
+ .oprofile_cpu_type = "ppc64/power8",
+ .oprofile_type = PPC_OPROFILE_INVALID,
+ .cpu_setup = __setup_cpu_power8,
+ .cpu_restore = __restore_cpu_power8,
+ .flush_tlb = __flush_tlb_power8,
+ .machine_check_early = __machine_check_early_realmode_p8,
+ .platform = "power8",
+ },
{ /* Power8 DD1: Does not support doorbell IPIs */
.pvr_mask = 0xffffff00,
.pvr_value = 0x004d0100,
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index f4217819cc31..2128f3a96c32 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -17,6 +17,7 @@
#include <asm/dbell.h>
#include <asm/irq_regs.h>
+#include <asm/kvm_ppc.h>
#ifdef CONFIG_SMP
void doorbell_setup_this_cpu(void)
@@ -41,6 +42,7 @@ void doorbell_exception(struct pt_regs *regs)
may_hard_irq_enable();
+ kvmppc_set_host_ipi(smp_processor_id(), 0);
__this_cpu_inc(irq_stat.doorbell_irqs);
smp_ipi_demux();
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index c2df8150bd7a..9519e6bdc6d7 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1408,7 +1408,7 @@ machine_check_handle_early:
bne 9f /* continue in V mode if we are. */
5:
-#ifdef CONFIG_KVM_BOOK3S_64_HV
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
* We are coming from kernel context. Check if we are coming from
* guest. if yes, then we can continue. We will fall through
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 5d3968c4d799..b054f33ab1fb 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1175,4 +1175,30 @@ void iommu_del_device(struct device *dev)
}
EXPORT_SYMBOL_GPL(iommu_del_device);
+static int tce_iommu_bus_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ struct device *dev = data;
+
+ switch (action) {
+ case BUS_NOTIFY_ADD_DEVICE:
+ return iommu_add_device(dev);
+ case BUS_NOTIFY_DEL_DEVICE:
+ if (dev->iommu_group)
+ iommu_del_device(dev);
+ return 0;
+ default:
+ return 0;
+ }
+}
+
+static struct notifier_block tce_iommu_bus_nb = {
+ .notifier_call = tce_iommu_bus_notifier,
+};
+
+int __init tce_iommu_bus_notifier_init(void)
+{
+ bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
+ return 0;
+}
#endif /* CONFIG_IOMMU_API */
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 6e19afa35a15..ec9ec2058d2d 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -541,8 +541,8 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
if (smp_ops->give_timebase)
smp_ops->give_timebase();
- /* Wait until cpu puts itself in the online map */
- while (!cpu_online(cpu))
+ /* Wait until cpu puts itself in the online & active maps */
+ while (!cpu_online(cpu) || !cpu_active(cpu))
cpu_relax();
return 0;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index de4018a1bc4b..de747563d29d 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -636,7 +636,7 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
spin_lock(&vcpu->arch.vpa_update_lock);
lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
if (lppaca)
- yield_count = lppaca->yield_count;
+ yield_count = be32_to_cpu(lppaca->yield_count);
spin_unlock(&vcpu->arch.vpa_update_lock);
return yield_count;
}
@@ -942,20 +942,20 @@ static int kvm_arch_vcpu_ioctl_set_sregs_hv(struct kvm_vcpu *vcpu,
static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
bool preserve_top32)
{
+ struct kvm *kvm = vcpu->kvm;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
u64 mask;
+ mutex_lock(&kvm->lock);
spin_lock(&vc->lock);
/*
* If ILE (interrupt little-endian) has changed, update the
* MSR_LE bit in the intr_msr for each vcpu in this vcore.
*/
if ((new_lpcr & LPCR_ILE) != (vc->lpcr & LPCR_ILE)) {
- struct kvm *kvm = vcpu->kvm;
struct kvm_vcpu *vcpu;
int i;
- mutex_lock(&kvm->lock);
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->arch.vcore != vc)
continue;
@@ -964,7 +964,6 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
else
vcpu->arch.intr_msr &= ~MSR_LE;
}
- mutex_unlock(&kvm->lock);
}
/*
@@ -981,6 +980,7 @@ static void kvmppc_set_lpcr(struct kvm_vcpu *vcpu, u64 new_lpcr,
mask &= 0xFFFFFFFF;
vc->lpcr = (vc->lpcr & ~mask) | (new_lpcr & mask);
spin_unlock(&vc->lock);
+ mutex_unlock(&kvm->lock);
}
static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index bb94e6f20c81..6cbf1630cb70 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1005,6 +1005,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
/* Save HEIR (HV emulation assist reg) in emul_inst
if this is an HEI (HV emulation interrupt, e40) */
li r3,KVM_INST_FETCH_FAILED
+ stw r3,VCPU_LAST_INST(r9)
cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
bne 11f
mfspr r3,SPRN_HEIR
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 39b3a8f816f2..6249cdc834d1 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -34,7 +34,7 @@
#include <asm/kvm_para.h>
#include <asm/kvm_host.h>
#include <asm/kvm_ppc.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
#define MAX_CPU 32
#define MAX_SRC 256
@@ -289,11 +289,6 @@ static inline void IRQ_resetbit(struct irq_queue *q, int n_IRQ)
clear_bit(n_IRQ, q->queue);
}
-static inline int IRQ_testbit(struct irq_queue *q, int n_IRQ)
-{
- return test_bit(n_IRQ, q->queue);
-}
-
static void IRQ_check(struct openpic *opp, struct irq_queue *q)
{
int irq = -1;
@@ -1374,8 +1369,9 @@ static int kvm_mpic_write_internal(struct openpic *opp, gpa_t addr, u32 val)
return -ENXIO;
}
-static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
- int len, void *ptr)
+static int kvm_mpic_read(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
+ gpa_t addr, int len, void *ptr)
{
struct openpic *opp = container_of(this, struct openpic, mmio);
int ret;
@@ -1415,8 +1411,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr,
return ret;
}
-static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr,
- int len, const void *ptr)
+static int kvm_mpic_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
+ gpa_t addr, int len, const void *ptr)
{
struct openpic *opp = container_of(this, struct openpic, mmio);
int ret;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 27c0face86f4..24bfe401373e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -807,7 +807,7 @@ int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
idx = srcu_read_lock(&vcpu->kvm->srcu);
- ret = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
bytes, &run->mmio.data);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
@@ -880,7 +880,7 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
idx = srcu_read_lock(&vcpu->kvm->srcu);
- ret = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, run->mmio.phys_addr,
+ ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, run->mmio.phys_addr,
bytes, &run->mmio.data);
srcu_read_unlock(&vcpu->kvm->srcu, idx);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 0509bca5e830..fcbe899fe299 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -9,11 +9,11 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/jump_label.h>
#include <asm/ppc_asm.h>
#include <asm/hvcall.h>
#include <asm/asm-offsets.h>
#include <asm/opal.h>
-#include <asm/jump_label.h>
.section ".text"
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index e69142f4af08..54323d6b5166 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -836,30 +836,4 @@ void __init pnv_pci_init(void)
#endif
}
-static int tce_iommu_bus_notifier(struct notifier_block *nb,
- unsigned long action, void *data)
-{
- struct device *dev = data;
-
- switch (action) {
- case BUS_NOTIFY_ADD_DEVICE:
- return iommu_add_device(dev);
- case BUS_NOTIFY_DEL_DEVICE:
- if (dev->iommu_group)
- iommu_del_device(dev);
- return 0;
- default:
- return 0;
- }
-}
-
-static struct notifier_block tce_iommu_bus_nb = {
- .notifier_call = tce_iommu_bus_notifier,
-};
-
-static int __init tce_iommu_bus_notifier_init(void)
-{
- bus_register_notifier(&pci_bus_type, &tce_iommu_bus_nb);
- return 0;
-}
machine_subsys_initcall_sync(powernv, tce_iommu_bus_notifier_init);
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index fc34025ef822..38a45088f633 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -33,6 +33,8 @@
#include <asm/runlatch.h>
#include <asm/code-patching.h>
#include <asm/dbell.h>
+#include <asm/kvm_ppc.h>
+#include <asm/ppc-opcode.h>
#include "powernv.h"
@@ -149,7 +151,7 @@ static int pnv_smp_cpu_disable(void)
static void pnv_smp_cpu_kill_self(void)
{
unsigned int cpu;
- unsigned long srr1;
+ unsigned long srr1, wmask;
u32 idle_states;
/* Standard hot unplug procedure */
@@ -161,6 +163,10 @@ static void pnv_smp_cpu_kill_self(void)
generic_set_cpu_dead(cpu);
smp_wmb();
+ wmask = SRR1_WAKEMASK;
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ wmask = SRR1_WAKEMASK_P8;
+
idle_states = pnv_get_supported_cpuidle_states();
/* We don't want to take decrementer interrupts while we are offline,
* so clear LPCR:PECE1. We keep PECE2 enabled.
@@ -191,10 +197,14 @@ static void pnv_smp_cpu_kill_self(void)
* having finished executing in a KVM guest, then srr1
* contains 0.
*/
- if ((srr1 & SRR1_WAKEMASK) == SRR1_WAKEEE) {
+ if ((srr1 & wmask) == SRR1_WAKEEE) {
icp_native_flush_interrupt();
local_paca->irq_happened &= PACA_IRQ_HARD_DIS;
smp_mb();
+ } else if ((srr1 & wmask) == SRR1_WAKEHDBELL) {
+ unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+ asm volatile(PPC_MSGCLR(%0) : : "r" (msg));
+ kvmppc_set_host_ipi(cpu, 0);
}
if (cpu_core_split_required())
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index ccd53f91e8aa..74b5b8e239c8 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -7,12 +7,12 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/jump_label.h>
#include <asm/hvcall.h>
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
#include <asm/ptrace.h>
-#include <asm/jump_label.h>
.section ".text"
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 1d3d52dc3ff3..7803a19adb31 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -1340,3 +1340,5 @@ static int __init disable_multitce(char *str)
}
__setup("multitce=", disable_multitce);
+
+machine_subsys_initcall_sync(pseries, tce_iommu_bus_notifier_init);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index b5682fd6c984..b7a67e3d2201 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -26,7 +26,7 @@
#include <linux/dma-mapping.h>
#include <linux/console.h>
#include <linux/export.h>
-#include <linux/static_key.h>
+#include <linux/jump_label.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/page.h>
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 90cf3dcbd9f2..8f35d525cede 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -25,10 +25,10 @@
static struct kobject *mobility_kobj;
struct update_props_workarea {
- u32 phandle;
- u32 state;
- u64 reserved;
- u32 nprops;
+ __be32 phandle;
+ __be32 state;
+ __be64 reserved;
+ __be32 nprops;
} __packed;
#define NODE_ACTION_MASK 0xff000000
@@ -54,11 +54,11 @@ static int mobility_rtas_call(int token, char *buf, s32 scope)
return rc;
}
-static int delete_dt_node(u32 phandle)
+static int delete_dt_node(__be32 phandle)
{
struct device_node *dn;
- dn = of_find_node_by_phandle(phandle);
+ dn = of_find_node_by_phandle(be32_to_cpu(phandle));
if (!dn)
return -ENOENT;
@@ -127,7 +127,7 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
return 0;
}
-static int update_dt_node(u32 phandle, s32 scope)
+static int update_dt_node(__be32 phandle, s32 scope)
{
struct update_props_workarea *upwa;
struct device_node *dn;
@@ -136,6 +136,7 @@ static int update_dt_node(u32 phandle, s32 scope)
char *prop_data;
char *rtas_buf;
int update_properties_token;
+ u32 nprops;
u32 vd;
update_properties_token = rtas_token("ibm,update-properties");
@@ -146,7 +147,7 @@ static int update_dt_node(u32 phandle, s32 scope)
if (!rtas_buf)
return -ENOMEM;
- dn = of_find_node_by_phandle(phandle);
+ dn = of_find_node_by_phandle(be32_to_cpu(phandle));
if (!dn) {
kfree(rtas_buf);
return -ENOENT;
@@ -162,6 +163,7 @@ static int update_dt_node(u32 phandle, s32 scope)
break;
prop_data = rtas_buf + sizeof(*upwa);
+ nprops = be32_to_cpu(upwa->nprops);
/* On the first call to ibm,update-properties for a node the
* the first property value descriptor contains an empty
@@ -170,17 +172,17 @@ static int update_dt_node(u32 phandle, s32 scope)
*/
if (*prop_data == 0) {
prop_data++;
- vd = *(u32 *)prop_data;
+ vd = be32_to_cpu(*(__be32 *)prop_data);
prop_data += vd + sizeof(vd);
- upwa->nprops--;
+ nprops--;
}
- for (i = 0; i < upwa->nprops; i++) {
+ for (i = 0; i < nprops; i++) {
char *prop_name;
prop_name = prop_data;
prop_data += strlen(prop_name) + 1;
- vd = *(u32 *)prop_data;
+ vd = be32_to_cpu(*(__be32 *)prop_data);
prop_data += sizeof(vd);
switch (vd) {
@@ -212,13 +214,13 @@ static int update_dt_node(u32 phandle, s32 scope)
return 0;
}
-static int add_dt_node(u32 parent_phandle, u32 drc_index)
+static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
{
struct device_node *dn;
struct device_node *parent_dn;
int rc;
- parent_dn = of_find_node_by_phandle(parent_phandle);
+ parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle));
if (!parent_dn)
return -ENOENT;
@@ -237,7 +239,7 @@ static int add_dt_node(u32 parent_phandle, u32 drc_index)
int pseries_devicetree_update(s32 scope)
{
char *rtas_buf;
- u32 *data;
+ __be32 *data;
int update_nodes_token;
int rc;
@@ -254,17 +256,17 @@ int pseries_devicetree_update(s32 scope)
if (rc && rc != 1)
break;
- data = (u32 *)rtas_buf + 4;
- while (*data & NODE_ACTION_MASK) {
+ data = (__be32 *)rtas_buf + 4;
+ while (be32_to_cpu(*data) & NODE_ACTION_MASK) {
int i;
- u32 action = *data & NODE_ACTION_MASK;
- int node_count = *data & NODE_COUNT_MASK;
+ u32 action = be32_to_cpu(*data) & NODE_ACTION_MASK;
+ u32 node_count = be32_to_cpu(*data) & NODE_COUNT_MASK;
data++;
for (i = 0; i < node_count; i++) {
- u32 phandle = *data++;
- u32 drc_index;
+ __be32 phandle = *data++;
+ __be32 drc_index;
switch (action) {
case DELETE_DT_NODE:
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index c9df40b5c0ac..c9c875d9ed31 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -211,7 +211,7 @@ do { \
extern unsigned long mmap_rnd_mask;
-#define STACK_RND_MASK (mmap_rnd_mask)
+#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask)
#define ARCH_DLINFO \
do { \
diff --git a/arch/s390/include/asm/jump_label.h b/arch/s390/include/asm/jump_label.h
index 58642fd29c87..2b77e235b5fb 100644
--- a/arch/s390/include/asm/jump_label.h
+++ b/arch/s390/include/asm/jump_label.h
@@ -1,6 +1,8 @@
#ifndef _ASM_S390_JUMP_LABEL_H
#define _ASM_S390_JUMP_LABEL_H
+#ifndef __ASSEMBLY__
+
#include <linux/types.h>
#define JUMP_LABEL_NOP_SIZE 6
@@ -39,4 +41,5 @@ struct jump_entry {
jump_label_t key;
};
+#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index d84559e31f32..d01fc588b5c3 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -172,7 +172,9 @@ struct kvm_s390_sie_block {
__u32 fac; /* 0x01a0 */
__u8 reserved1a4[20]; /* 0x01a4 */
__u64 cbrlo; /* 0x01b8 */
- __u8 reserved1c0[30]; /* 0x01c0 */
+ __u8 reserved1c0[8]; /* 0x01c0 */
+ __u32 ecd; /* 0x01c8 */
+ __u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
__u8 reserved1e6[2]; /* 0x01e6 */
__u64 itdba; /* 0x01e8 */
@@ -183,11 +185,17 @@ struct kvm_s390_itdb {
__u8 data[256];
} __packed;
+struct kvm_s390_vregs {
+ __vector128 vrs[32];
+ __u8 reserved200[512]; /* for future vector expansion */
+} __packed;
+
struct sie_page {
struct kvm_s390_sie_block sie_block;
__u8 reserved200[1024]; /* 0x0200 */
struct kvm_s390_itdb itdb; /* 0x0600 */
- __u8 reserved700[2304]; /* 0x0700 */
+ __u8 reserved700[1280]; /* 0x0700 */
+ struct kvm_s390_vregs vregs; /* 0x0c00 */
} __packed;
struct kvm_vcpu_stat {
@@ -238,6 +246,7 @@ struct kvm_vcpu_stat {
u32 instruction_sigp_stop;
u32 instruction_sigp_stop_store_status;
u32 instruction_sigp_store_status;
+ u32 instruction_sigp_store_adtl_status;
u32 instruction_sigp_arch;
u32 instruction_sigp_prefix;
u32 instruction_sigp_restart;
@@ -270,6 +279,7 @@ struct kvm_vcpu_stat {
#define PGM_SPECIAL_OPERATION 0x13
#define PGM_OPERAND 0x15
#define PGM_TRACE_TABEL 0x16
+#define PGM_VECTOR_PROCESSING 0x1b
#define PGM_SPACE_SWITCH 0x1c
#define PGM_HFP_SQUARE_ROOT 0x1d
#define PGM_PC_TRANSLATION_SPEC 0x1f
@@ -334,6 +344,11 @@ enum irq_types {
IRQ_PEND_COUNT
};
+/* We have 2M for virtio device descriptor pages. Smallest amount of
+ * memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381
+ */
+#define KVM_S390_MAX_VIRTIO_IRQS 87381
+
/*
* Repressible (non-floating) machine check interrupts
* subclass bits in MCIC
@@ -411,13 +426,32 @@ struct kvm_s390_local_interrupt {
unsigned long pending_irqs;
};
+#define FIRQ_LIST_IO_ISC_0 0
+#define FIRQ_LIST_IO_ISC_1 1
+#define FIRQ_LIST_IO_ISC_2 2
+#define FIRQ_LIST_IO_ISC_3 3
+#define FIRQ_LIST_IO_ISC_4 4
+#define FIRQ_LIST_IO_ISC_5 5
+#define FIRQ_LIST_IO_ISC_6 6
+#define FIRQ_LIST_IO_ISC_7 7
+#define FIRQ_LIST_PFAULT 8
+#define FIRQ_LIST_VIRTIO 9
+#define FIRQ_LIST_COUNT 10
+#define FIRQ_CNTR_IO 0
+#define FIRQ_CNTR_SERVICE 1
+#define FIRQ_CNTR_VIRTIO 2
+#define FIRQ_CNTR_PFAULT 3
+#define FIRQ_MAX_COUNT 4
+
struct kvm_s390_float_interrupt {
+ unsigned long pending_irqs;
spinlock_t lock;
- struct list_head list;
- atomic_t active;
+ struct list_head lists[FIRQ_LIST_COUNT];
+ int counters[FIRQ_MAX_COUNT];
+ struct kvm_s390_mchk_info mchk;
+ struct kvm_s390_ext_info srv_signal;
int next_rr_cpu;
unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
- unsigned int irq_count;
};
struct kvm_hw_wp_info_arch {
@@ -465,6 +499,7 @@ struct kvm_vcpu_arch {
s390_fp_regs host_fpregs;
unsigned int host_acrs[NUM_ACRS];
s390_fp_regs guest_fpregs;
+ struct kvm_s390_vregs *host_vregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
@@ -515,15 +550,15 @@ struct s390_io_adapter {
#define S390_ARCH_FAC_MASK_SIZE_U64 \
(S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
-struct s390_model_fac {
- /* facilities used in SIE context */
- __u64 sie[S390_ARCH_FAC_LIST_SIZE_U64];
- /* subset enabled by kvm */
- __u64 kvm[S390_ARCH_FAC_LIST_SIZE_U64];
+struct kvm_s390_fac {
+ /* facility list requested by guest */
+ __u64 list[S390_ARCH_FAC_LIST_SIZE_U64];
+ /* facility mask supported by kvm & hosting machine */
+ __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64];
};
struct kvm_s390_cpu_model {
- struct s390_model_fac *fac;
+ struct kvm_s390_fac *fac;
struct cpuid cpu_id;
unsigned short ibc;
};
@@ -553,6 +588,7 @@ struct kvm_arch{
int use_cmma;
int user_cpu_state_ctrl;
int user_sigp;
+ int user_stsi;
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
int ipte_lock_count;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index f49b71954654..8fb3802f8fad 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -62,6 +62,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
{
int cpu = smp_processor_id();
+ S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
if (prev == next)
return;
if (MACHINE_HAS_TLB_LC)
@@ -73,7 +74,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
atomic_dec(&prev->context.attach_count);
if (MACHINE_HAS_TLB_LC)
cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask);
- S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd);
}
#define finish_arch_post_lock_switch finish_arch_post_lock_switch
diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h
index 7b2ac6e44166..53eacbd4f09b 100644
--- a/arch/s390/include/asm/page.h
+++ b/arch/s390/include/asm/page.h
@@ -37,16 +37,7 @@ static inline void storage_key_init_range(unsigned long start, unsigned long end
#endif
}
-static inline void clear_page(void *page)
-{
- register unsigned long reg1 asm ("1") = 0;
- register void *reg2 asm ("2") = page;
- register unsigned long reg3 asm ("3") = 4096;
- asm volatile(
- " mvcl 2,0"
- : "+d" (reg2), "+d" (reg3) : "d" (reg1)
- : "memory", "cc");
-}
+#define clear_page(page) memset((page), 0, PAGE_SIZE)
/*
* copy_page uses the mvcl instruction with 0xb0 padding byte in order to
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 9c77e60b9a26..ef1a5fcc6c66 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -150,6 +150,7 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_CRS (1UL << 3)
#define KVM_SYNC_ARCH0 (1UL << 4)
#define KVM_SYNC_PFAULT (1UL << 5)
+#define KVM_SYNC_VRS (1UL << 6)
/* definition of registers in kvm_run */
struct kvm_sync_regs {
__u64 prefix; /* prefix register */
@@ -164,6 +165,9 @@ struct kvm_sync_regs {
__u64 pft; /* pfault token [PFAULT] */
__u64 pfs; /* pfault select [PFAULT] */
__u64 pfc; /* pfault compare [PFAULT] */
+ __u64 vrs[32][2]; /* vector registers */
+ __u8 reserved[512]; /* for future vector expansion */
+ __u32 fpc; /* only valid with vector registers */
};
#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/arch/s390/include/uapi/asm/sie.h b/arch/s390/include/uapi/asm/sie.h
index d4096fdfc6ab..ee69c0854c88 100644
--- a/arch/s390/include/uapi/asm/sie.h
+++ b/arch/s390/include/uapi/asm/sie.h
@@ -230,7 +230,7 @@
* and returns a key, which can be used to find a mnemonic name
* of the instruction in the icpt_insn_codes table.
*/
-#define icpt_insn_decoder(insn) \
+#define icpt_insn_decoder(insn) ( \
INSN_DECODE_IPA0(0x01, insn, 48, 0xff) \
INSN_DECODE_IPA0(0xaa, insn, 48, 0x0f) \
INSN_DECODE_IPA0(0xb2, insn, 48, 0xff) \
@@ -239,6 +239,6 @@
INSN_DECODE_IPA0(0xe5, insn, 48, 0xff) \
INSN_DECODE_IPA0(0xeb, insn, 16, 0xff) \
INSN_DECODE_IPA0(0xc8, insn, 48, 0x0f) \
- INSN_DECODE(insn)
+ INSN_DECODE(insn))
#endif /* _UAPI_ASM_S390_SIE_H */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index e07e91605353..8dc4db10d160 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -171,6 +171,7 @@ int main(void)
#else /* CONFIG_32BIT */
DEFINE(__LC_DATA_EXC_CODE, offsetof(struct _lowcore, data_exc_code));
DEFINE(__LC_MCCK_FAIL_STOR_ADDR, offsetof(struct _lowcore, failing_storage_address));
+ DEFINE(__LC_VX_SAVE_AREA_ADDR, offsetof(struct _lowcore, vector_save_area_addr));
DEFINE(__LC_EXT_PARAMS2, offsetof(struct _lowcore, ext_params2));
DEFINE(SAVE_AREA_BASE, offsetof(struct _lowcore, floating_pt_save_area));
DEFINE(__LC_PASTE, offsetof(struct _lowcore, paste));
diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c
index 82c19899574f..6c79f1b44fe7 100644
--- a/arch/s390/kernel/ftrace.c
+++ b/arch/s390/kernel/ftrace.c
@@ -57,6 +57,44 @@
unsigned long ftrace_plt;
+static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn)
+{
+#ifdef CC_USING_HOTPATCH
+ /* brcl 0,0 */
+ insn->opc = 0xc004;
+ insn->disp = 0;
+#else
+ /* stg r14,8(r15) */
+ insn->opc = 0xe3e0;
+ insn->disp = 0xf0080024;
+#endif
+}
+
+static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ if (insn->opc == BREAKPOINT_INSTRUCTION)
+ return 1;
+#endif
+ return 0;
+}
+
+static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ insn->opc = BREAKPOINT_INSTRUCTION;
+ insn->disp = KPROBE_ON_FTRACE_NOP;
+#endif
+}
+
+static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn)
+{
+#ifdef CONFIG_KPROBES
+ insn->opc = BREAKPOINT_INSTRUCTION;
+ insn->disp = KPROBE_ON_FTRACE_CALL;
+#endif
+}
+
int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
unsigned long addr)
{
@@ -72,16 +110,9 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
return -EFAULT;
if (addr == MCOUNT_ADDR) {
/* Initial code replacement */
-#ifdef CC_USING_HOTPATCH
- /* We expect to see brcl 0,0 */
- ftrace_generate_nop_insn(&orig);
-#else
- /* We expect to see stg r14,8(r15) */
- orig.opc = 0xe3e0;
- orig.disp = 0xf0080024;
-#endif
+ ftrace_generate_orig_insn(&orig);
ftrace_generate_nop_insn(&new);
- } else if (old.opc == BREAKPOINT_INSTRUCTION) {
+ } else if (is_kprobe_on_ftrace(&old)) {
/*
* If we find a breakpoint instruction, a kprobe has been
* placed at the beginning of the function. We write the
@@ -89,9 +120,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
* bytes of the original instruction so that the kprobes
* handler can execute a nop, if it reaches this breakpoint.
*/
- new.opc = orig.opc = BREAKPOINT_INSTRUCTION;
- orig.disp = KPROBE_ON_FTRACE_CALL;
- new.disp = KPROBE_ON_FTRACE_NOP;
+ ftrace_generate_kprobe_call_insn(&orig);
+ ftrace_generate_kprobe_nop_insn(&new);
} else {
/* Replace ftrace call with a nop. */
ftrace_generate_call_insn(&orig, rec->ip);
@@ -111,7 +141,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old)))
return -EFAULT;
- if (old.opc == BREAKPOINT_INSTRUCTION) {
+ if (is_kprobe_on_ftrace(&old)) {
/*
* If we find a breakpoint instruction, a kprobe has been
* placed at the beginning of the function. We write the
@@ -119,9 +149,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
* bytes of the original instruction so that the kprobes
* handler can execute a brasl if it reaches this breakpoint.
*/
- new.opc = orig.opc = BREAKPOINT_INSTRUCTION;
- orig.disp = KPROBE_ON_FTRACE_NOP;
- new.disp = KPROBE_ON_FTRACE_CALL;
+ ftrace_generate_kprobe_nop_insn(&orig);
+ ftrace_generate_kprobe_call_insn(&new);
} else {
/* Replace nop with an ftrace call. */
ftrace_generate_nop_insn(&orig);
diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c
index cb2d51e779df..830066f936c8 100644
--- a/arch/s390/kernel/jump_label.c
+++ b/arch/s390/kernel/jump_label.c
@@ -36,16 +36,20 @@ static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
insn->offset = (entry->target - entry->code) >> 1;
}
-static void jump_label_bug(struct jump_entry *entry, struct insn *insn)
+static void jump_label_bug(struct jump_entry *entry, struct insn *expected,
+ struct insn *new)
{
unsigned char *ipc = (unsigned char *)entry->code;
- unsigned char *ipe = (unsigned char *)insn;
+ unsigned char *ipe = (unsigned char *)expected;
+ unsigned char *ipn = (unsigned char *)new;
pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc);
pr_emerg("Found: %02x %02x %02x %02x %02x %02x\n",
ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]);
pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n",
ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]);
+ pr_emerg("New: %02x %02x %02x %02x %02x %02x\n",
+ ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]);
panic("Corrupted kernel text");
}
@@ -69,10 +73,10 @@ static void __jump_label_transform(struct jump_entry *entry,
}
if (init) {
if (memcmp((void *)entry->code, &orignop, sizeof(orignop)))
- jump_label_bug(entry, &old);
+ jump_label_bug(entry, &orignop, &new);
} else {
if (memcmp((void *)entry->code, &old, sizeof(old)))
- jump_label_bug(entry, &old);
+ jump_label_bug(entry, &old, &new);
}
probe_kernel_write((void *)entry->code, &new, sizeof(new));
}
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 36154a2f1814..2ca95862e336 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -436,6 +436,7 @@ int module_finalize(const Elf_Ehdr *hdr,
const Elf_Shdr *sechdrs,
struct module *me)
{
+ jump_label_apply_nops(me);
vfree(me->arch.syminfo);
me->arch.syminfo = NULL;
return 0;
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index c3f8d157cb0d..e6a1578fc000 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -1415,7 +1415,7 @@ CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG);
static struct attribute *cpumsf_pmu_events_attr[] = {
CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC),
- CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG),
+ NULL,
NULL,
};
@@ -1606,8 +1606,11 @@ static int __init init_cpum_sampling_pmu(void)
return -EINVAL;
}
- if (si.ad)
+ if (si.ad) {
sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB);
+ cpumsf_pmu_events_attr[1] =
+ CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG);
+ }
sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80);
if (!sfdbg)
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 26108232fcaa..dc488e13b7e3 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -18,7 +18,7 @@
static DEFINE_PER_CPU(struct cpuid, cpu_id);
-void cpu_relax(void)
+void notrace cpu_relax(void)
{
if (!smp_cpu_mtid && MACHINE_HAS_DIAG44)
asm volatile("diag 0,0,0x44");
diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S
index 6b09fdffbd2f..ca6294645dd3 100644
--- a/arch/s390/kernel/swsusp_asm64.S
+++ b/arch/s390/kernel/swsusp_asm64.S
@@ -177,6 +177,17 @@ restart_entry:
lhi %r1,1
sigp %r1,%r0,SIGP_SET_ARCHITECTURE
sam64
+#ifdef CONFIG_SMP
+ larl %r1,smp_cpu_mt_shift
+ icm %r1,15,0(%r1)
+ jz smt_done
+ llgfr %r1,%r1
+smt_loop:
+ sigp %r1,%r0,SIGP_SET_MULTI_THREADING
+ brc 8,smt_done /* accepted */
+ brc 2,smt_loop /* busy, try again */
+smt_done:
+#endif
larl %r1,.Lnew_pgm_check_psw
lpswe 0(%r1)
pgm_check_entry:
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 20660dddb2d6..170ddd2018b3 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -215,20 +215,20 @@ void update_vsyscall(struct timekeeper *tk)
{
u64 nsecps;
- if (tk->tkr.clock != &clocksource_tod)
+ if (tk->tkr_mono.clock != &clocksource_tod)
return;
/* Make userspace gettimeofday spin until we're done. */
++vdso_data->tb_update_count;
smp_wmb();
- vdso_data->xtime_tod_stamp = tk->tkr.cycle_last;
+ vdso_data->xtime_tod_stamp = tk->tkr_mono.cycle_last;
vdso_data->xtime_clock_sec = tk->xtime_sec;
- vdso_data->xtime_clock_nsec = tk->tkr.xtime_nsec;
+ vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
vdso_data->wtom_clock_sec =
tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
- vdso_data->wtom_clock_nsec = tk->tkr.xtime_nsec +
- + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr.shift);
- nsecps = (u64) NSEC_PER_SEC << tk->tkr.shift;
+ vdso_data->wtom_clock_nsec = tk->tkr_mono.xtime_nsec +
+ + ((u64) tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+ nsecps = (u64) NSEC_PER_SEC << tk->tkr_mono.shift;
while (vdso_data->wtom_clock_nsec >= nsecps) {
vdso_data->wtom_clock_nsec -= nsecps;
vdso_data->wtom_clock_sec++;
@@ -236,7 +236,7 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->xtime_coarse_sec = tk->xtime_sec;
vdso_data->xtime_coarse_nsec =
- (long)(tk->tkr.xtime_nsec >> tk->tkr.shift);
+ (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
vdso_data->wtom_coarse_sec =
vdso_data->xtime_coarse_sec + tk->wall_to_monotonic.tv_sec;
vdso_data->wtom_coarse_nsec =
@@ -246,8 +246,8 @@ void update_vsyscall(struct timekeeper *tk)
vdso_data->wtom_coarse_sec++;
}
- vdso_data->tk_mult = tk->tkr.mult;
- vdso_data->tk_shift = tk->tkr.shift;
+ vdso_data->tk_mult = tk->tkr_mono.mult;
+ vdso_data->tk_shift = tk->tkr_mono.shift;
smp_wmb();
++vdso_data->tb_update_count;
}
@@ -283,7 +283,7 @@ void __init time_init(void)
if (register_external_irq(EXT_IRQ_TIMING_ALERT, timing_alert_interrupt))
panic("Couldn't request external interrupt 0x1406");
- if (clocksource_register(&clocksource_tod) != 0)
+ if (__clocksource_register(&clocksource_tod) != 0)
panic("Could not register TOD clock source");
/* Enable TOD clock interrupts on the boot cpu. */
diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 9254afff250c..fc7ec95848c3 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -77,7 +77,7 @@ static int __diag_page_ref_service(struct kvm_vcpu *vcpu)
if (vcpu->run->s.regs.gprs[rx] & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], &parm, sizeof(parm));
+ rc = read_guest(vcpu, vcpu->run->s.regs.gprs[rx], rx, &parm, sizeof(parm));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
if (parm.parm_version != 2 || parm.parm_len < 5 || parm.code != 0x258)
@@ -213,7 +213,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
* - gpr 3 contains the virtqueue index (passed as datamatch)
* - gpr 4 contains the index on the bus (optionally)
*/
- ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
+ ret = kvm_io_bus_write_cookie(vcpu, KVM_VIRTIO_CCW_NOTIFY_BUS,
vcpu->run->s.regs.gprs[2] & 0xffffffff,
8, &vcpu->run->s.regs.gprs[3],
vcpu->run->s.regs.gprs[4]);
@@ -230,7 +230,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
{
- int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
+ int code = kvm_s390_get_base_disp_rs(vcpu, NULL) & 0xffff;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 267523cac6de..a7559f7207df 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -10,6 +10,7 @@
#include <asm/pgtable.h>
#include "kvm-s390.h"
#include "gaccess.h"
+#include <asm/switch_to.h>
union asce {
unsigned long val;
@@ -207,6 +208,54 @@ union raddress {
unsigned long pfra : 52; /* Page-Frame Real Address */
};
+union alet {
+ u32 val;
+ struct {
+ u32 reserved : 7;
+ u32 p : 1;
+ u32 alesn : 8;
+ u32 alen : 16;
+ };
+};
+
+union ald {
+ u32 val;
+ struct {
+ u32 : 1;
+ u32 alo : 24;
+ u32 all : 7;
+ };
+};
+
+struct ale {
+ unsigned long i : 1; /* ALEN-Invalid Bit */
+ unsigned long : 5;
+ unsigned long fo : 1; /* Fetch-Only Bit */
+ unsigned long p : 1; /* Private Bit */
+ unsigned long alesn : 8; /* Access-List-Entry Sequence Number */
+ unsigned long aleax : 16; /* Access-List-Entry Authorization Index */
+ unsigned long : 32;
+ unsigned long : 1;
+ unsigned long asteo : 25; /* ASN-Second-Table-Entry Origin */
+ unsigned long : 6;
+ unsigned long astesn : 32; /* ASTE Sequence Number */
+} __packed;
+
+struct aste {
+ unsigned long i : 1; /* ASX-Invalid Bit */
+ unsigned long ato : 29; /* Authority-Table Origin */
+ unsigned long : 1;
+ unsigned long b : 1; /* Base-Space Bit */
+ unsigned long ax : 16; /* Authorization Index */
+ unsigned long atl : 12; /* Authority-Table Length */
+ unsigned long : 2;
+ unsigned long ca : 1; /* Controlled-ASN Bit */
+ unsigned long ra : 1; /* Reusable-ASN Bit */
+ unsigned long asce : 64; /* Address-Space-Control Element */
+ unsigned long ald : 32;
+ unsigned long astesn : 32;
+ /* .. more fields there */
+} __packed;
int ipte_lock_held(struct kvm_vcpu *vcpu)
{
@@ -307,15 +356,157 @@ void ipte_unlock(struct kvm_vcpu *vcpu)
ipte_unlock_simple(vcpu);
}
-static unsigned long get_vcpu_asce(struct kvm_vcpu *vcpu)
+static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar,
+ int write)
+{
+ union alet alet;
+ struct ale ale;
+ struct aste aste;
+ unsigned long ald_addr, authority_table_addr;
+ union ald ald;
+ int eax, rc;
+ u8 authority_table;
+
+ if (ar >= NUM_ACRS)
+ return -EINVAL;
+
+ save_access_regs(vcpu->run->s.regs.acrs);
+ alet.val = vcpu->run->s.regs.acrs[ar];
+
+ if (ar == 0 || alet.val == 0) {
+ asce->val = vcpu->arch.sie_block->gcr[1];
+ return 0;
+ } else if (alet.val == 1) {
+ asce->val = vcpu->arch.sie_block->gcr[7];
+ return 0;
+ }
+
+ if (alet.reserved)
+ return PGM_ALET_SPECIFICATION;
+
+ if (alet.p)
+ ald_addr = vcpu->arch.sie_block->gcr[5];
+ else
+ ald_addr = vcpu->arch.sie_block->gcr[2];
+ ald_addr &= 0x7fffffc0;
+
+ rc = read_guest_real(vcpu, ald_addr + 16, &ald.val, sizeof(union ald));
+ if (rc)
+ return rc;
+
+ if (alet.alen / 8 > ald.all)
+ return PGM_ALEN_TRANSLATION;
+
+ if (0x7fffffff - ald.alo * 128 < alet.alen * 16)
+ return PGM_ADDRESSING;
+
+ rc = read_guest_real(vcpu, ald.alo * 128 + alet.alen * 16, &ale,
+ sizeof(struct ale));
+ if (rc)
+ return rc;
+
+ if (ale.i == 1)
+ return PGM_ALEN_TRANSLATION;
+ if (ale.alesn != alet.alesn)
+ return PGM_ALE_SEQUENCE;
+
+ rc = read_guest_real(vcpu, ale.asteo * 64, &aste, sizeof(struct aste));
+ if (rc)
+ return rc;
+
+ if (aste.i)
+ return PGM_ASTE_VALIDITY;
+ if (aste.astesn != ale.astesn)
+ return PGM_ASTE_SEQUENCE;
+
+ if (ale.p == 1) {
+ eax = (vcpu->arch.sie_block->gcr[8] >> 16) & 0xffff;
+ if (ale.aleax != eax) {
+ if (eax / 16 > aste.atl)
+ return PGM_EXTENDED_AUTHORITY;
+
+ authority_table_addr = aste.ato * 4 + eax / 4;
+
+ rc = read_guest_real(vcpu, authority_table_addr,
+ &authority_table,
+ sizeof(u8));
+ if (rc)
+ return rc;
+
+ if ((authority_table & (0x40 >> ((eax & 3) * 2))) == 0)
+ return PGM_EXTENDED_AUTHORITY;
+ }
+ }
+
+ if (ale.fo == 1 && write)
+ return PGM_PROTECTION;
+
+ asce->val = aste.asce;
+ return 0;
+}
+
+struct trans_exc_code_bits {
+ unsigned long addr : 52; /* Translation-exception Address */
+ unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
+ unsigned long : 6;
+ unsigned long b60 : 1;
+ unsigned long b61 : 1;
+ unsigned long as : 2; /* ASCE Identifier */
+};
+
+enum {
+ FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
+ FSI_STORE = 1, /* Exception was due to store operation */
+ FSI_FETCH = 2 /* Exception was due to fetch operation */
+};
+
+static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce,
+ ar_t ar, int write)
{
+ int rc;
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
+ struct trans_exc_code_bits *tec_bits;
+
+ memset(pgm, 0, sizeof(*pgm));
+ tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
+ tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
+ tec_bits->as = psw_bits(*psw).as;
+
+ if (!psw_bits(*psw).t) {
+ asce->val = 0;
+ asce->r = 1;
+ return 0;
+ }
+
switch (psw_bits(vcpu->arch.sie_block->gpsw).as) {
case PSW_AS_PRIMARY:
- return vcpu->arch.sie_block->gcr[1];
+ asce->val = vcpu->arch.sie_block->gcr[1];
+ return 0;
case PSW_AS_SECONDARY:
- return vcpu->arch.sie_block->gcr[7];
+ asce->val = vcpu->arch.sie_block->gcr[7];
+ return 0;
case PSW_AS_HOME:
- return vcpu->arch.sie_block->gcr[13];
+ asce->val = vcpu->arch.sie_block->gcr[13];
+ return 0;
+ case PSW_AS_ACCREG:
+ rc = ar_translation(vcpu, asce, ar, write);
+ switch (rc) {
+ case PGM_ALEN_TRANSLATION:
+ case PGM_ALE_SEQUENCE:
+ case PGM_ASTE_VALIDITY:
+ case PGM_ASTE_SEQUENCE:
+ case PGM_EXTENDED_AUTHORITY:
+ vcpu->arch.pgm.exc_access_id = ar;
+ break;
+ case PGM_PROTECTION:
+ tec_bits->b60 = 1;
+ tec_bits->b61 = 1;
+ break;
+ }
+ if (rc > 0)
+ pgm->code = rc;
+ return rc;
}
return 0;
}
@@ -330,10 +521,11 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
* @vcpu: virtual cpu
* @gva: guest virtual address
* @gpa: points to where guest physical (absolute) address should be stored
+ * @asce: effective asce
* @write: indicates if access is a write access
*
* Translate a guest virtual address into a guest absolute address by means
- * of dynamic address translation as specified by the architecuture.
+ * of dynamic address translation as specified by the architecture.
* If the resulting absolute address is not available in the configuration
* an addressing exception is indicated and @gpa will not be changed.
*
@@ -345,7 +537,8 @@ static int deref_table(struct kvm *kvm, unsigned long gpa, unsigned long *val)
* by the architecture
*/
static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
- unsigned long *gpa, int write)
+ unsigned long *gpa, const union asce asce,
+ int write)
{
union vaddress vaddr = {.addr = gva};
union raddress raddr = {.addr = gva};
@@ -354,12 +547,10 @@ static unsigned long guest_translate(struct kvm_vcpu *vcpu, unsigned long gva,
union ctlreg0 ctlreg0;
unsigned long ptr;
int edat1, edat2;
- union asce asce;
ctlreg0.val = vcpu->arch.sie_block->gcr[0];
edat1 = ctlreg0.edat && test_kvm_facility(vcpu->kvm, 8);
edat2 = edat1 && test_kvm_facility(vcpu->kvm, 78);
- asce.val = get_vcpu_asce(vcpu);
if (asce.r)
goto real_address;
ptr = asce.origin * 4096;
@@ -506,48 +697,30 @@ static inline int is_low_address(unsigned long ga)
return (ga & ~0x11fful) == 0;
}
-static int low_address_protection_enabled(struct kvm_vcpu *vcpu)
+static int low_address_protection_enabled(struct kvm_vcpu *vcpu,
+ const union asce asce)
{
union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
psw_t *psw = &vcpu->arch.sie_block->gpsw;
- union asce asce;
if (!ctlreg0.lap)
return 0;
- asce.val = get_vcpu_asce(vcpu);
if (psw_bits(*psw).t && asce.p)
return 0;
return 1;
}
-struct trans_exc_code_bits {
- unsigned long addr : 52; /* Translation-exception Address */
- unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */
- unsigned long : 7;
- unsigned long b61 : 1;
- unsigned long as : 2; /* ASCE Identifier */
-};
-
-enum {
- FSI_UNKNOWN = 0, /* Unknown wether fetch or store */
- FSI_STORE = 1, /* Exception was due to store operation */
- FSI_FETCH = 2 /* Exception was due to fetch operation */
-};
-
static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
unsigned long *pages, unsigned long nr_pages,
- int write)
+ const union asce asce, int write)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
struct trans_exc_code_bits *tec_bits;
int lap_enabled, rc;
- memset(pgm, 0, sizeof(*pgm));
tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- tec_bits->fsi = write ? FSI_STORE : FSI_FETCH;
- tec_bits->as = psw_bits(*psw).as;
- lap_enabled = low_address_protection_enabled(vcpu);
+ lap_enabled = low_address_protection_enabled(vcpu, asce);
while (nr_pages) {
ga = kvm_s390_logical_to_effective(vcpu, ga);
tec_bits->addr = ga >> PAGE_SHIFT;
@@ -557,7 +730,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
}
ga &= PAGE_MASK;
if (psw_bits(*psw).t) {
- rc = guest_translate(vcpu, ga, pages, write);
+ rc = guest_translate(vcpu, ga, pages, asce, write);
if (rc < 0)
return rc;
if (rc == PGM_PROTECTION)
@@ -578,7 +751,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga,
return 0;
}
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len, int write)
{
psw_t *psw = &vcpu->arch.sie_block->gpsw;
@@ -591,20 +764,19 @@ int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
if (!len)
return 0;
- /* Access register mode is not supported yet. */
- if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
- return -EOPNOTSUPP;
+ rc = get_vcpu_asce(vcpu, &asce, ar, write);
+ if (rc)
+ return rc;
nr_pages = (((ga & ~PAGE_MASK) + len - 1) >> PAGE_SHIFT) + 1;
pages = pages_array;
if (nr_pages > ARRAY_SIZE(pages_array))
pages = vmalloc(nr_pages * sizeof(unsigned long));
if (!pages)
return -ENOMEM;
- asce.val = get_vcpu_asce(vcpu);
need_ipte_lock = psw_bits(*psw).t && !asce.r;
if (need_ipte_lock)
ipte_lock(vcpu);
- rc = guest_page_range(vcpu, ga, pages, nr_pages, write);
+ rc = guest_page_range(vcpu, ga, pages, nr_pages, asce, write);
for (idx = 0; idx < nr_pages && !rc; idx++) {
gpa = *(pages + idx) + (ga & ~PAGE_MASK);
_len = min(PAGE_SIZE - (gpa & ~PAGE_MASK), len);
@@ -652,7 +824,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* Note: The IPTE lock is not taken during this function, so the caller
* has to take care of this.
*/
-int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
+int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
unsigned long *gpa, int write)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
@@ -661,26 +833,21 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
union asce asce;
int rc;
- /* Access register mode is not supported yet. */
- if (psw_bits(*psw).t && psw_bits(*psw).as == PSW_AS_ACCREG)
- return -EOPNOTSUPP;
-
gva = kvm_s390_logical_to_effective(vcpu, gva);
- memset(pgm, 0, sizeof(*pgm));
tec = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
- tec->as = psw_bits(*psw).as;
- tec->fsi = write ? FSI_STORE : FSI_FETCH;
+ rc = get_vcpu_asce(vcpu, &asce, ar, write);
tec->addr = gva >> PAGE_SHIFT;
- if (is_low_address(gva) && low_address_protection_enabled(vcpu)) {
+ if (rc)
+ return rc;
+ if (is_low_address(gva) && low_address_protection_enabled(vcpu, asce)) {
if (write) {
rc = pgm->code = PGM_PROTECTION;
return rc;
}
}
- asce.val = get_vcpu_asce(vcpu);
if (psw_bits(*psw).t && !asce.r) { /* Use DAT? */
- rc = guest_translate(vcpu, gva, gpa, write);
+ rc = guest_translate(vcpu, gva, gpa, asce, write);
if (rc > 0) {
if (rc == PGM_PROTECTION)
tec->b61 = 1;
@@ -697,28 +864,51 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
}
/**
- * kvm_s390_check_low_addr_protection - check for low-address protection
- * @ga: Guest address
+ * check_gva_range - test a range of guest virtual addresses for accessibility
+ */
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+ unsigned long length, int is_write)
+{
+ unsigned long gpa;
+ unsigned long currlen;
+ int rc = 0;
+
+ ipte_lock(vcpu);
+ while (length > 0 && !rc) {
+ currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
+ rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write);
+ gva += currlen;
+ length -= currlen;
+ }
+ ipte_unlock(vcpu);
+
+ return rc;
+}
+
+/**
+ * kvm_s390_check_low_addr_prot_real - check for low-address protection
+ * @gra: Guest real address
*
* Checks whether an address is subject to low-address protection and set
* up vcpu->arch.pgm accordingly if necessary.
*
* Return: 0 if no protection exception, or PGM_PROTECTION if protected.
*/
-int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga)
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
{
struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm;
psw_t *psw = &vcpu->arch.sie_block->gpsw;
struct trans_exc_code_bits *tec_bits;
+ union ctlreg0 ctlreg0 = {.val = vcpu->arch.sie_block->gcr[0]};
- if (!is_low_address(ga) || !low_address_protection_enabled(vcpu))
+ if (!ctlreg0.lap || !is_low_address(gra))
return 0;
memset(pgm, 0, sizeof(*pgm));
tec_bits = (struct trans_exc_code_bits *)&pgm->trans_exc_code;
tec_bits->fsi = FSI_STORE;
tec_bits->as = psw_bits(*psw).as;
- tec_bits->addr = ga >> PAGE_SHIFT;
+ tec_bits->addr = gra >> PAGE_SHIFT;
pgm->code = PGM_PROTECTION;
return pgm->code;
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 0149cf15058a..ef03726cc661 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -156,9 +156,11 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
}
int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
- unsigned long *gpa, int write);
+ ar_t ar, unsigned long *gpa, int write);
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+ unsigned long length, int is_write);
-int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len, int write);
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
@@ -168,6 +170,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* write_guest - copy data from kernel space to guest space
* @vcpu: virtual cpu
* @ga: guest address
+ * @ar: access register
* @data: source address in kernel space
* @len: number of bytes to copy
*
@@ -176,8 +179,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* If DAT is off data will be copied to guest real or absolute memory.
* If DAT is on data will be copied to the address space as specified by
* the address space bits of the PSW:
- * Primary, secondory or home space (access register mode is currently not
- * implemented).
+ * Primary, secondary, home space or access register mode.
* The addressing mode of the PSW is also inspected, so that address wrap
* around is taken into account for 24-, 31- and 64-bit addressing mode,
* if the to be copied data crosses page boundaries in guest address space.
@@ -210,16 +212,17 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
* if data has been changed in guest space in case of an exception.
*/
static inline __must_check
-int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, data, len, 1);
+ return access_guest(vcpu, ga, ar, data, len, 1);
}
/**
* read_guest - copy data from guest space to kernel space
* @vcpu: virtual cpu
* @ga: guest address
+ * @ar: access register
* @data: destination address in kernel space
* @len: number of bytes to copy
*
@@ -229,10 +232,10 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
* data will be copied from guest space to kernel space.
*/
static inline __must_check
-int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, void *data,
+int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
unsigned long len)
{
- return access_guest(vcpu, ga, data, len, 0);
+ return access_guest(vcpu, ga, ar, data, len, 0);
}
/**
@@ -330,6 +333,6 @@ int read_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
void ipte_lock(struct kvm_vcpu *vcpu);
void ipte_unlock(struct kvm_vcpu *vcpu);
int ipte_lock_held(struct kvm_vcpu *vcpu);
-int kvm_s390_check_low_addr_protection(struct kvm_vcpu *vcpu, unsigned long ga);
+int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra);
#endif /* __KVM_S390_GACCESS_H */
diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c
index 3e8d4092ce30..e97b3455d7e6 100644
--- a/arch/s390/kvm/guestdbg.c
+++ b/arch/s390/kvm/guestdbg.c
@@ -191,8 +191,8 @@ static int __import_wp_info(struct kvm_vcpu *vcpu,
if (!wp_info->old_data)
return -ENOMEM;
/* try to backup the original value */
- ret = read_guest(vcpu, wp_info->phys_addr, wp_info->old_data,
- wp_info->len);
+ ret = read_guest_abs(vcpu, wp_info->phys_addr, wp_info->old_data,
+ wp_info->len);
if (ret) {
kfree(wp_info->old_data);
wp_info->old_data = NULL;
@@ -362,8 +362,8 @@ static struct kvm_hw_wp_info_arch *any_wp_changed(struct kvm_vcpu *vcpu)
continue;
/* refetch the wp data and compare it to the old value */
- if (!read_guest(vcpu, wp_info->phys_addr, temp,
- wp_info->len)) {
+ if (!read_guest_abs(vcpu, wp_info->phys_addr, temp,
+ wp_info->len)) {
if (memcmp(temp, wp_info->old_data, wp_info->len)) {
kfree(temp);
return wp_info;
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index bebd2157edd0..9e3779e3e496 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -165,6 +165,7 @@ static void __extract_prog_irq(struct kvm_vcpu *vcpu,
pgm_info->mon_class_nr = vcpu->arch.sie_block->mcn;
pgm_info->mon_code = vcpu->arch.sie_block->tecmc;
break;
+ case PGM_VECTOR_PROCESSING:
case PGM_DATA:
pgm_info->data_exc_code = vcpu->arch.sie_block->dxc;
break;
@@ -319,7 +320,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
/* Make sure that the source is paged-in */
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg2],
- &srcaddr, 0);
+ reg2, &srcaddr, 0);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, srcaddr, 0);
@@ -328,7 +329,7 @@ static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
/* Make sure that the destination is paged-in */
rc = guest_translate_address(vcpu, vcpu->run->s.regs.gprs[reg1],
- &dstaddr, 1);
+ reg1, &dstaddr, 1);
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
rc = kvm_arch_fault_in_page(vcpu, dstaddr, 1);
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 073b5f387d1d..9de47265ef73 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -1,7 +1,7 @@
/*
* handling kvm guest interrupts
*
- * Copyright IBM Corp. 2008,2014
+ * Copyright IBM Corp. 2008, 2015
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
@@ -17,9 +17,12 @@
#include <linux/signal.h>
#include <linux/slab.h>
#include <linux/bitmap.h>
+#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
+#include <asm/dis.h>
#include <asm/uaccess.h>
#include <asm/sclp.h>
+#include <asm/isc.h>
#include "kvm-s390.h"
#include "gaccess.h"
#include "trace-s390.h"
@@ -32,11 +35,6 @@
#define PFAULT_DONE 0x0680
#define VIRTIO_PARAM 0x0d00
-static int is_ioint(u64 type)
-{
- return ((type & 0xfffe0000u) != 0xfffe0000u);
-}
-
int psw_extint_disabled(struct kvm_vcpu *vcpu)
{
return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_EXT);
@@ -72,70 +70,45 @@ static int ckc_interrupts_enabled(struct kvm_vcpu *vcpu)
return 1;
}
-static u64 int_word_to_isc_bits(u32 int_word)
+static int ckc_irq_pending(struct kvm_vcpu *vcpu)
+{
+ if (!(vcpu->arch.sie_block->ckc <
+ get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
+ return 0;
+ return ckc_interrupts_enabled(vcpu);
+}
+
+static int cpu_timer_interrupts_enabled(struct kvm_vcpu *vcpu)
+{
+ return !psw_extint_disabled(vcpu) &&
+ (vcpu->arch.sie_block->gcr[0] & 0x400ul);
+}
+
+static int cpu_timer_irq_pending(struct kvm_vcpu *vcpu)
+{
+ return (vcpu->arch.sie_block->cputm >> 63) &&
+ cpu_timer_interrupts_enabled(vcpu);
+}
+
+static inline int is_ioirq(unsigned long irq_type)
{
- u8 isc = (int_word & 0x38000000) >> 27;
+ return ((irq_type >= IRQ_PEND_IO_ISC_0) &&
+ (irq_type <= IRQ_PEND_IO_ISC_7));
+}
+static uint64_t isc_to_isc_bits(int isc)
+{
return (0x80 >> isc) << 24;
}
-static int __must_check __interrupt_is_deliverable(struct kvm_vcpu *vcpu,
- struct kvm_s390_interrupt_info *inti)
+static inline u8 int_word_to_isc(u32 int_word)
{
- switch (inti->type) {
- case KVM_S390_INT_EXTERNAL_CALL:
- if (psw_extint_disabled(vcpu))
- return 0;
- if (vcpu->arch.sie_block->gcr[0] & 0x2000ul)
- return 1;
- return 0;
- case KVM_S390_INT_EMERGENCY:
- if (psw_extint_disabled(vcpu))
- return 0;
- if (vcpu->arch.sie_block->gcr[0] & 0x4000ul)
- return 1;
- return 0;
- case KVM_S390_INT_CLOCK_COMP:
- return ckc_interrupts_enabled(vcpu);
- case KVM_S390_INT_CPU_TIMER:
- if (psw_extint_disabled(vcpu))
- return 0;
- if (vcpu->arch.sie_block->gcr[0] & 0x400ul)
- return 1;
- return 0;
- case KVM_S390_INT_SERVICE:
- case KVM_S390_INT_PFAULT_INIT:
- case KVM_S390_INT_PFAULT_DONE:
- case KVM_S390_INT_VIRTIO:
- if (psw_extint_disabled(vcpu))
- return 0;
- if (vcpu->arch.sie_block->gcr[0] & 0x200ul)
- return 1;
- return 0;
- case KVM_S390_PROGRAM_INT:
- case KVM_S390_SIGP_STOP:
- case KVM_S390_SIGP_SET_PREFIX:
- case KVM_S390_RESTART:
- return 1;
- case KVM_S390_MCHK:
- if (psw_mchk_disabled(vcpu))
- return 0;
- if (vcpu->arch.sie_block->gcr[14] & inti->mchk.cr14)
- return 1;
- return 0;
- case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- if (psw_ioint_disabled(vcpu))
- return 0;
- if (vcpu->arch.sie_block->gcr[6] &
- int_word_to_isc_bits(inti->io.io_int_word))
- return 1;
- return 0;
- default:
- printk(KERN_WARNING "illegal interrupt type %llx\n",
- inti->type);
- BUG();
- }
- return 0;
+ return (int_word & 0x38000000) >> 27;
+}
+
+static inline unsigned long pending_floating_irqs(struct kvm_vcpu *vcpu)
+{
+ return vcpu->kvm->arch.float_int.pending_irqs;
}
static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
@@ -143,12 +116,31 @@ static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
return vcpu->arch.local_int.pending_irqs;
}
-static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
+static unsigned long disable_iscs(struct kvm_vcpu *vcpu,
+ unsigned long active_mask)
+{
+ int i;
+
+ for (i = 0; i <= MAX_ISC; i++)
+ if (!(vcpu->arch.sie_block->gcr[6] & isc_to_isc_bits(i)))
+ active_mask &= ~(1UL << (IRQ_PEND_IO_ISC_0 + i));
+
+ return active_mask;
+}
+
+static unsigned long deliverable_irqs(struct kvm_vcpu *vcpu)
{
- unsigned long active_mask = pending_local_irqs(vcpu);
+ unsigned long active_mask;
+
+ active_mask = pending_local_irqs(vcpu);
+ active_mask |= pending_floating_irqs(vcpu);
if (psw_extint_disabled(vcpu))
active_mask &= ~IRQ_PEND_EXT_MASK;
+ if (psw_ioint_disabled(vcpu))
+ active_mask &= ~IRQ_PEND_IO_MASK;
+ else
+ active_mask = disable_iscs(vcpu, active_mask);
if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
__clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
@@ -157,8 +149,13 @@ static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+ if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
+ __clear_bit(IRQ_PEND_EXT_SERVICE, &active_mask);
if (psw_mchk_disabled(vcpu))
active_mask &= ~IRQ_PEND_MCHK_MASK;
+ if (!(vcpu->arch.sie_block->gcr[14] &
+ vcpu->kvm->arch.float_int.mchk.cr14))
+ __clear_bit(IRQ_PEND_MCHK_REP, &active_mask);
/*
* STOP irqs will never be actively delivered. They are triggered via
@@ -200,6 +197,16 @@ static void __set_cpuflag(struct kvm_vcpu *vcpu, u32 flag)
atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
}
+static void set_intercept_indicators_io(struct kvm_vcpu *vcpu)
+{
+ if (!(pending_floating_irqs(vcpu) & IRQ_PEND_IO_MASK))
+ return;
+ else if (psw_ioint_disabled(vcpu))
+ __set_cpuflag(vcpu, CPUSTAT_IO_INT);
+ else
+ vcpu->arch.sie_block->lctl |= LCTL_CR6;
+}
+
static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
{
if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
@@ -226,47 +233,17 @@ static void set_intercept_indicators_stop(struct kvm_vcpu *vcpu)
__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
}
-/* Set interception request for non-deliverable local interrupts */
-static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
+/* Set interception request for non-deliverable interrupts */
+static void set_intercept_indicators(struct kvm_vcpu *vcpu)
{
+ set_intercept_indicators_io(vcpu);
set_intercept_indicators_ext(vcpu);
set_intercept_indicators_mchk(vcpu);
set_intercept_indicators_stop(vcpu);
}
-static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
- struct kvm_s390_interrupt_info *inti)
-{
- switch (inti->type) {
- case KVM_S390_INT_SERVICE:
- case KVM_S390_INT_PFAULT_DONE:
- case KVM_S390_INT_VIRTIO:
- if (psw_extint_disabled(vcpu))
- __set_cpuflag(vcpu, CPUSTAT_EXT_INT);
- else
- vcpu->arch.sie_block->lctl |= LCTL_CR0;
- break;
- case KVM_S390_MCHK:
- if (psw_mchk_disabled(vcpu))
- vcpu->arch.sie_block->ictl |= ICTL_LPSW;
- else
- vcpu->arch.sie_block->lctl |= LCTL_CR14;
- break;
- case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- if (psw_ioint_disabled(vcpu))
- __set_cpuflag(vcpu, CPUSTAT_IO_INT);
- else
- vcpu->arch.sie_block->lctl |= LCTL_CR6;
- break;
- default:
- BUG();
- }
-}
-
static u16 get_ilc(struct kvm_vcpu *vcpu)
{
- const unsigned short table[] = { 2, 4, 4, 6 };
-
switch (vcpu->arch.sie_block->icptcode) {
case ICPT_INST:
case ICPT_INSTPROGI:
@@ -274,7 +251,7 @@ static u16 get_ilc(struct kvm_vcpu *vcpu)
case ICPT_PARTEXEC:
case ICPT_IOINST:
/* last instruction only stored for these icptcodes */
- return table[vcpu->arch.sie_block->ipa >> 14];
+ return insn_length(vcpu->arch.sie_block->ipa >> 8);
case ICPT_PROGI:
return vcpu->arch.sie_block->pgmilc;
default:
@@ -350,38 +327,72 @@ static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
{
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- struct kvm_s390_mchk_info mchk;
- int rc;
+ struct kvm_s390_mchk_info mchk = {};
+ unsigned long adtl_status_addr;
+ int deliver = 0;
+ int rc = 0;
+ spin_lock(&fi->lock);
spin_lock(&li->lock);
- mchk = li->irq.mchk;
+ if (test_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs) ||
+ test_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs)) {
+ /*
+ * If there was an exigent machine check pending, then any
+ * repressible machine checks that might have been pending
+ * are indicated along with it, so always clear bits for
+ * repressible and exigent interrupts
+ */
+ mchk = li->irq.mchk;
+ clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+ clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+ memset(&li->irq.mchk, 0, sizeof(mchk));
+ deliver = 1;
+ }
/*
- * If there was an exigent machine check pending, then any repressible
- * machine checks that might have been pending are indicated along
- * with it, so always clear both bits
+ * We indicate floating repressible conditions along with
+ * other pending conditions. Channel Report Pending and Channel
+ * Subsystem damage are the only two and and are indicated by
+ * bits in mcic and masked in cr14.
*/
- clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
- clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
- memset(&li->irq.mchk, 0, sizeof(mchk));
+ if (test_and_clear_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+ mchk.mcic |= fi->mchk.mcic;
+ mchk.cr14 |= fi->mchk.cr14;
+ memset(&fi->mchk, 0, sizeof(mchk));
+ deliver = 1;
+ }
spin_unlock(&li->lock);
+ spin_unlock(&fi->lock);
- VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
- mchk.mcic);
- trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
- mchk.cr14, mchk.mcic);
-
- rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
- rc |= put_guest_lc(vcpu, mchk.mcic,
- (u64 __user *) __LC_MCCK_CODE);
- rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
- (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
- rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
- &mchk.fixed_logout, sizeof(mchk.fixed_logout));
- rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+ if (deliver) {
+ VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+ mchk.mcic);
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_MCHK,
+ mchk.cr14, mchk.mcic);
+
+ rc = kvm_s390_vcpu_store_status(vcpu,
+ KVM_S390_STORE_STATUS_PREFIXED);
+ rc |= read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR,
+ &adtl_status_addr,
+ sizeof(unsigned long));
+ rc |= kvm_s390_vcpu_store_adtl_status(vcpu,
+ adtl_status_addr);
+ rc |= put_guest_lc(vcpu, mchk.mcic,
+ (u64 __user *) __LC_MCCK_CODE);
+ rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
+ (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+ &mchk.fixed_logout,
+ sizeof(mchk.fixed_logout));
+ rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ }
return rc ? -EFAULT : 0;
}
@@ -484,7 +495,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
struct kvm_s390_pgm_info pgm_info;
- int rc = 0;
+ int rc = 0, nullifying = false;
u16 ilc = get_ilc(vcpu);
spin_lock(&li->lock);
@@ -509,6 +520,8 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
case PGM_LX_TRANSLATION:
case PGM_PRIMARY_AUTHORITY:
case PGM_SECONDARY_AUTHORITY:
+ nullifying = true;
+ /* fall through */
case PGM_SPACE_SWITCH:
rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
(u64 *)__LC_TRANS_EXC_CODE);
@@ -521,6 +534,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
case PGM_EXTENDED_AUTHORITY:
rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
(u8 *)__LC_EXC_ACCESS_ID);
+ nullifying = true;
break;
case PGM_ASCE_TYPE:
case PGM_PAGE_TRANSLATION:
@@ -534,6 +548,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
(u8 *)__LC_EXC_ACCESS_ID);
rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
(u8 *)__LC_OP_ACCESS_ID);
+ nullifying = true;
break;
case PGM_MONITOR:
rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
@@ -541,6 +556,7 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
rc |= put_guest_lc(vcpu, pgm_info.mon_code,
(u64 *)__LC_MON_CODE);
break;
+ case PGM_VECTOR_PROCESSING:
case PGM_DATA:
rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
(u32 *)__LC_DATA_EXC_CODE);
@@ -551,6 +567,15 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
(u8 *)__LC_EXC_ACCESS_ID);
break;
+ case PGM_STACK_FULL:
+ case PGM_STACK_EMPTY:
+ case PGM_STACK_SPECIFICATION:
+ case PGM_STACK_TYPE:
+ case PGM_STACK_OPERATION:
+ case PGM_TRACE_TABEL:
+ case PGM_CRYPTO_OPERATION:
+ nullifying = true;
+ break;
}
if (pgm_info.code & PGM_PER) {
@@ -564,7 +589,12 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
(u8 *) __LC_PER_ACCESS_ID);
}
+ if (nullifying && vcpu->arch.sie_block->icptcode == ICPT_INST)
+ kvm_s390_rewind_psw(vcpu, ilc);
+
rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
+ rc |= put_guest_lc(vcpu, vcpu->arch.sie_block->gbea,
+ (u64 *) __LC_LAST_BREAK);
rc |= put_guest_lc(vcpu, pgm_info.code,
(u16 *)__LC_PGM_INT_CODE);
rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
@@ -574,16 +604,27 @@ static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
return rc ? -EFAULT : 0;
}
-static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
- struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu)
{
- int rc;
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_ext_info ext;
+ int rc = 0;
+
+ spin_lock(&fi->lock);
+ if (!(test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs))) {
+ spin_unlock(&fi->lock);
+ return 0;
+ }
+ ext = fi->srv_signal;
+ memset(&fi->srv_signal, 0, sizeof(ext));
+ clear_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
- inti->ext.ext_params);
+ ext.ext_params);
vcpu->stat.deliver_service_signal++;
- trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
- inti->ext.ext_params, 0);
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_SERVICE,
+ ext.ext_params, 0);
rc = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
@@ -591,106 +632,146 @@ static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
&vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+ rc |= put_guest_lc(vcpu, ext.ext_params,
(u32 *)__LC_EXT_PARAMS);
+
return rc ? -EFAULT : 0;
}
-static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
- struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu)
{
- int rc;
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_interrupt_info *inti;
+ int rc = 0;
- trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
- KVM_S390_INT_PFAULT_DONE, 0,
- inti->ext.ext_params2);
+ spin_lock(&fi->lock);
+ inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_PFAULT],
+ struct kvm_s390_interrupt_info,
+ list);
+ if (inti) {
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ KVM_S390_INT_PFAULT_DONE, 0,
+ inti->ext.ext_params2);
+ list_del(&inti->list);
+ fi->counters[FIRQ_CNTR_PFAULT] -= 1;
+ }
+ if (list_empty(&fi->lists[FIRQ_LIST_PFAULT]))
+ clear_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
- rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
- (u64 *)__LC_EXT_PARAMS2);
+ if (inti) {
+ rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, PFAULT_DONE,
+ (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+ (u64 *)__LC_EXT_PARAMS2);
+ kfree(inti);
+ }
return rc ? -EFAULT : 0;
}
-static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
- struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu)
{
- int rc;
+ struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
+ struct kvm_s390_interrupt_info *inti;
+ int rc = 0;
- VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
- inti->ext.ext_params, inti->ext.ext_params2);
- vcpu->stat.deliver_virtio_interrupt++;
- trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
- inti->ext.ext_params,
- inti->ext.ext_params2);
+ spin_lock(&fi->lock);
+ inti = list_first_entry_or_null(&fi->lists[FIRQ_LIST_VIRTIO],
+ struct kvm_s390_interrupt_info,
+ list);
+ if (inti) {
+ VCPU_EVENT(vcpu, 4,
+ "interrupt: virtio parm:%x,parm64:%llx",
+ inti->ext.ext_params, inti->ext.ext_params2);
+ vcpu->stat.deliver_virtio_interrupt++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ inti->type,
+ inti->ext.ext_params,
+ inti->ext.ext_params2);
+ list_del(&inti->list);
+ fi->counters[FIRQ_CNTR_VIRTIO] -= 1;
+ }
+ if (list_empty(&fi->lists[FIRQ_LIST_VIRTIO]))
+ clear_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
- rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
- rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
- rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= put_guest_lc(vcpu, inti->ext.ext_params,
- (u32 *)__LC_EXT_PARAMS);
- rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
- (u64 *)__LC_EXT_PARAMS2);
+ if (inti) {
+ rc = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
+ (u16 *)__LC_EXT_INT_CODE);
+ rc |= put_guest_lc(vcpu, VIRTIO_PARAM,
+ (u16 *)__LC_EXT_CPU_ADDR);
+ rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+ (u32 *)__LC_EXT_PARAMS);
+ rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+ (u64 *)__LC_EXT_PARAMS2);
+ kfree(inti);
+ }
return rc ? -EFAULT : 0;
}
static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
- struct kvm_s390_interrupt_info *inti)
+ unsigned long irq_type)
{
- int rc;
+ struct list_head *isc_list;
+ struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_interrupt_info *inti = NULL;
+ int rc = 0;
- VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
- vcpu->stat.deliver_io_int++;
- trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
- ((__u32)inti->io.subchannel_id << 16) |
- inti->io.subchannel_nr,
- ((__u64)inti->io.io_int_parm << 32) |
- inti->io.io_int_word);
-
- rc = put_guest_lc(vcpu, inti->io.subchannel_id,
- (u16 *)__LC_SUBCHANNEL_ID);
- rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
- (u16 *)__LC_SUBCHANNEL_NR);
- rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
- (u32 *)__LC_IO_INT_PARM);
- rc |= put_guest_lc(vcpu, inti->io.io_int_word,
- (u32 *)__LC_IO_INT_WORD);
- rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- return rc ? -EFAULT : 0;
-}
+ fi = &vcpu->kvm->arch.float_int;
-static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu,
- struct kvm_s390_interrupt_info *inti)
-{
- struct kvm_s390_mchk_info *mchk = &inti->mchk;
- int rc;
+ spin_lock(&fi->lock);
+ isc_list = &fi->lists[irq_type - IRQ_PEND_IO_ISC_0];
+ inti = list_first_entry_or_null(isc_list,
+ struct kvm_s390_interrupt_info,
+ list);
+ if (inti) {
+ VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+ vcpu->stat.deliver_io_int++;
+ trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+ inti->type,
+ ((__u32)inti->io.subchannel_id << 16) |
+ inti->io.subchannel_nr,
+ ((__u64)inti->io.io_int_parm << 32) |
+ inti->io.io_int_word);
+ list_del(&inti->list);
+ fi->counters[FIRQ_CNTR_IO] -= 1;
+ }
+ if (list_empty(isc_list))
+ clear_bit(irq_type, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+
+ if (inti) {
+ rc = put_guest_lc(vcpu, inti->io.subchannel_id,
+ (u16 *)__LC_SUBCHANNEL_ID);
+ rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+ (u16 *)__LC_SUBCHANNEL_NR);
+ rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+ (u32 *)__LC_IO_INT_PARM);
+ rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+ (u32 *)__LC_IO_INT_WORD);
+ rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+ &vcpu->arch.sie_block->gpsw,
+ sizeof(psw_t));
+ kfree(inti);
+ }
- VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
- mchk->mcic);
- trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
- mchk->cr14, mchk->mcic);
-
- rc = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
- rc |= put_guest_lc(vcpu, mchk->mcic,
- (u64 __user *) __LC_MCCK_CODE);
- rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
- (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
- rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
- &mchk->fixed_logout, sizeof(mchk->fixed_logout));
- rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
- rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
- &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
return rc ? -EFAULT : 0;
}
@@ -698,6 +779,7 @@ typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
static const deliver_irq_t deliver_irq_funcs[] = {
[IRQ_PEND_MCHK_EX] = __deliver_machine_check,
+ [IRQ_PEND_MCHK_REP] = __deliver_machine_check,
[IRQ_PEND_PROG] = __deliver_prog,
[IRQ_PEND_EXT_EMERGENCY] = __deliver_emergency_signal,
[IRQ_PEND_EXT_EXTERNAL] = __deliver_external_call,
@@ -706,36 +788,11 @@ static const deliver_irq_t deliver_irq_funcs[] = {
[IRQ_PEND_RESTART] = __deliver_restart,
[IRQ_PEND_SET_PREFIX] = __deliver_set_prefix,
[IRQ_PEND_PFAULT_INIT] = __deliver_pfault_init,
+ [IRQ_PEND_EXT_SERVICE] = __deliver_service,
+ [IRQ_PEND_PFAULT_DONE] = __deliver_pfault_done,
+ [IRQ_PEND_VIRTIO] = __deliver_virtio,
};
-static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
- struct kvm_s390_interrupt_info *inti)
-{
- int rc;
-
- switch (inti->type) {
- case KVM_S390_INT_SERVICE:
- rc = __deliver_service(vcpu, inti);
- break;
- case KVM_S390_INT_PFAULT_DONE:
- rc = __deliver_pfault_done(vcpu, inti);
- break;
- case KVM_S390_INT_VIRTIO:
- rc = __deliver_virtio(vcpu, inti);
- break;
- case KVM_S390_MCHK:
- rc = __deliver_mchk_floating(vcpu, inti);
- break;
- case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- rc = __deliver_io(vcpu, inti);
- break;
- default:
- BUG();
- }
-
- return rc;
-}
-
/* Check whether an external call is pending (deliverable or not) */
int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
{
@@ -751,21 +808,9 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu)
int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
{
- struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
- struct kvm_s390_interrupt_info *inti;
int rc;
- rc = !!deliverable_local_irqs(vcpu);
-
- if ((!rc) && atomic_read(&fi->active)) {
- spin_lock(&fi->lock);
- list_for_each_entry(inti, &fi->list, list)
- if (__interrupt_is_deliverable(vcpu, inti)) {
- rc = 1;
- break;
- }
- spin_unlock(&fi->lock);
- }
+ rc = !!deliverable_irqs(vcpu);
if (!rc && kvm_cpu_has_pending_timer(vcpu))
rc = 1;
@@ -784,12 +829,7 @@ int kvm_s390_vcpu_has_irq(struct kvm_vcpu *vcpu, int exclude_stop)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- if (!(vcpu->arch.sie_block->ckc <
- get_tod_clock_fast() + vcpu->arch.sie_block->epoch))
- return 0;
- if (!ckc_interrupts_enabled(vcpu))
- return 0;
- return 1;
+ return ckc_irq_pending(vcpu) || cpu_timer_irq_pending(vcpu);
}
int kvm_s390_handle_wait(struct kvm_vcpu *vcpu)
@@ -884,60 +924,45 @@ void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
int __must_check kvm_s390_deliver_pending_interrupts(struct kvm_vcpu *vcpu)
{
struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
- struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
- struct kvm_s390_interrupt_info *n, *inti = NULL;
deliver_irq_t func;
- int deliver;
int rc = 0;
unsigned long irq_type;
- unsigned long deliverable_irqs;
+ unsigned long irqs;
__reset_intercept_indicators(vcpu);
/* pending ckc conditions might have been invalidated */
clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
- if (kvm_cpu_has_pending_timer(vcpu))
+ if (ckc_irq_pending(vcpu))
set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+ /* pending cpu timer conditions might have been invalidated */
+ clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+ if (cpu_timer_irq_pending(vcpu))
+ set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+
do {
- deliverable_irqs = deliverable_local_irqs(vcpu);
+ irqs = deliverable_irqs(vcpu);
/* bits are in the order of interrupt priority */
- irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT);
+ irq_type = find_first_bit(&irqs, IRQ_PEND_COUNT);
if (irq_type == IRQ_PEND_COUNT)
break;
- func = deliver_irq_funcs[irq_type];
- if (!func) {
- WARN_ON_ONCE(func == NULL);
- clear_bit(irq_type, &li->pending_irqs);
- continue;
+ if (is_ioirq(irq_type)) {
+ rc = __deliver_io(vcpu, irq_type);
+ } else {
+ func = deliver_irq_funcs[irq_type];
+ if (!func) {
+ WARN_ON_ONCE(func == NULL);
+ clear_bit(irq_type, &li->pending_irqs);
+ continue;
+ }
+ rc = func(vcpu);
}
- rc = func(vcpu);
- } while (!rc && irq_type != IRQ_PEND_COUNT);
+ if (rc)
+ break;
+ } while (!rc);
- set_intercept_indicators_local(vcpu);
-
- if (!rc && atomic_read(&fi->active)) {
- do {
- deliver = 0;
- spin_lock(&fi->lock);
- list_for_each_entry_safe(inti, n, &fi->list, list) {
- if (__interrupt_is_deliverable(vcpu, inti)) {
- list_del(&inti->list);
- fi->irq_count--;
- deliver = 1;
- break;
- }
- __set_intercept_indicator(vcpu, inti);
- }
- if (list_empty(&fi->list))
- atomic_set(&fi->active, 0);
- spin_unlock(&fi->lock);
- if (deliver) {
- rc = __deliver_floating_interrupt(vcpu, inti);
- kfree(inti);
- }
- } while (!rc && deliver);
- }
+ set_intercept_indicators(vcpu);
return rc;
}
@@ -1172,80 +1197,182 @@ static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
return 0;
}
+static struct kvm_s390_interrupt_info *get_io_int(struct kvm *kvm,
+ int isc, u32 schid)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct list_head *isc_list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+ struct kvm_s390_interrupt_info *iter;
+ u16 id = (schid & 0xffff0000U) >> 16;
+ u16 nr = schid & 0x0000ffffU;
+ spin_lock(&fi->lock);
+ list_for_each_entry(iter, isc_list, list) {
+ if (schid && (id != iter->io.subchannel_id ||
+ nr != iter->io.subchannel_nr))
+ continue;
+ /* found an appropriate entry */
+ list_del_init(&iter->list);
+ fi->counters[FIRQ_CNTR_IO] -= 1;
+ if (list_empty(isc_list))
+ clear_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+ return iter;
+ }
+ spin_unlock(&fi->lock);
+ return NULL;
+}
+
+/*
+ * Dequeue and return an I/O interrupt matching any of the interruption
+ * subclasses as designated by the isc mask in cr6 and the schid (if != 0).
+ */
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
- u64 cr6, u64 schid)
+ u64 isc_mask, u32 schid)
+{
+ struct kvm_s390_interrupt_info *inti = NULL;
+ int isc;
+
+ for (isc = 0; isc <= MAX_ISC && !inti; isc++) {
+ if (isc_mask & isc_to_isc_bits(isc))
+ inti = get_io_int(kvm, isc, schid);
+ }
+ return inti;
+}
+
+#define SCCB_MASK 0xFFFFFFF8
+#define SCCB_EVENT_PENDING 0x3
+
+static int __inject_service(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+ spin_lock(&fi->lock);
+ fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_EVENT_PENDING;
+ /*
+ * Early versions of the QEMU s390 bios will inject several
+ * service interrupts after another without handling a
+ * condition code indicating busy.
+ * We will silently ignore those superfluous sccb values.
+ * A future version of QEMU will take care of serialization
+ * of servc requests
+ */
+ if (fi->srv_signal.ext_params & SCCB_MASK)
+ goto out;
+ fi->srv_signal.ext_params |= inti->ext.ext_params & SCCB_MASK;
+ set_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs);
+out:
+ spin_unlock(&fi->lock);
+ kfree(inti);
+ return 0;
+}
+
+static int __inject_virtio(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+ spin_lock(&fi->lock);
+ if (fi->counters[FIRQ_CNTR_VIRTIO] >= KVM_S390_MAX_VIRTIO_IRQS) {
+ spin_unlock(&fi->lock);
+ return -EBUSY;
+ }
+ fi->counters[FIRQ_CNTR_VIRTIO] += 1;
+ list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_VIRTIO]);
+ set_bit(IRQ_PEND_VIRTIO, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+ return 0;
+}
+
+static int __inject_pfault_done(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+ spin_lock(&fi->lock);
+ if (fi->counters[FIRQ_CNTR_PFAULT] >=
+ (ASYNC_PF_PER_VCPU * KVM_MAX_VCPUS)) {
+ spin_unlock(&fi->lock);
+ return -EBUSY;
+ }
+ fi->counters[FIRQ_CNTR_PFAULT] += 1;
+ list_add_tail(&inti->list, &fi->lists[FIRQ_LIST_PFAULT]);
+ set_bit(IRQ_PEND_PFAULT_DONE, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+ return 0;
+}
+
+#define CR_PENDING_SUBCLASS 28
+static int __inject_float_mchk(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+
+ spin_lock(&fi->lock);
+ fi->mchk.cr14 |= inti->mchk.cr14 & (1UL << CR_PENDING_SUBCLASS);
+ fi->mchk.mcic |= inti->mchk.mcic;
+ set_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs);
+ spin_unlock(&fi->lock);
+ kfree(inti);
+ return 0;
+}
+
+static int __inject_io(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
{
struct kvm_s390_float_interrupt *fi;
- struct kvm_s390_interrupt_info *inti, *iter;
+ struct list_head *list;
+ int isc;
- if ((!schid && !cr6) || (schid && cr6))
- return NULL;
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
- inti = NULL;
- list_for_each_entry(iter, &fi->list, list) {
- if (!is_ioint(iter->type))
- continue;
- if (cr6 &&
- ((cr6 & int_word_to_isc_bits(iter->io.io_int_word)) == 0))
- continue;
- if (schid) {
- if (((schid & 0x00000000ffff0000) >> 16) !=
- iter->io.subchannel_id)
- continue;
- if ((schid & 0x000000000000ffff) !=
- iter->io.subchannel_nr)
- continue;
- }
- inti = iter;
- break;
- }
- if (inti) {
- list_del_init(&inti->list);
- fi->irq_count--;
+ if (fi->counters[FIRQ_CNTR_IO] >= KVM_S390_MAX_FLOAT_IRQS) {
+ spin_unlock(&fi->lock);
+ return -EBUSY;
}
- if (list_empty(&fi->list))
- atomic_set(&fi->active, 0);
+ fi->counters[FIRQ_CNTR_IO] += 1;
+
+ isc = int_word_to_isc(inti->io.io_int_word);
+ list = &fi->lists[FIRQ_LIST_IO_ISC_0 + isc];
+ list_add_tail(&inti->list, list);
+ set_bit(IRQ_PEND_IO_ISC_0 + isc, &fi->pending_irqs);
spin_unlock(&fi->lock);
- return inti;
+ return 0;
}
static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
{
struct kvm_s390_local_interrupt *li;
struct kvm_s390_float_interrupt *fi;
- struct kvm_s390_interrupt_info *iter;
struct kvm_vcpu *dst_vcpu = NULL;
int sigcpu;
- int rc = 0;
+ u64 type = READ_ONCE(inti->type);
+ int rc;
fi = &kvm->arch.float_int;
- spin_lock(&fi->lock);
- if (fi->irq_count >= KVM_S390_MAX_FLOAT_IRQS) {
+
+ switch (type) {
+ case KVM_S390_MCHK:
+ rc = __inject_float_mchk(kvm, inti);
+ break;
+ case KVM_S390_INT_VIRTIO:
+ rc = __inject_virtio(kvm, inti);
+ break;
+ case KVM_S390_INT_SERVICE:
+ rc = __inject_service(kvm, inti);
+ break;
+ case KVM_S390_INT_PFAULT_DONE:
+ rc = __inject_pfault_done(kvm, inti);
+ break;
+ case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+ rc = __inject_io(kvm, inti);
+ break;
+ default:
rc = -EINVAL;
- goto unlock_fi;
}
- fi->irq_count++;
- if (!is_ioint(inti->type)) {
- list_add_tail(&inti->list, &fi->list);
- } else {
- u64 isc_bits = int_word_to_isc_bits(inti->io.io_int_word);
+ if (rc)
+ return rc;
- /* Keep I/O interrupts sorted in isc order. */
- list_for_each_entry(iter, &fi->list, list) {
- if (!is_ioint(iter->type))
- continue;
- if (int_word_to_isc_bits(iter->io.io_int_word)
- <= isc_bits)
- continue;
- break;
- }
- list_add_tail(&inti->list, &iter->list);
- }
- atomic_set(&fi->active, 1);
- if (atomic_read(&kvm->online_vcpus) == 0)
- goto unlock_fi;
sigcpu = find_first_bit(fi->idle_mask, KVM_MAX_VCPUS);
if (sigcpu == KVM_MAX_VCPUS) {
do {
@@ -1257,7 +1384,7 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
li = &dst_vcpu->arch.local_int;
spin_lock(&li->lock);
- switch (inti->type) {
+ switch (type) {
case KVM_S390_MCHK:
atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
break;
@@ -1270,9 +1397,8 @@ static int __inject_vm(struct kvm *kvm, struct kvm_s390_interrupt_info *inti)
}
spin_unlock(&li->lock);
kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
-unlock_fi:
- spin_unlock(&fi->lock);
- return rc;
+ return 0;
+
}
int kvm_s390_inject_vm(struct kvm *kvm,
@@ -1332,10 +1458,10 @@ int kvm_s390_inject_vm(struct kvm *kvm,
return rc;
}
-void kvm_s390_reinject_io_int(struct kvm *kvm,
+int kvm_s390_reinject_io_int(struct kvm *kvm,
struct kvm_s390_interrupt_info *inti)
{
- __inject_vm(kvm, inti);
+ return __inject_vm(kvm, inti);
}
int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
@@ -1388,12 +1514,10 @@ void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu)
spin_unlock(&li->lock);
}
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+static int do_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
{
- struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
int rc;
- spin_lock(&li->lock);
switch (irq->type) {
case KVM_S390_PROGRAM_INT:
VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
@@ -1433,83 +1557,130 @@ int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
default:
rc = -EINVAL;
}
+
+ return rc;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ int rc;
+
+ spin_lock(&li->lock);
+ rc = do_inject_vcpu(vcpu, irq);
spin_unlock(&li->lock);
if (!rc)
kvm_s390_vcpu_wakeup(vcpu);
return rc;
}
-void kvm_s390_clear_float_irqs(struct kvm *kvm)
+static inline void clear_irq_list(struct list_head *_list)
{
- struct kvm_s390_float_interrupt *fi;
- struct kvm_s390_interrupt_info *n, *inti = NULL;
+ struct kvm_s390_interrupt_info *inti, *n;
- fi = &kvm->arch.float_int;
- spin_lock(&fi->lock);
- list_for_each_entry_safe(inti, n, &fi->list, list) {
+ list_for_each_entry_safe(inti, n, _list, list) {
list_del(&inti->list);
kfree(inti);
}
- fi->irq_count = 0;
- atomic_set(&fi->active, 0);
- spin_unlock(&fi->lock);
}
-static inline int copy_irq_to_user(struct kvm_s390_interrupt_info *inti,
- u8 *addr)
+static void inti_to_irq(struct kvm_s390_interrupt_info *inti,
+ struct kvm_s390_irq *irq)
{
- struct kvm_s390_irq __user *uptr = (struct kvm_s390_irq __user *) addr;
- struct kvm_s390_irq irq = {0};
-
- irq.type = inti->type;
+ irq->type = inti->type;
switch (inti->type) {
case KVM_S390_INT_PFAULT_INIT:
case KVM_S390_INT_PFAULT_DONE:
case KVM_S390_INT_VIRTIO:
- case KVM_S390_INT_SERVICE:
- irq.u.ext = inti->ext;
+ irq->u.ext = inti->ext;
break;
case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
- irq.u.io = inti->io;
+ irq->u.io = inti->io;
break;
- case KVM_S390_MCHK:
- irq.u.mchk = inti->mchk;
- break;
- default:
- return -EINVAL;
}
+}
- if (copy_to_user(uptr, &irq, sizeof(irq)))
- return -EFAULT;
+void kvm_s390_clear_float_irqs(struct kvm *kvm)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ int i;
- return 0;
-}
+ spin_lock(&fi->lock);
+ for (i = 0; i < FIRQ_LIST_COUNT; i++)
+ clear_irq_list(&fi->lists[i]);
+ for (i = 0; i < FIRQ_MAX_COUNT; i++)
+ fi->counters[i] = 0;
+ spin_unlock(&fi->lock);
+};
-static int get_all_floating_irqs(struct kvm *kvm, __u8 *buf, __u64 len)
+static int get_all_floating_irqs(struct kvm *kvm, u8 __user *usrbuf, u64 len)
{
struct kvm_s390_interrupt_info *inti;
struct kvm_s390_float_interrupt *fi;
+ struct kvm_s390_irq *buf;
+ struct kvm_s390_irq *irq;
+ int max_irqs;
int ret = 0;
int n = 0;
+ int i;
+
+ if (len > KVM_S390_FLIC_MAX_BUFFER || len == 0)
+ return -EINVAL;
+
+ /*
+ * We are already using -ENOMEM to signal
+ * userspace it may retry with a bigger buffer,
+ * so we need to use something else for this case
+ */
+ buf = vzalloc(len);
+ if (!buf)
+ return -ENOBUFS;
+
+ max_irqs = len / sizeof(struct kvm_s390_irq);
fi = &kvm->arch.float_int;
spin_lock(&fi->lock);
-
- list_for_each_entry(inti, &fi->list, list) {
- if (len < sizeof(struct kvm_s390_irq)) {
+ for (i = 0; i < FIRQ_LIST_COUNT; i++) {
+ list_for_each_entry(inti, &fi->lists[i], list) {
+ if (n == max_irqs) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ goto out;
+ }
+ inti_to_irq(inti, &buf[n]);
+ n++;
+ }
+ }
+ if (test_bit(IRQ_PEND_EXT_SERVICE, &fi->pending_irqs)) {
+ if (n == max_irqs) {
/* signal userspace to try again */
ret = -ENOMEM;
- break;
+ goto out;
}
- ret = copy_irq_to_user(inti, buf);
- if (ret)
- break;
- buf += sizeof(struct kvm_s390_irq);
- len -= sizeof(struct kvm_s390_irq);
+ irq = (struct kvm_s390_irq *) &buf[n];
+ irq->type = KVM_S390_INT_SERVICE;
+ irq->u.ext = fi->srv_signal;
n++;
}
+ if (test_bit(IRQ_PEND_MCHK_REP, &fi->pending_irqs)) {
+ if (n == max_irqs) {
+ /* signal userspace to try again */
+ ret = -ENOMEM;
+ goto out;
+ }
+ irq = (struct kvm_s390_irq *) &buf[n];
+ irq->type = KVM_S390_MCHK;
+ irq->u.mchk = fi->mchk;
+ n++;
+}
+out:
spin_unlock(&fi->lock);
+ if (!ret && n > 0) {
+ if (copy_to_user(usrbuf, buf, sizeof(struct kvm_s390_irq) * n))
+ ret = -EFAULT;
+ }
+ vfree(buf);
return ret < 0 ? ret : n;
}
@@ -1520,7 +1691,7 @@ static int flic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
switch (attr->group) {
case KVM_DEV_FLIC_GET_ALL_IRQS:
- r = get_all_floating_irqs(dev->kvm, (u8 *) attr->addr,
+ r = get_all_floating_irqs(dev->kvm, (u8 __user *) attr->addr,
attr->attr);
break;
default:
@@ -1952,3 +2123,143 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
{
return -EINVAL;
}
+
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len)
+{
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ struct kvm_s390_irq *buf;
+ int r = 0;
+ int n;
+
+ buf = vmalloc(len);
+ if (!buf)
+ return -ENOMEM;
+
+ if (copy_from_user((void *) buf, irqstate, len)) {
+ r = -EFAULT;
+ goto out_free;
+ }
+
+ /*
+ * Don't allow setting the interrupt state
+ * when there are already interrupts pending
+ */
+ spin_lock(&li->lock);
+ if (li->pending_irqs) {
+ r = -EBUSY;
+ goto out_unlock;
+ }
+
+ for (n = 0; n < len / sizeof(*buf); n++) {
+ r = do_inject_vcpu(vcpu, &buf[n]);
+ if (r)
+ break;
+ }
+
+out_unlock:
+ spin_unlock(&li->lock);
+out_free:
+ vfree(buf);
+
+ return r;
+}
+
+static void store_local_irq(struct kvm_s390_local_interrupt *li,
+ struct kvm_s390_irq *irq,
+ unsigned long irq_type)
+{
+ switch (irq_type) {
+ case IRQ_PEND_MCHK_EX:
+ case IRQ_PEND_MCHK_REP:
+ irq->type = KVM_S390_MCHK;
+ irq->u.mchk = li->irq.mchk;
+ break;
+ case IRQ_PEND_PROG:
+ irq->type = KVM_S390_PROGRAM_INT;
+ irq->u.pgm = li->irq.pgm;
+ break;
+ case IRQ_PEND_PFAULT_INIT:
+ irq->type = KVM_S390_INT_PFAULT_INIT;
+ irq->u.ext = li->irq.ext;
+ break;
+ case IRQ_PEND_EXT_EXTERNAL:
+ irq->type = KVM_S390_INT_EXTERNAL_CALL;
+ irq->u.extcall = li->irq.extcall;
+ break;
+ case IRQ_PEND_EXT_CLOCK_COMP:
+ irq->type = KVM_S390_INT_CLOCK_COMP;
+ break;
+ case IRQ_PEND_EXT_CPU_TIMER:
+ irq->type = KVM_S390_INT_CPU_TIMER;
+ break;
+ case IRQ_PEND_SIGP_STOP:
+ irq->type = KVM_S390_SIGP_STOP;
+ irq->u.stop = li->irq.stop;
+ break;
+ case IRQ_PEND_RESTART:
+ irq->type = KVM_S390_RESTART;
+ break;
+ case IRQ_PEND_SET_PREFIX:
+ irq->type = KVM_S390_SIGP_SET_PREFIX;
+ irq->u.prefix = li->irq.prefix;
+ break;
+ }
+}
+
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
+{
+ uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+ unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+ unsigned long pending_irqs;
+ struct kvm_s390_irq irq;
+ unsigned long irq_type;
+ int cpuaddr;
+ int n = 0;
+
+ spin_lock(&li->lock);
+ pending_irqs = li->pending_irqs;
+ memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending,
+ sizeof(sigp_emerg_pending));
+ spin_unlock(&li->lock);
+
+ for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) {
+ memset(&irq, 0, sizeof(irq));
+ if (irq_type == IRQ_PEND_EXT_EMERGENCY)
+ continue;
+ if (n + sizeof(irq) > len)
+ return -ENOBUFS;
+ store_local_irq(&vcpu->arch.local_int, &irq, irq_type);
+ if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+ return -EFAULT;
+ n += sizeof(irq);
+ }
+
+ if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) {
+ for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) {
+ memset(&irq, 0, sizeof(irq));
+ if (n + sizeof(irq) > len)
+ return -ENOBUFS;
+ irq.type = KVM_S390_INT_EMERGENCY;
+ irq.u.emerg.code = cpuaddr;
+ if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+ return -EFAULT;
+ n += sizeof(irq);
+ }
+ }
+
+ if ((sigp_ctrl & SIGP_CTRL_C) &&
+ (atomic_read(&vcpu->arch.sie_block->cpuflags) &
+ CPUSTAT_ECALL_PEND)) {
+ if (n + sizeof(irq) > len)
+ return -ENOBUFS;
+ memset(&irq, 0, sizeof(irq));
+ irq.type = KVM_S390_INT_EXTERNAL_CALL;
+ irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK;
+ if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+ return -EFAULT;
+ n += sizeof(irq);
+ }
+
+ return n;
+}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0c3623927563..afa2bd750ffc 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -25,11 +25,13 @@
#include <linux/random.h>
#include <linux/slab.h>
#include <linux/timer.h>
+#include <linux/vmalloc.h>
#include <asm/asm-offsets.h>
#include <asm/lowcore.h>
#include <asm/pgtable.h>
#include <asm/nmi.h>
#include <asm/switch_to.h>
+#include <asm/isc.h>
#include <asm/sclp.h>
#include "kvm-s390.h"
#include "gaccess.h"
@@ -38,6 +40,11 @@
#include "trace.h"
#include "trace-s390.h"
+#define MEM_OP_MAX_SIZE 65536 /* Maximum transfer size for KVM_S390_MEM_OP */
+#define LOCAL_IRQS 32
+#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
+ (KVM_MAX_VCPUS + LOCAL_IRQS))
+
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -87,6 +94,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
+ { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
@@ -101,8 +109,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
/* upper facilities limit for kvm */
unsigned long kvm_s390_fac_list_mask[] = {
- 0xff82fffbf4fc2000UL,
- 0x005c000000000000UL,
+ 0xffe6fffbfcfdfc40UL,
+ 0x205c800000000000UL,
};
unsigned long kvm_s390_fac_list_mask_size(void)
@@ -165,16 +173,22 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_ONE_REG:
case KVM_CAP_ENABLE_CAP:
case KVM_CAP_S390_CSS_SUPPORT:
- case KVM_CAP_IRQFD:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_DEVICE_CTRL:
case KVM_CAP_ENABLE_CAP_VM:
case KVM_CAP_S390_IRQCHIP:
case KVM_CAP_VM_ATTRIBUTES:
case KVM_CAP_MP_STATE:
+ case KVM_CAP_S390_INJECT_IRQ:
case KVM_CAP_S390_USER_SIGP:
+ case KVM_CAP_S390_USER_STSI:
+ case KVM_CAP_S390_SKEYS:
+ case KVM_CAP_S390_IRQ_STATE:
r = 1;
break;
+ case KVM_CAP_S390_MEM_OP:
+ r = MEM_OP_MAX_SIZE;
+ break;
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
r = KVM_MAX_VCPUS;
@@ -185,6 +199,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_COW:
r = MACHINE_HAS_ESOP;
break;
+ case KVM_CAP_S390_VECTOR_REGISTERS:
+ r = MACHINE_HAS_VX;
+ break;
default:
r = 0;
}
@@ -265,6 +282,18 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
kvm->arch.user_sigp = 1;
r = 0;
break;
+ case KVM_CAP_S390_VECTOR_REGISTERS:
+ if (MACHINE_HAS_VX) {
+ set_kvm_facility(kvm->arch.model.fac->mask, 129);
+ set_kvm_facility(kvm->arch.model.fac->list, 129);
+ r = 0;
+ } else
+ r = -EINVAL;
+ break;
+ case KVM_CAP_S390_USER_STSI:
+ kvm->arch.user_stsi = 1;
+ r = 0;
+ break;
default:
r = -EINVAL;
break;
@@ -522,7 +551,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
memcpy(&kvm->arch.model.cpu_id, &proc->cpuid,
sizeof(struct cpuid));
kvm->arch.model.ibc = proc->ibc;
- memcpy(kvm->arch.model.fac->kvm, proc->fac_list,
+ memcpy(kvm->arch.model.fac->list, proc->fac_list,
S390_ARCH_FAC_LIST_SIZE_BYTE);
} else
ret = -EFAULT;
@@ -556,7 +585,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
}
memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid));
proc->ibc = kvm->arch.model.ibc;
- memcpy(&proc->fac_list, kvm->arch.model.fac->kvm, S390_ARCH_FAC_LIST_SIZE_BYTE);
+ memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE);
if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
ret = -EFAULT;
kfree(proc);
@@ -576,10 +605,10 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
}
get_cpu_id((struct cpuid *) &mach->cpuid);
mach->ibc = sclp_get_ibc();
- memcpy(&mach->fac_mask, kvm_s390_fac_list_mask,
- kvm_s390_fac_list_mask_size() * sizeof(u64));
+ memcpy(&mach->fac_mask, kvm->arch.model.fac->mask,
+ S390_ARCH_FAC_LIST_SIZE_BYTE);
memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
- S390_ARCH_FAC_LIST_SIZE_U64);
+ S390_ARCH_FAC_LIST_SIZE_BYTE);
if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
ret = -EFAULT;
kfree(mach);
@@ -709,6 +738,108 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
return ret;
}
+static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+ uint8_t *keys;
+ uint64_t hva;
+ unsigned long curkey;
+ int i, r = 0;
+
+ if (args->flags != 0)
+ return -EINVAL;
+
+ /* Is this guest using storage keys? */
+ if (!mm_use_skey(current->mm))
+ return KVM_S390_GET_SKEYS_NONE;
+
+ /* Enforce sane limit on memory allocation */
+ if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+ return -EINVAL;
+
+ keys = kmalloc_array(args->count, sizeof(uint8_t),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!keys)
+ keys = vmalloc(sizeof(uint8_t) * args->count);
+ if (!keys)
+ return -ENOMEM;
+
+ for (i = 0; i < args->count; i++) {
+ hva = gfn_to_hva(kvm, args->start_gfn + i);
+ if (kvm_is_error_hva(hva)) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ curkey = get_guest_storage_key(current->mm, hva);
+ if (IS_ERR_VALUE(curkey)) {
+ r = curkey;
+ goto out;
+ }
+ keys[i] = curkey;
+ }
+
+ r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+ sizeof(uint8_t) * args->count);
+ if (r)
+ r = -EFAULT;
+out:
+ kvfree(keys);
+ return r;
+}
+
+static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+ uint8_t *keys;
+ uint64_t hva;
+ int i, r = 0;
+
+ if (args->flags != 0)
+ return -EINVAL;
+
+ /* Enforce sane limit on memory allocation */
+ if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+ return -EINVAL;
+
+ keys = kmalloc_array(args->count, sizeof(uint8_t),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!keys)
+ keys = vmalloc(sizeof(uint8_t) * args->count);
+ if (!keys)
+ return -ENOMEM;
+
+ r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
+ sizeof(uint8_t) * args->count);
+ if (r) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ /* Enable storage key handling for the guest */
+ s390_enable_skey();
+
+ for (i = 0; i < args->count; i++) {
+ hva = gfn_to_hva(kvm, args->start_gfn + i);
+ if (kvm_is_error_hva(hva)) {
+ r = -EFAULT;
+ goto out;
+ }
+
+ /* Lowest order bit is reserved */
+ if (keys[i] & 0x01) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ r = set_guest_storage_key(current->mm, hva,
+ (unsigned long)keys[i], 0);
+ if (r)
+ goto out;
+ }
+out:
+ kvfree(keys);
+ return r;
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -768,6 +899,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_s390_vm_has_attr(kvm, &attr);
break;
}
+ case KVM_S390_GET_SKEYS: {
+ struct kvm_s390_skeys args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp,
+ sizeof(struct kvm_s390_skeys)))
+ break;
+ r = kvm_s390_get_skeys(kvm, &args);
+ break;
+ }
+ case KVM_S390_SET_SKEYS: {
+ struct kvm_s390_skeys args;
+
+ r = -EFAULT;
+ if (copy_from_user(&args, argp,
+ sizeof(struct kvm_s390_skeys)))
+ break;
+ r = kvm_s390_set_skeys(kvm, &args);
+ break;
+ }
default:
r = -ENOTTY;
}
@@ -778,15 +929,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
static int kvm_s390_query_ap_config(u8 *config)
{
u32 fcn_code = 0x04000000UL;
- u32 cc;
+ u32 cc = 0;
+ memset(config, 0, 128);
asm volatile(
"lgr 0,%1\n"
"lgr 2,%2\n"
".long 0xb2af0000\n" /* PQAP(QCI) */
- "ipm %0\n"
+ "0: ipm %0\n"
"srl %0,28\n"
- : "=r" (cc)
+ "1:\n"
+ EX_TABLE(0b, 1b)
+ : "+r" (cc)
: "r" (fcn_code), "r" (config)
: "cc", "0", "2", "memory"
);
@@ -839,9 +993,13 @@ static int kvm_s390_crypto_init(struct kvm *kvm)
kvm_s390_set_crycb_format(kvm);
- /* Disable AES/DEA protected key functions by default */
- kvm->arch.crypto.aes_kw = 0;
- kvm->arch.crypto.dea_kw = 0;
+ /* Enable AES/DEA protected key functions by default */
+ kvm->arch.crypto.aes_kw = 1;
+ kvm->arch.crypto.dea_kw = 1;
+ get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
+ sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
+ get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
+ sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
return 0;
}
@@ -881,53 +1039,43 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.dbf = debug_register(debug_name, 8, 2, 8 * sizeof(long));
if (!kvm->arch.dbf)
- goto out_nodbf;
+ goto out_err;
/*
* The architectural maximum amount of facilities is 16 kbit. To store
* this amount, 2 kbyte of memory is required. Thus we need a full
- * page to hold the active copy (arch.model.fac->sie) and the current
- * facilities set (arch.model.fac->kvm). Its address size has to be
+ * page to hold the guest facility list (arch.model.fac->list) and the
+ * facility mask (arch.model.fac->mask). Its address size has to be
* 31 bits and word aligned.
*/
kvm->arch.model.fac =
- (struct s390_model_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
+ (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!kvm->arch.model.fac)
- goto out_nofac;
-
- memcpy(kvm->arch.model.fac->kvm, S390_lowcore.stfle_fac_list,
- S390_ARCH_FAC_LIST_SIZE_U64);
-
- /*
- * If this KVM host runs *not* in a LPAR, relax the facility bits
- * of the kvm facility mask by all missing facilities. This will allow
- * to determine the right CPU model by means of the remaining facilities.
- * Live guest migration must prohibit the migration of KVMs running in
- * a LPAR to non LPAR hosts.
- */
- if (!MACHINE_IS_LPAR)
- for (i = 0; i < kvm_s390_fac_list_mask_size(); i++)
- kvm_s390_fac_list_mask[i] &= kvm->arch.model.fac->kvm[i];
+ goto out_err;
- /*
- * Apply the kvm facility mask to limit the kvm supported/tolerated
- * facility list.
- */
+ /* Populate the facility mask initially. */
+ memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list,
+ S390_ARCH_FAC_LIST_SIZE_BYTE);
for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
if (i < kvm_s390_fac_list_mask_size())
- kvm->arch.model.fac->kvm[i] &= kvm_s390_fac_list_mask[i];
+ kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i];
else
- kvm->arch.model.fac->kvm[i] = 0UL;
+ kvm->arch.model.fac->mask[i] = 0UL;
}
+ /* Populate the facility list initially. */
+ memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask,
+ S390_ARCH_FAC_LIST_SIZE_BYTE);
+
kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id);
kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff;
if (kvm_s390_crypto_init(kvm) < 0)
- goto out_crypto;
+ goto out_err;
spin_lock_init(&kvm->arch.float_int.lock);
- INIT_LIST_HEAD(&kvm->arch.float_int.list);
+ for (i = 0; i < FIRQ_LIST_COUNT; i++)
+ INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
init_waitqueue_head(&kvm->arch.ipte_wq);
mutex_init(&kvm->arch.ipte_mutex);
@@ -939,7 +1087,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
} else {
kvm->arch.gmap = gmap_alloc(current->mm, (1UL << 44) - 1);
if (!kvm->arch.gmap)
- goto out_nogmap;
+ goto out_err;
kvm->arch.gmap->private = kvm;
kvm->arch.gmap->pfault_enabled = 0;
}
@@ -951,15 +1099,11 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
spin_lock_init(&kvm->arch.start_stop_lock);
return 0;
-out_nogmap:
+out_err:
kfree(kvm->arch.crypto.crycb);
-out_crypto:
free_page((unsigned long)kvm->arch.model.fac);
-out_nofac:
debug_unregister(kvm->arch.dbf);
-out_nodbf:
free_page((unsigned long)(kvm->arch.sca));
-out_err:
return rc;
}
@@ -1039,6 +1183,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
KVM_SYNC_CRS |
KVM_SYNC_ARCH0 |
KVM_SYNC_PFAULT;
+ if (test_kvm_facility(vcpu->kvm, 129))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
if (kvm_is_ucontrol(vcpu->kvm))
return __kvm_ucontrol_vcpu_init(vcpu);
@@ -1049,10 +1195,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
- save_fp_regs(vcpu->arch.host_fpregs.fprs);
+ if (test_kvm_facility(vcpu->kvm, 129))
+ save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+ else
+ save_fp_regs(vcpu->arch.host_fpregs.fprs);
save_access_regs(vcpu->arch.host_acrs);
- restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ if (test_kvm_facility(vcpu->kvm, 129)) {
+ restore_fp_ctl(&vcpu->run->s.regs.fpc);
+ restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+ } else {
+ restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+ restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ }
restore_access_regs(vcpu->run->s.regs.acrs);
gmap_enable(vcpu->arch.gmap);
atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1062,11 +1216,19 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
{
atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
gmap_disable(vcpu->arch.gmap);
- save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
- save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ if (test_kvm_facility(vcpu->kvm, 129)) {
+ save_fp_ctl(&vcpu->run->s.regs.fpc);
+ save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+ } else {
+ save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+ save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+ }
save_access_regs(vcpu->run->s.regs.acrs);
restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
- restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+ if (test_kvm_facility(vcpu->kvm, 129))
+ restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+ else
+ restore_fp_regs(vcpu->arch.host_fpregs.fprs);
restore_access_regs(vcpu->arch.host_acrs);
}
@@ -1134,6 +1296,15 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
return 0;
}
+static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
+{
+ struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
+
+ vcpu->arch.cpu_id = model->cpu_id;
+ vcpu->arch.sie_block->ibc = model->ibc;
+ vcpu->arch.sie_block->fac = (int) (long) model->fac->list;
+}
+
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
{
int rc = 0;
@@ -1142,6 +1313,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
CPUSTAT_SM |
CPUSTAT_STOPPED |
CPUSTAT_GED);
+ kvm_s390_vcpu_setup_model(vcpu);
+
vcpu->arch.sie_block->ecb = 6;
if (test_kvm_facility(vcpu->kvm, 50) && test_kvm_facility(vcpu->kvm, 73))
vcpu->arch.sie_block->ecb |= 0x10;
@@ -1152,8 +1325,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.sie_block->eca |= 1;
if (sclp_has_sigpif())
vcpu->arch.sie_block->eca |= 0x10000000U;
- vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE |
- ICTL_TPROT;
+ if (test_kvm_facility(vcpu->kvm, 129)) {
+ vcpu->arch.sie_block->eca |= 0x00020000;
+ vcpu->arch.sie_block->ecd |= 0x20000000;
+ }
+ vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
if (kvm_s390_cmma_enabled(vcpu->kvm)) {
rc = kvm_s390_vcpu_setup_cmma(vcpu);
@@ -1163,13 +1339,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
- mutex_lock(&vcpu->kvm->lock);
- vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id;
- memcpy(vcpu->kvm->arch.model.fac->sie, vcpu->kvm->arch.model.fac->kvm,
- S390_ARCH_FAC_LIST_SIZE_BYTE);
- vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc;
- mutex_unlock(&vcpu->kvm->lock);
-
kvm_s390_vcpu_crypto_setup(vcpu);
return rc;
@@ -1197,6 +1366,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block = &sie_page->sie_block;
vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+ vcpu->arch.host_vregs = &sie_page->vregs;
vcpu->arch.sie_block->icpua = id;
if (!kvm_is_ucontrol(kvm)) {
@@ -1212,7 +1382,6 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca;
set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn);
}
- vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->sie;
spin_lock_init(&vcpu->arch.local_int.lock);
vcpu->arch.local_int.float_int = &kvm->arch.float_int;
@@ -1732,6 +1901,31 @@ static int vcpu_pre_run(struct kvm_vcpu *vcpu)
return 0;
}
+static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
+{
+ psw_t *psw = &vcpu->arch.sie_block->gpsw;
+ u8 opcode;
+ int rc;
+
+ VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+ trace_kvm_s390_sie_fault(vcpu);
+
+ /*
+ * We want to inject an addressing exception, which is defined as a
+ * suppressing or terminating exception. However, since we came here
+ * by a DAT access exception, the PSW still points to the faulting
+ * instruction since DAT exceptions are nullifying. So we've got
+ * to look up the current opcode to get the length of the instruction
+ * to be able to forward the PSW.
+ */
+ rc = read_guest(vcpu, psw->addr, 0, &opcode, 1);
+ if (rc)
+ return kvm_s390_inject_prog_cond(vcpu, rc);
+ psw->addr = __rewind_psw(*psw, -insn_length(opcode));
+
+ return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
+}
+
static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
{
int rc = -1;
@@ -1763,11 +1957,8 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
}
}
- if (rc == -1) {
- VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
- trace_kvm_s390_sie_fault(vcpu);
- rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
- }
+ if (rc == -1)
+ rc = vcpu_post_run_fault_in_sie(vcpu);
memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
@@ -1983,6 +2174,35 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
return kvm_s390_store_status_unloaded(vcpu, addr);
}
+/*
+ * store additional status at address
+ */
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+ unsigned long gpa)
+{
+ /* Only bits 0-53 are used for address formation */
+ if (!(gpa & ~0x3ff))
+ return 0;
+
+ return write_guest_abs(vcpu, gpa & ~0x3ff,
+ (void *)&vcpu->run->s.regs.vrs, 512);
+}
+
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+ if (!test_kvm_facility(vcpu->kvm, 129))
+ return 0;
+
+ /*
+ * The guest VXRS are in the host VXRs due to the lazy
+ * copying in vcpu load/put. Let's update our copies before we save
+ * it into the save area.
+ */
+ save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+
+ return kvm_s390_store_adtl_status_unloaded(vcpu, addr);
+}
+
static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
{
kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
@@ -2107,6 +2327,65 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
return r;
}
+static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
+ struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ void *tmpbuf = NULL;
+ int r, srcu_idx;
+ const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
+ | KVM_S390_MEMOP_F_CHECK_ONLY;
+
+ if (mop->flags & ~supported_flags)
+ return -EINVAL;
+
+ if (mop->size > MEM_OP_MAX_SIZE)
+ return -E2BIG;
+
+ if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+ tmpbuf = vmalloc(mop->size);
+ if (!tmpbuf)
+ return -ENOMEM;
+ }
+
+ srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_LOGICAL_READ:
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
+ break;
+ }
+ r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+ if (r == 0) {
+ if (copy_to_user(uaddr, tmpbuf, mop->size))
+ r = -EFAULT;
+ }
+ break;
+ case KVM_S390_MEMOP_LOGICAL_WRITE:
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
+ break;
+ }
+ if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+ r = -EFAULT;
+ break;
+ }
+ r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+ break;
+ default:
+ r = -EINVAL;
+ }
+
+ srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+
+ if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
+ kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+
+ vfree(tmpbuf);
+ return r;
+}
+
long kvm_arch_vcpu_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -2116,6 +2395,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
long r;
switch (ioctl) {
+ case KVM_S390_IRQ: {
+ struct kvm_s390_irq s390irq;
+
+ r = -EFAULT;
+ if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
+ break;
+ r = kvm_s390_inject_vcpu(vcpu, &s390irq);
+ break;
+ }
case KVM_S390_INTERRUPT: {
struct kvm_s390_interrupt s390int;
struct kvm_s390_irq s390irq;
@@ -2206,6 +2494,47 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
break;
}
+ case KVM_S390_MEM_OP: {
+ struct kvm_s390_mem_op mem_op;
+
+ if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+ r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+ else
+ r = -EFAULT;
+ break;
+ }
+ case KVM_S390_SET_IRQ_STATE: {
+ struct kvm_s390_irq_state irq_state;
+
+ r = -EFAULT;
+ if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+ break;
+ if (irq_state.len > VCPU_IRQS_MAX_BUF ||
+ irq_state.len == 0 ||
+ irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
+ r = -EINVAL;
+ break;
+ }
+ r = kvm_s390_set_irq_state(vcpu,
+ (void __user *) irq_state.buf,
+ irq_state.len);
+ break;
+ }
+ case KVM_S390_GET_IRQ_STATE: {
+ struct kvm_s390_irq_state irq_state;
+
+ r = -EFAULT;
+ if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+ break;
+ if (irq_state.len == 0) {
+ r = -EINVAL;
+ break;
+ }
+ r = kvm_s390_get_irq_state(vcpu,
+ (__u8 __user *) irq_state.buf,
+ irq_state.len);
+ break;
+ }
default:
r = -ENOTTY;
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 985c2114d7ef..ca108b90ae56 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -70,16 +70,22 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix)
kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
}
-static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu)
+typedef u8 __bitwise ar_t;
+
+static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar)
{
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ if (ar)
+ *ar = base2;
+
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
- u64 *address1, u64 *address2)
+ u64 *address1, u64 *address2,
+ ar_t *ar_b1, ar_t *ar_b2)
{
u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28;
u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16;
@@ -88,6 +94,11 @@ static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu,
*address1 = (base1 ? vcpu->run->s.regs.gprs[base1] : 0) + disp1;
*address2 = (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
+
+ if (ar_b1)
+ *ar_b1 = base1;
+ if (ar_b2)
+ *ar_b2 = base2;
}
static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2)
@@ -98,7 +109,7 @@ static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2
*r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16;
}
-static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
+static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar)
{
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) +
@@ -107,14 +118,20 @@ static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu)
if (disp2 & 0x80000)
disp2+=0xfff00000;
+ if (ar)
+ *ar = base2;
+
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2;
}
-static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu)
+static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, ar_t *ar)
{
u32 base2 = vcpu->arch.sie_block->ipb >> 28;
u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16);
+ if (ar)
+ *ar = base2;
+
return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + disp2;
}
@@ -125,10 +142,22 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc)
vcpu->arch.sie_block->gpsw.mask |= cc << 44;
}
-/* test availability of facility in a kvm intance */
+/* test availability of facility in a kvm instance */
static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr)
{
- return __test_facility(nr, kvm->arch.model.fac->kvm);
+ return __test_facility(nr, kvm->arch.model.fac->mask) &&
+ __test_facility(nr, kvm->arch.model.fac->list);
+}
+
+static inline int set_kvm_facility(u64 *fac_list, unsigned long nr)
+{
+ unsigned char *ptr;
+
+ if (nr >= MAX_FACILITY_BIT)
+ return -EINVAL;
+ ptr = (unsigned char *) fac_list + (nr >> 3);
+ *ptr |= (0x80UL >> (nr & 7));
+ return 0;
}
/* are cpu states controlled by user space */
@@ -149,9 +178,9 @@ int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
struct kvm_s390_irq *irq);
int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
- u64 cr6, u64 schid);
-void kvm_s390_reinject_io_int(struct kvm *kvm,
- struct kvm_s390_interrupt_info *inti);
+ u64 isc_mask, u32 schid);
+int kvm_s390_reinject_io_int(struct kvm *kvm,
+ struct kvm_s390_interrupt_info *inti);
int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
/* implemented in intercept.c */
@@ -176,7 +205,10 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu);
/* implemented in kvm-s390.c */
long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable);
int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_store_adtl_status_unloaded(struct kvm_vcpu *vcpu,
+ unsigned long addr);
int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_adtl_status(struct kvm_vcpu *vcpu, unsigned long addr);
void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu);
void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu);
void s390_vcpu_block(struct kvm_vcpu *vcpu);
@@ -240,6 +272,10 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
extern struct kvm_device_ops kvm_flic_ops;
int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
+ void __user *buf, int len);
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
+ __u8 __user *buf, int len);
/* implemented in guestdbg.c */
void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index bdd9b5b17e03..d22d8ee1ff9d 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -36,15 +36,16 @@ static int handle_set_clock(struct kvm_vcpu *vcpu)
struct kvm_vcpu *cpup;
s64 hostclk, val;
int i, rc;
+ ar_t ar;
u64 op2;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- op2 = kvm_s390_get_base_disp_s(vcpu);
+ op2 = kvm_s390_get_base_disp_s(vcpu, &ar);
if (op2 & 7) /* Operand must be on a doubleword boundary */
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- rc = read_guest(vcpu, op2, &val, sizeof(val));
+ rc = read_guest(vcpu, op2, ar, &val, sizeof(val));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
@@ -68,20 +69,21 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu)
u64 operand2;
u32 address;
int rc;
+ ar_t ar;
vcpu->stat.instruction_spx++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- operand2 = kvm_s390_get_base_disp_s(vcpu);
+ operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
/* must be word boundary */
if (operand2 & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* get the value */
- rc = read_guest(vcpu, operand2, &address, sizeof(address));
+ rc = read_guest(vcpu, operand2, ar, &address, sizeof(address));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
@@ -107,13 +109,14 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
u64 operand2;
u32 address;
int rc;
+ ar_t ar;
vcpu->stat.instruction_stpx++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- operand2 = kvm_s390_get_base_disp_s(vcpu);
+ operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
/* must be word boundary */
if (operand2 & 3)
@@ -122,7 +125,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu)
address = kvm_s390_get_prefix(vcpu);
/* get the value */
- rc = write_guest(vcpu, operand2, &address, sizeof(address));
+ rc = write_guest(vcpu, operand2, ar, &address, sizeof(address));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
@@ -136,18 +139,19 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
u16 vcpu_id = vcpu->vcpu_id;
u64 ga;
int rc;
+ ar_t ar;
vcpu->stat.instruction_stap++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- ga = kvm_s390_get_base_disp_s(vcpu);
+ ga = kvm_s390_get_base_disp_s(vcpu, &ar);
if (ga & 1)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- rc = write_guest(vcpu, ga, &vcpu_id, sizeof(vcpu_id));
+ rc = write_guest(vcpu, ga, ar, &vcpu_id, sizeof(vcpu_id));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
@@ -207,7 +211,7 @@ static int handle_test_block(struct kvm_vcpu *vcpu)
kvm_s390_get_regs_rre(vcpu, NULL, &reg2);
addr = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
addr = kvm_s390_logical_to_effective(vcpu, addr);
- if (kvm_s390_check_low_addr_protection(vcpu, addr))
+ if (kvm_s390_check_low_addr_prot_real(vcpu, addr))
return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
addr = kvm_s390_real_to_abs(vcpu, addr);
@@ -229,18 +233,20 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
struct kvm_s390_interrupt_info *inti;
unsigned long len;
u32 tpi_data[3];
- int cc, rc;
+ int rc;
u64 addr;
+ ar_t ar;
- rc = 0;
- addr = kvm_s390_get_base_disp_s(vcpu);
+ addr = kvm_s390_get_base_disp_s(vcpu, &ar);
if (addr & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- cc = 0;
+
inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
- if (!inti)
- goto no_interrupt;
- cc = 1;
+ if (!inti) {
+ kvm_s390_set_psw_cc(vcpu, 0);
+ return 0;
+ }
+
tpi_data[0] = inti->io.subchannel_id << 16 | inti->io.subchannel_nr;
tpi_data[1] = inti->io.io_int_parm;
tpi_data[2] = inti->io.io_int_word;
@@ -250,40 +256,51 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
* provided area.
*/
len = sizeof(tpi_data) - 4;
- rc = write_guest(vcpu, addr, &tpi_data, len);
- if (rc)
- return kvm_s390_inject_prog_cond(vcpu, rc);
+ rc = write_guest(vcpu, addr, ar, &tpi_data, len);
+ if (rc) {
+ rc = kvm_s390_inject_prog_cond(vcpu, rc);
+ goto reinject_interrupt;
+ }
} else {
/*
* Store the three-word I/O interruption code into
* the appropriate lowcore area.
*/
len = sizeof(tpi_data);
- if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len))
+ if (write_guest_lc(vcpu, __LC_SUBCHANNEL_ID, &tpi_data, len)) {
+ /* failed writes to the low core are not recoverable */
rc = -EFAULT;
+ goto reinject_interrupt;
+ }
}
+
+ /* irq was successfully handed to the guest */
+ kfree(inti);
+ kvm_s390_set_psw_cc(vcpu, 1);
+ return 0;
+reinject_interrupt:
/*
* If we encounter a problem storing the interruption code, the
* instruction is suppressed from the guest's view: reinject the
* interrupt.
*/
- if (!rc)
+ if (kvm_s390_reinject_io_int(vcpu->kvm, inti)) {
kfree(inti);
- else
- kvm_s390_reinject_io_int(vcpu->kvm, inti);
-no_interrupt:
- /* Set condition code and we're done. */
- if (!rc)
- kvm_s390_set_psw_cc(vcpu, cc);
+ rc = -EFAULT;
+ }
+ /* don't set the cc, a pgm irq was injected or we drop to user space */
return rc ? -EFAULT : 0;
}
static int handle_tsch(struct kvm_vcpu *vcpu)
{
- struct kvm_s390_interrupt_info *inti;
+ struct kvm_s390_interrupt_info *inti = NULL;
+ const u64 isc_mask = 0xffUL << 24; /* all iscs set */
- inti = kvm_s390_get_io_int(vcpu->kvm, 0,
- vcpu->run->s.regs.gprs[1]);
+ /* a valid schid has at least one bit set */
+ if (vcpu->run->s.regs.gprs[1])
+ inti = kvm_s390_get_io_int(vcpu->kvm, isc_mask,
+ vcpu->run->s.regs.gprs[1]);
/*
* Prepare exit to userspace.
@@ -348,7 +365,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu)
* We need to shift the lower 32 facility bits (bit 0-31) from a u64
* into a u32 memory representation. They will remain bits 0-31.
*/
- fac = *vcpu->kvm->arch.model.fac->sie >> 32;
+ fac = *vcpu->kvm->arch.model.fac->list >> 32;
rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list),
&fac, sizeof(fac));
if (rc)
@@ -386,15 +403,16 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu)
psw_compat_t new_psw;
u64 addr;
int rc;
+ ar_t ar;
if (gpsw->mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- addr = kvm_s390_get_base_disp_s(vcpu);
+ addr = kvm_s390_get_base_disp_s(vcpu, &ar);
if (addr & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+ rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
if (!(new_psw.mask & PSW32_MASK_BASE))
@@ -412,14 +430,15 @@ static int handle_lpswe(struct kvm_vcpu *vcpu)
psw_t new_psw;
u64 addr;
int rc;
+ ar_t ar;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- addr = kvm_s390_get_base_disp_s(vcpu);
+ addr = kvm_s390_get_base_disp_s(vcpu, &ar);
if (addr & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- rc = read_guest(vcpu, addr, &new_psw, sizeof(new_psw));
+ rc = read_guest(vcpu, addr, ar, &new_psw, sizeof(new_psw));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
vcpu->arch.sie_block->gpsw = new_psw;
@@ -433,18 +452,19 @@ static int handle_stidp(struct kvm_vcpu *vcpu)
u64 stidp_data = vcpu->arch.stidp_data;
u64 operand2;
int rc;
+ ar_t ar;
vcpu->stat.instruction_stidp++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- operand2 = kvm_s390_get_base_disp_s(vcpu);
+ operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
if (operand2 & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
- rc = write_guest(vcpu, operand2, &stidp_data, sizeof(stidp_data));
+ rc = write_guest(vcpu, operand2, ar, &stidp_data, sizeof(stidp_data));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
@@ -467,6 +487,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
for (n = mem->count - 1; n > 0 ; n--)
memcpy(&mem->vm[n], &mem->vm[n - 1], sizeof(mem->vm[0]));
+ memset(&mem->vm[0], 0, sizeof(mem->vm[0]));
mem->vm[0].cpus_total = cpus;
mem->vm[0].cpus_configured = cpus;
mem->vm[0].cpus_standby = 0;
@@ -478,6 +499,17 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
ASCEBC(mem->vm[0].cpi, 16);
}
+static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar,
+ u8 fc, u8 sel1, u16 sel2)
+{
+ vcpu->run->exit_reason = KVM_EXIT_S390_STSI;
+ vcpu->run->s390_stsi.addr = addr;
+ vcpu->run->s390_stsi.ar = ar;
+ vcpu->run->s390_stsi.fc = fc;
+ vcpu->run->s390_stsi.sel1 = sel1;
+ vcpu->run->s390_stsi.sel2 = sel2;
+}
+
static int handle_stsi(struct kvm_vcpu *vcpu)
{
int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
@@ -486,6 +518,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
unsigned long mem = 0;
u64 operand2;
int rc = 0;
+ ar_t ar;
vcpu->stat.instruction_stsi++;
VCPU_EVENT(vcpu, 4, "stsi: fc: %x sel1: %x sel2: %x", fc, sel1, sel2);
@@ -508,7 +541,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
return 0;
}
- operand2 = kvm_s390_get_base_disp_s(vcpu);
+ operand2 = kvm_s390_get_base_disp_s(vcpu, &ar);
if (operand2 & 0xfff)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -532,16 +565,20 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
break;
}
- rc = write_guest(vcpu, operand2, (void *)mem, PAGE_SIZE);
+ rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
if (rc) {
rc = kvm_s390_inject_prog_cond(vcpu, rc);
goto out;
}
+ if (vcpu->kvm->arch.user_stsi) {
+ insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+ rc = -EREMOTE;
+ }
trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
free_page(mem);
kvm_s390_set_psw_cc(vcpu, 0);
vcpu->run->s.regs.gprs[0] = 0;
- return 0;
+ return rc;
out_no_data:
kvm_s390_set_psw_cc(vcpu, 3);
out:
@@ -670,7 +707,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
}
if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
- if (kvm_s390_check_low_addr_protection(vcpu, start))
+ if (kvm_s390_check_low_addr_prot_real(vcpu, start))
return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
}
@@ -776,13 +813,14 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
int reg, rc, nr_regs;
u32 ctl_array[16];
u64 ga;
+ ar_t ar;
vcpu->stat.instruction_lctl++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- ga = kvm_s390_get_base_disp_rs(vcpu);
+ ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
if (ga & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -791,7 +829,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu)
trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
nr_regs = ((reg3 - reg1) & 0xf) + 1;
- rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+ rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
reg = reg1;
@@ -814,13 +852,14 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
int reg, rc, nr_regs;
u32 ctl_array[16];
u64 ga;
+ ar_t ar;
vcpu->stat.instruction_stctl++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- ga = kvm_s390_get_base_disp_rs(vcpu);
+ ga = kvm_s390_get_base_disp_rs(vcpu, &ar);
if (ga & 3)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -836,7 +875,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu)
break;
reg = (reg + 1) % 16;
} while (1);
- rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+ rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u32));
return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
}
@@ -847,13 +886,14 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
int reg, rc, nr_regs;
u64 ctl_array[16];
u64 ga;
+ ar_t ar;
vcpu->stat.instruction_lctlg++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- ga = kvm_s390_get_base_disp_rsy(vcpu);
+ ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
if (ga & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -862,7 +902,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
nr_regs = ((reg3 - reg1) & 0xf) + 1;
- rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+ rc = read_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
if (rc)
return kvm_s390_inject_prog_cond(vcpu, rc);
reg = reg1;
@@ -884,13 +924,14 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
int reg, rc, nr_regs;
u64 ctl_array[16];
u64 ga;
+ ar_t ar;
vcpu->stat.instruction_stctg++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- ga = kvm_s390_get_base_disp_rsy(vcpu);
+ ga = kvm_s390_get_base_disp_rsy(vcpu, &ar);
if (ga & 7)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
@@ -906,7 +947,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu)
break;
reg = (reg + 1) % 16;
} while (1);
- rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+ rc = write_guest(vcpu, ga, ar, ctl_array, nr_regs * sizeof(u64));
return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
}
@@ -931,13 +972,14 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
unsigned long hva, gpa;
int ret = 0, cc = 0;
bool writable;
+ ar_t ar;
vcpu->stat.instruction_tprot++;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- kvm_s390_get_base_disp_sse(vcpu, &address1, &address2);
+ kvm_s390_get_base_disp_sse(vcpu, &address1, &address2, &ar, NULL);
/* we only handle the Linux memory detection case:
* access key == 0
@@ -946,11 +988,11 @@ static int handle_tprot(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_DAT)
ipte_lock(vcpu);
- ret = guest_translate_address(vcpu, address1, &gpa, 1);
+ ret = guest_translate_address(vcpu, address1, ar, &gpa, 1);
if (ret == PGM_PROTECTION) {
/* Write protected? Try again with read-only... */
cc = 1;
- ret = guest_translate_address(vcpu, address1, &gpa, 0);
+ ret = guest_translate_address(vcpu, address1, ar, &gpa, 0);
}
if (ret) {
if (ret == PGM_ADDRESSING || ret == PGM_TRANSLATION_SPEC) {
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 23b1e86b2122..72e58bd2bee7 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -393,6 +393,9 @@ static int handle_sigp_order_in_user_space(struct kvm_vcpu *vcpu, u8 order_code)
case SIGP_STORE_STATUS_AT_ADDRESS:
vcpu->stat.instruction_sigp_store_status++;
break;
+ case SIGP_STORE_ADDITIONAL_STATUS:
+ vcpu->stat.instruction_sigp_store_adtl_status++;
+ break;
case SIGP_SET_PREFIX:
vcpu->stat.instruction_sigp_prefix++;
break;
@@ -431,7 +434,7 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
- order_code = kvm_s390_get_base_disp_rs(vcpu);
+ order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
if (handle_sigp_order_in_user_space(vcpu, order_code))
return -EOPNOTSUPP;
@@ -473,7 +476,7 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
int r3 = vcpu->arch.sie_block->ipa & 0x000f;
u16 cpu_addr = vcpu->run->s.regs.gprs[r3];
struct kvm_vcpu *dest_vcpu;
- u8 order_code = kvm_s390_get_base_disp_rs(vcpu);
+ u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 753a56731951..f0b85443e060 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -287,7 +287,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev,
addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48);
return (void __iomem *) addr + offset;
}
-EXPORT_SYMBOL_GPL(pci_iomap_range);
+EXPORT_SYMBOL(pci_iomap_range);
void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen)
{
@@ -309,7 +309,7 @@ void pci_iounmap(struct pci_dev *pdev, void __iomem *addr)
}
spin_unlock(&zpci_iomap_lock);
}
-EXPORT_SYMBOL_GPL(pci_iounmap);
+EXPORT_SYMBOL(pci_iounmap);
static int pci_read(struct pci_bus *bus, unsigned int devfn, int where,
int size, u32 *val)
@@ -483,9 +483,8 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev)
airq_iv_free_bit(zpci_aisb_iv, zdev->aisb);
}
-static void zpci_map_resources(struct zpci_dev *zdev)
+static void zpci_map_resources(struct pci_dev *pdev)
{
- struct pci_dev *pdev = zdev->pdev;
resource_size_t len;
int i;
@@ -499,9 +498,8 @@ static void zpci_map_resources(struct zpci_dev *zdev)
}
}
-static void zpci_unmap_resources(struct zpci_dev *zdev)
+static void zpci_unmap_resources(struct pci_dev *pdev)
{
- struct pci_dev *pdev = zdev->pdev;
resource_size_t len;
int i;
@@ -651,7 +649,7 @@ int pcibios_add_device(struct pci_dev *pdev)
zdev->pdev = pdev;
pdev->dev.groups = zpci_attr_groups;
- zpci_map_resources(zdev);
+ zpci_map_resources(pdev);
for (i = 0; i < PCI_BAR_COUNT; i++) {
res = &pdev->resource[i];
@@ -663,6 +661,11 @@ int pcibios_add_device(struct pci_dev *pdev)
return 0;
}
+void pcibios_release_device(struct pci_dev *pdev)
+{
+ zpci_unmap_resources(pdev);
+}
+
int pcibios_enable_device(struct pci_dev *pdev, int mask)
{
struct zpci_dev *zdev = get_zdev(pdev);
@@ -670,7 +673,6 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask)
zdev->pdev = pdev;
zpci_debug_init_device(zdev);
zpci_fmb_enable_device(zdev);
- zpci_map_resources(zdev);
return pci_enable_resources(pdev, mask);
}
@@ -679,7 +681,6 @@ void pcibios_disable_device(struct pci_dev *pdev)
{
struct zpci_dev *zdev = get_zdev(pdev);
- zpci_unmap_resources(zdev);
zpci_fmb_disable_device(zdev);
zpci_debug_exit_device(zdev);
zdev->pdev = NULL;
@@ -688,7 +689,8 @@ void pcibios_disable_device(struct pci_dev *pdev)
#ifdef CONFIG_HIBERNATE_CALLBACKS
static int zpci_restore(struct device *dev)
{
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = get_zdev(pdev);
int ret = 0;
if (zdev->state != ZPCI_FN_STATE_ONLINE)
@@ -698,7 +700,7 @@ static int zpci_restore(struct device *dev)
if (ret)
goto out;
- zpci_map_resources(zdev);
+ zpci_map_resources(pdev);
zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
zdev->start_dma + zdev->iommu_size - 1,
(u64) zdev->dma_table);
@@ -709,12 +711,14 @@ out:
static int zpci_freeze(struct device *dev)
{
- struct zpci_dev *zdev = get_zdev(to_pci_dev(dev));
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct zpci_dev *zdev = get_zdev(pdev);
if (zdev->state != ZPCI_FN_STATE_ONLINE)
return 0;
zpci_unregister_ioat(zdev, 0);
+ zpci_unmap_resources(pdev);
return clp_disable_fh(zdev);
}
diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c
index 8aa271b3d1ad..b1bb2b72302c 100644
--- a/arch/s390/pci/pci_mmio.c
+++ b/arch/s390/pci/pci_mmio.c
@@ -64,8 +64,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr,
if (copy_from_user(buf, user_buffer, length))
goto out;
- memcpy_toio(io_addr, buf, length);
- ret = 0;
+ ret = zpci_memcpy_toio(io_addr, buf, length);
out:
if (buf != local_buf)
kfree(buf);
@@ -98,16 +97,16 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr,
goto out;
io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK));
- ret = -EFAULT;
- if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE)
+ if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) {
+ ret = -EFAULT;
goto out;
-
- memcpy_fromio(buf, io_addr, length);
-
- if (copy_to_user(user_buffer, buf, length))
+ }
+ ret = zpci_memcpy_fromio(buf, io_addr, length);
+ if (ret)
goto out;
+ if (copy_to_user(user_buffer, buf, length))
+ ret = -EFAULT;
- ret = 0;
out:
if (buf != local_buf)
kfree(buf);
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 96ac69c5eba0..efb00ec75805 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -86,6 +86,9 @@ config ARCH_DEFCONFIG
default "arch/sparc/configs/sparc32_defconfig" if SPARC32
default "arch/sparc/configs/sparc64_defconfig" if SPARC64
+config ARCH_PROC_KCORE_TEXT
+ def_bool y
+
config IOMMU_HELPER
bool
default y if SPARC64
diff --git a/arch/sparc/include/asm/hypervisor.h b/arch/sparc/include/asm/hypervisor.h
index 4f6725ff4c33..f5b6537306f0 100644
--- a/arch/sparc/include/asm/hypervisor.h
+++ b/arch/sparc/include/asm/hypervisor.h
@@ -2957,6 +2957,17 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
unsigned long reg_val);
#endif
+
+#define HV_FAST_M7_GET_PERFREG 0x43
+#define HV_FAST_M7_SET_PERFREG 0x44
+
+#ifndef __ASSEMBLY__
+unsigned long sun4v_m7_get_perfreg(unsigned long reg_num,
+ unsigned long *reg_val);
+unsigned long sun4v_m7_set_perfreg(unsigned long reg_num,
+ unsigned long reg_val);
+#endif
+
/* Function numbers for HV_CORE_TRAP. */
#define HV_CORE_SET_VER 0x00
#define HV_CORE_PUTCHAR 0x01
@@ -2981,6 +2992,7 @@ unsigned long sun4v_t5_set_perfreg(unsigned long reg_num,
#define HV_GRP_SDIO 0x0108
#define HV_GRP_SDIO_ERR 0x0109
#define HV_GRP_REBOOT_DATA 0x0110
+#define HV_GRP_M7_PERF 0x0114
#define HV_GRP_NIAG_PERF 0x0200
#define HV_GRP_FIRE_PERF 0x0201
#define HV_GRP_N2_CPU 0x0202
diff --git a/arch/sparc/include/asm/io_64.h b/arch/sparc/include/asm/io_64.h
index 9b672be70dda..50d4840d9aeb 100644
--- a/arch/sparc/include/asm/io_64.h
+++ b/arch/sparc/include/asm/io_64.h
@@ -407,16 +407,16 @@ static inline void iounmap(volatile void __iomem *addr)
{
}
-#define ioread8(X) readb(X)
-#define ioread16(X) readw(X)
-#define ioread16be(X) __raw_readw(X)
-#define ioread32(X) readl(X)
-#define ioread32be(X) __raw_readl(X)
-#define iowrite8(val,X) writeb(val,X)
-#define iowrite16(val,X) writew(val,X)
-#define iowrite16be(val,X) __raw_writew(val,X)
-#define iowrite32(val,X) writel(val,X)
-#define iowrite32be(val,X) __raw_writel(val,X)
+#define ioread8 readb
+#define ioread16 readw
+#define ioread16be __raw_readw
+#define ioread32 readl
+#define ioread32be __raw_readl
+#define iowrite8 writeb
+#define iowrite16 writew
+#define iowrite16be __raw_writew
+#define iowrite32 writel
+#define iowrite32be __raw_writel
/* Create a virtual mapping cookie for an IO port range */
void __iomem *ioport_map(unsigned long port, unsigned int nr);
diff --git a/arch/sparc/include/asm/jump_label.h b/arch/sparc/include/asm/jump_label.h
index ec2e2e2aba7d..cc9b04a2b11b 100644
--- a/arch/sparc/include/asm/jump_label.h
+++ b/arch/sparc/include/asm/jump_label.h
@@ -1,7 +1,7 @@
#ifndef _ASM_SPARC_JUMP_LABEL_H
#define _ASM_SPARC_JUMP_LABEL_H
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
#include <linux/types.h>
@@ -22,8 +22,6 @@ l_yes:
return true;
}
-#endif /* __KERNEL__ */
-
typedef u32 jump_label_t;
struct jump_entry {
@@ -32,4 +30,5 @@ struct jump_entry {
jump_label_t key;
};
+#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/sparc/include/asm/starfire.h b/arch/sparc/include/asm/starfire.h
index c100dc27a0a9..176fa0ad19f1 100644
--- a/arch/sparc/include/asm/starfire.h
+++ b/arch/sparc/include/asm/starfire.h
@@ -12,7 +12,6 @@
extern int this_is_starfire;
void check_if_starfire(void);
-int starfire_hard_smp_processor_id(void);
void starfire_hookup(int);
unsigned int starfire_translate(unsigned long imap, unsigned int upaid);
diff --git a/arch/sparc/kernel/entry.h b/arch/sparc/kernel/entry.h
index 88d322b67fac..07cc49e541f4 100644
--- a/arch/sparc/kernel/entry.h
+++ b/arch/sparc/kernel/entry.h
@@ -98,11 +98,7 @@ void sun4v_do_mna(struct pt_regs *regs,
void do_privop(struct pt_regs *regs);
void do_privact(struct pt_regs *regs);
void do_cee(struct pt_regs *regs);
-void do_cee_tl1(struct pt_regs *regs);
-void do_dae_tl1(struct pt_regs *regs);
-void do_iae_tl1(struct pt_regs *regs);
void do_div0_tl1(struct pt_regs *regs);
-void do_fpdis_tl1(struct pt_regs *regs);
void do_fpieee_tl1(struct pt_regs *regs);
void do_fpother_tl1(struct pt_regs *regs);
void do_ill_tl1(struct pt_regs *regs);
diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c
index 5c55145bfbf0..662500fa555f 100644
--- a/arch/sparc/kernel/hvapi.c
+++ b/arch/sparc/kernel/hvapi.c
@@ -48,6 +48,7 @@ static struct api_info api_table[] = {
{ .group = HV_GRP_VT_CPU, },
{ .group = HV_GRP_T5_CPU, },
{ .group = HV_GRP_DIAG, .flags = FLAG_PRE_API },
+ { .group = HV_GRP_M7_PERF, },
};
static DEFINE_SPINLOCK(hvapi_lock);
diff --git a/arch/sparc/kernel/hvcalls.S b/arch/sparc/kernel/hvcalls.S
index caedf8320416..afbaba52d2f1 100644
--- a/arch/sparc/kernel/hvcalls.S
+++ b/arch/sparc/kernel/hvcalls.S
@@ -837,3 +837,19 @@ ENTRY(sun4v_t5_set_perfreg)
retl
nop
ENDPROC(sun4v_t5_set_perfreg)
+
+ENTRY(sun4v_m7_get_perfreg)
+ mov %o1, %o4
+ mov HV_FAST_M7_GET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ stx %o1, [%o4]
+ retl
+ nop
+ENDPROC(sun4v_m7_get_perfreg)
+
+ENTRY(sun4v_m7_set_perfreg)
+ mov HV_FAST_M7_SET_PERFREG, %o5
+ ta HV_FAST_TRAP
+ retl
+ nop
+ENDPROC(sun4v_m7_set_perfreg)
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 9ce5afe167ff..b36365f49478 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -639,10 +639,7 @@ static void pci_claim_bus_resources(struct pci_bus *bus)
(unsigned long long)r->end,
(unsigned int)r->flags);
- if (pci_claim_resource(dev, i) == 0)
- continue;
-
- pci_claim_bridge_resource(dev, i);
+ pci_claim_resource(dev, i);
}
}
diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c
index 7e967c8018c8..eb978c77c76a 100644
--- a/arch/sparc/kernel/pcr.c
+++ b/arch/sparc/kernel/pcr.c
@@ -217,6 +217,31 @@ static const struct pcr_ops n5_pcr_ops = {
.pcr_nmi_disable = PCR_N4_PICNPT,
};
+static u64 m7_pcr_read(unsigned long reg_num)
+{
+ unsigned long val;
+
+ (void) sun4v_m7_get_perfreg(reg_num, &val);
+
+ return val;
+}
+
+static void m7_pcr_write(unsigned long reg_num, u64 val)
+{
+ (void) sun4v_m7_set_perfreg(reg_num, val);
+}
+
+static const struct pcr_ops m7_pcr_ops = {
+ .read_pcr = m7_pcr_read,
+ .write_pcr = m7_pcr_write,
+ .read_pic = n4_pic_read,
+ .write_pic = n4_pic_write,
+ .nmi_picl_value = n4_picl_value,
+ .pcr_nmi_enable = (PCR_N4_PICNPT | PCR_N4_STRACE |
+ PCR_N4_UTRACE | PCR_N4_TOE |
+ (26 << PCR_N4_SL_SHIFT)),
+ .pcr_nmi_disable = PCR_N4_PICNPT,
+};
static unsigned long perf_hsvc_group;
static unsigned long perf_hsvc_major;
@@ -248,6 +273,10 @@ static int __init register_perf_hsvc(void)
perf_hsvc_group = HV_GRP_T5_CPU;
break;
+ case SUN4V_CHIP_SPARC_M7:
+ perf_hsvc_group = HV_GRP_M7_PERF;
+ break;
+
default:
return -ENODEV;
}
@@ -293,6 +322,10 @@ static int __init setup_sun4v_pcr_ops(void)
pcr_ops = &n5_pcr_ops;
break;
+ case SUN4V_CHIP_SPARC_M7:
+ pcr_ops = &m7_pcr_ops;
+ break;
+
default:
ret = -ENODEV;
break;
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 46a5e4508752..86eebfa3b158 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -792,6 +792,42 @@ static const struct sparc_pmu niagara4_pmu = {
.num_pic_regs = 4,
};
+static void sparc_m7_write_pmc(int idx, u64 val)
+{
+ u64 pcr;
+
+ pcr = pcr_ops->read_pcr(idx);
+ /* ensure ov and ntc are reset */
+ pcr &= ~(PCR_N4_OV | PCR_N4_NTC);
+
+ pcr_ops->write_pic(idx, val & 0xffffffff);
+
+ pcr_ops->write_pcr(idx, pcr);
+}
+
+static const struct sparc_pmu sparc_m7_pmu = {
+ .event_map = niagara4_event_map,
+ .cache_map = &niagara4_cache_map,
+ .max_events = ARRAY_SIZE(niagara4_perfmon_event_map),
+ .read_pmc = sparc_vt_read_pmc,
+ .write_pmc = sparc_m7_write_pmc,
+ .upper_shift = 5,
+ .lower_shift = 5,
+ .event_mask = 0x7ff,
+ .user_bit = PCR_N4_UTRACE,
+ .priv_bit = PCR_N4_STRACE,
+
+ /* We explicitly don't support hypervisor tracing. */
+ .hv_bit = 0,
+
+ .irq_bit = PCR_N4_TOE,
+ .upper_nop = 0,
+ .lower_nop = 0,
+ .flags = 0,
+ .max_hw_events = 4,
+ .num_pcrs = 4,
+ .num_pic_regs = 4,
+};
static const struct sparc_pmu *sparc_pmu __read_mostly;
static u64 event_encoding(u64 event_id, int idx)
@@ -960,6 +996,8 @@ out:
cpuc->pcr[0] |= cpuc->event[0]->hw.config_base;
}
+static void sparc_pmu_start(struct perf_event *event, int flags);
+
/* On this PMU each PIC has it's own PCR control register. */
static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
{
@@ -972,20 +1010,13 @@ static void calculate_multiple_pcrs(struct cpu_hw_events *cpuc)
struct perf_event *cp = cpuc->event[i];
struct hw_perf_event *hwc = &cp->hw;
int idx = hwc->idx;
- u64 enc;
if (cpuc->current_idx[i] != PIC_NO_INDEX)
continue;
- sparc_perf_event_set_period(cp, hwc, idx);
cpuc->current_idx[i] = idx;
- enc = perf_event_get_enc(cpuc->events[i]);
- cpuc->pcr[idx] &= ~mask_for_index(idx);
- if (hwc->state & PERF_HES_STOPPED)
- cpuc->pcr[idx] |= nop_for_index(idx);
- else
- cpuc->pcr[idx] |= event_encoding(enc, idx);
+ sparc_pmu_start(cp, PERF_EF_RELOAD);
}
out:
for (i = 0; i < cpuc->n_events; i++) {
@@ -1101,7 +1132,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
int i;
local_irq_save(flags);
- perf_pmu_disable(event->pmu);
for (i = 0; i < cpuc->n_events; i++) {
if (event == cpuc->event[i]) {
@@ -1127,7 +1157,6 @@ static void sparc_pmu_del(struct perf_event *event, int _flags)
}
}
- perf_pmu_enable(event->pmu);
local_irq_restore(flags);
}
@@ -1361,7 +1390,6 @@ static int sparc_pmu_add(struct perf_event *event, int ef_flags)
unsigned long flags;
local_irq_save(flags);
- perf_pmu_disable(event->pmu);
n0 = cpuc->n_events;
if (n0 >= sparc_pmu->max_hw_events)
@@ -1394,7 +1422,6 @@ nocheck:
ret = 0;
out:
- perf_pmu_enable(event->pmu);
local_irq_restore(flags);
return ret;
}
@@ -1667,6 +1694,10 @@ static bool __init supported_pmu(void)
sparc_pmu = &niagara4_pmu;
return true;
}
+ if (!strcmp(sparc_pmu_type, "sparc-m7")) {
+ sparc_pmu = &sparc_m7_pmu;
+ return true;
+ }
return false;
}
diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
index 0be7bf978cb1..46a59643bb1c 100644
--- a/arch/sparc/kernel/process_64.c
+++ b/arch/sparc/kernel/process_64.c
@@ -287,6 +287,8 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
printk(" TPC[%lx] O7[%lx] I7[%lx] RPC[%lx]\n",
gp->tpc, gp->o7, gp->i7, gp->rpc);
}
+
+ touch_nmi_watchdog();
}
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
@@ -362,6 +364,8 @@ static void pmu_snapshot_all_cpus(void)
(cpu == this_cpu ? '*' : ' '), cpu,
pp->pcr[0], pp->pcr[1], pp->pcr[2], pp->pcr[3],
pp->pic[0], pp->pic[1], pp->pic[2], pp->pic[3]);
+
+ touch_nmi_watchdog();
}
memset(global_cpu_snapshot, 0, sizeof(global_cpu_snapshot));
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index da6f1a7fc4db..61139d9924ca 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1406,11 +1406,32 @@ void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs)
scheduler_ipi();
}
-/* This is a nop because we capture all other cpus
- * anyways when making the PROM active.
- */
+static void stop_this_cpu(void *dummy)
+{
+ prom_stopself();
+}
+
void smp_send_stop(void)
{
+ int cpu;
+
+ if (tlb_type == hypervisor) {
+ for_each_online_cpu(cpu) {
+ if (cpu == smp_processor_id())
+ continue;
+#ifdef CONFIG_SUN_LDOMS
+ if (ldom_domaining_enabled) {
+ unsigned long hv_err;
+ hv_err = sun4v_cpu_stop(cpu);
+ if (hv_err)
+ printk(KERN_ERR "sun4v_cpu_stop() "
+ "failed err=%lu\n", hv_err);
+ } else
+#endif
+ prom_stopcpu_cpuid(cpu);
+ }
+ } else
+ smp_call_function(stop_this_cpu, NULL, 0);
}
/**
diff --git a/arch/sparc/kernel/starfire.c b/arch/sparc/kernel/starfire.c
index 82281a566bb8..167fdfd9c837 100644
--- a/arch/sparc/kernel/starfire.c
+++ b/arch/sparc/kernel/starfire.c
@@ -28,11 +28,6 @@ void check_if_starfire(void)
this_is_starfire = 1;
}
-int starfire_hard_smp_processor_id(void)
-{
- return upa_readl(0x1fff40000d0UL);
-}
-
/*
* Each Starfire board has 32 registers which perform translation
* and delivery of traditional interrupt packets into the extended
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index c85403d0496c..30e7ddb27a3a 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -333,7 +333,7 @@ SYSCALL_DEFINE6(sparc_ipc, unsigned int, call, int, first, unsigned long, second
long err;
/* No need for backward compatibility. We can start fresh... */
- if (call <= SEMCTL) {
+ if (call <= SEMTIMEDOP) {
switch (call) {
case SEMOP:
err = sys_semtimedop(first, ptr,
diff --git a/arch/sparc/kernel/time_32.c b/arch/sparc/kernel/time_32.c
index 2f80d23a0a44..18147a5523d9 100644
--- a/arch/sparc/kernel/time_32.c
+++ b/arch/sparc/kernel/time_32.c
@@ -181,17 +181,13 @@ static struct clocksource timer_cs = {
.rating = 100,
.read = timer_cs_read,
.mask = CLOCKSOURCE_MASK(64),
- .shift = 2,
.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
static __init int setup_timer_cs(void)
{
timer_cs_enabled = 1;
- timer_cs.mult = clocksource_hz2mult(sparc_config.clock_rate,
- timer_cs.shift);
-
- return clocksource_register(&timer_cs);
+ return clocksource_register_hz(&timer_cs, sparc_config.clock_rate);
}
#ifdef CONFIG_SMP
diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c
index a27651e866e7..0e699745d643 100644
--- a/arch/sparc/kernel/traps_64.c
+++ b/arch/sparc/kernel/traps_64.c
@@ -2427,6 +2427,8 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs)
}
user_instruction_dump ((unsigned int __user *) regs->tpc);
}
+ if (panic_on_oops)
+ panic("Fatal exception");
if (regs->tstate & TSTATE_PRIV)
do_exit(SIGKILL);
do_exit(SIGSEGV);
@@ -2564,27 +2566,6 @@ void do_cee(struct pt_regs *regs)
die_if_kernel("TL0: Cache Error Exception", regs);
}
-void do_cee_tl1(struct pt_regs *regs)
-{
- exception_enter();
- dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
- die_if_kernel("TL1: Cache Error Exception", regs);
-}
-
-void do_dae_tl1(struct pt_regs *regs)
-{
- exception_enter();
- dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
- die_if_kernel("TL1: Data Access Exception", regs);
-}
-
-void do_iae_tl1(struct pt_regs *regs)
-{
- exception_enter();
- dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
- die_if_kernel("TL1: Instruction Access Exception", regs);
-}
-
void do_div0_tl1(struct pt_regs *regs)
{
exception_enter();
@@ -2592,13 +2573,6 @@ void do_div0_tl1(struct pt_regs *regs)
die_if_kernel("TL1: DIV0 Exception", regs);
}
-void do_fpdis_tl1(struct pt_regs *regs)
-{
- exception_enter();
- dump_tl1_traplog((struct tl1_traplog *)(regs + 1));
- die_if_kernel("TL1: FPU Disabled", regs);
-}
-
void do_fpieee_tl1(struct pt_regs *regs)
{
exception_enter();
diff --git a/arch/sparc/lib/memmove.S b/arch/sparc/lib/memmove.S
index b7f6334e159f..857ad4f8905f 100644
--- a/arch/sparc/lib/memmove.S
+++ b/arch/sparc/lib/memmove.S
@@ -8,9 +8,11 @@
.text
ENTRY(memmove) /* o0=dst o1=src o2=len */
- mov %o0, %g1
+ brz,pn %o2, 99f
+ mov %o0, %g1
+
cmp %o0, %o1
- bleu,pt %xcc, memcpy
+ bleu,pt %xcc, 2f
add %o1, %o2, %g7
cmp %g7, %o0
bleu,pt %xcc, memcpy
@@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len */
stb %g7, [%o0]
bne,pt %icc, 1b
sub %o0, 1, %o0
-
+99:
retl
mov %g1, %o0
+
+ /* We can't just call memcpy for these memmove cases. On some
+ * chips the memcpy uses cache initializing stores and when dst
+ * and src are close enough, those can clobber the source data
+ * before we've loaded it in.
+ */
+2: or %o0, %o1, %g7
+ or %o2, %g7, %g7
+ andcc %g7, 0x7, %g0
+ bne,pn %xcc, 4f
+ nop
+
+3: ldx [%o1], %g7
+ add %o1, 8, %o1
+ subcc %o2, 8, %o2
+ add %o0, 8, %o0
+ bne,pt %icc, 3b
+ stx %g7, [%o0 - 0x8]
+ ba,a,pt %xcc, 99b
+
+4: ldub [%o1], %g7
+ add %o1, 1, %o1
+ subcc %o2, 1, %o2
+ add %o0, 1, %o0
+ bne,pt %icc, 4b
+ stb %g7, [%o0 - 0x1]
+ ba,a,pt %xcc, 99b
ENDPROC(memmove)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 3ea267c53320..4ca0d6ba5ec8 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2820,7 +2820,7 @@ static int __init report_memory(void)
return 0;
}
-device_initcall(report_memory);
+arch_initcall(report_memory);
#ifdef CONFIG_SMP
#define do_flush_tlb_kernel_range smp_flush_tlb_kernel_range
diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c
index d412b0856c0a..00178ecf9aea 100644
--- a/arch/tile/kernel/time.c
+++ b/arch/tile/kernel/time.c
@@ -257,34 +257,34 @@ void update_vsyscall_tz(void)
void update_vsyscall(struct timekeeper *tk)
{
- if (tk->tkr.clock != &cycle_counter_cs)
+ if (tk->tkr_mono.clock != &cycle_counter_cs)
return;
write_seqcount_begin(&vdso_data->tb_seq);
- vdso_data->cycle_last = tk->tkr.cycle_last;
- vdso_data->mask = tk->tkr.mask;
- vdso_data->mult = tk->tkr.mult;
- vdso_data->shift = tk->tkr.shift;
+ vdso_data->cycle_last = tk->tkr_mono.cycle_last;
+ vdso_data->mask = tk->tkr_mono.mask;
+ vdso_data->mult = tk->tkr_mono.mult;
+ vdso_data->shift = tk->tkr_mono.shift;
vdso_data->wall_time_sec = tk->xtime_sec;
- vdso_data->wall_time_snsec = tk->tkr.xtime_nsec;
+ vdso_data->wall_time_snsec = tk->tkr_mono.xtime_nsec;
vdso_data->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
- vdso_data->monotonic_time_snsec = tk->tkr.xtime_nsec
+ vdso_data->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
+ ((u64)tk->wall_to_monotonic.tv_nsec
- << tk->tkr.shift);
+ << tk->tkr_mono.shift);
while (vdso_data->monotonic_time_snsec >=
- (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+ (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
vdso_data->monotonic_time_snsec -=
- ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+ ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
vdso_data->monotonic_time_sec++;
}
vdso_data->wall_time_coarse_sec = tk->xtime_sec;
- vdso_data->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
- tk->tkr.shift);
+ vdso_data->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
+ tk->tkr_mono.shift);
vdso_data->monotonic_time_coarse_sec =
vdso_data->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index c2fb8a87dccb..867bc5bea8dc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -235,12 +235,10 @@ config ARCH_WANT_GENERAL_HUGETLB
def_bool y
config ZONE_DMA32
- bool
- default X86_64
+ def_bool y if X86_64
config AUDIT_ARCH
- bool
- default X86_64
+ def_bool y if X86_64
config ARCH_SUPPORTS_OPTIMIZED_INLINING
def_bool y
@@ -499,6 +497,7 @@ config X86_INTEL_QUARK
depends on X86_IO_APIC
select IOSF_MBI
select INTEL_IMR
+ select COMMON_CLK
---help---
Select to include support for Quark X1000 SoC.
Say Y here if you have a Quark based system such as the Arduino
@@ -890,7 +889,8 @@ config UP_LATE_INIT
depends on !SMP && X86_LOCAL_APIC
config X86_UP_APIC
- bool "Local APIC support on uniprocessors"
+ bool "Local APIC support on uniprocessors" if !PCI_MSI
+ default PCI_MSI
depends on X86_32 && !SMP && !X86_32_NON_STANDARD
---help---
A local APIC (Advanced Programmable Interrupt Controller) is an
@@ -902,10 +902,6 @@ config X86_UP_APIC
performance counters), and the NMI watchdog which detects hard
lockups.
-config X86_UP_APIC_MSI
- def_bool y
- select X86_UP_APIC if X86_32 && !SMP && !X86_32_NON_STANDARD && PCI_MSI
-
config X86_UP_IOAPIC
bool "IO-APIC support on uniprocessors"
depends on X86_UP_APIC
@@ -924,8 +920,8 @@ config X86_LOCAL_APIC
select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
config X86_IO_APIC
- def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
- depends on X86_LOCAL_APIC
+ def_bool y
+ depends on X86_LOCAL_APIC || X86_UP_IOAPIC
select IRQ_DOMAIN
config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
@@ -1144,10 +1140,10 @@ config MICROCODE_OLD_INTERFACE
depends on MICROCODE
config MICROCODE_INTEL_EARLY
- def_bool n
+ bool
config MICROCODE_AMD_EARLY
- def_bool n
+ bool
config MICROCODE_EARLY
bool "Early load microcode"
@@ -1746,14 +1742,11 @@ config KEXEC_VERIFY_SIG
depends on KEXEC_FILE
---help---
This option makes kernel signature verification mandatory for
- kexec_file_load() syscall. If kernel is signature can not be
- verified, kexec_file_load() will fail.
-
- This option enforces signature verification at generic level.
- One needs to enable signature verification for type of kernel
- image being loaded to make sure it works. For example, enable
- bzImage signature verification option to be able to load and
- verify signatures of bzImage. Otherwise kernel loading will fail.
+ the kexec_file_load() syscall.
+
+ In addition to that option, you need to enable signature
+ verification for the corresponding kernel image type being
+ loaded in order for this to work.
config KEXEC_BZIMAGE_VERIFY_SIG
bool "Enable bzImage signature verification support"
diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
index 7083c16cccba..d7b1f655b3ef 100644
--- a/arch/x86/boot/compressed/aslr.c
+++ b/arch/x86/boot/compressed/aslr.c
@@ -14,13 +14,6 @@
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
-struct kaslr_setup_data {
- __u64 next;
- __u32 type;
- __u32 len;
- __u8 data[1];
-} kaslr_setup_data;
-
#define I8254_PORT_CONTROL 0x43
#define I8254_PORT_COUNTER0 0x40
#define I8254_CMD_READBACK 0xC0
@@ -302,28 +295,7 @@ static unsigned long find_random_addr(unsigned long minimum,
return slots_fetch_random();
}
-static void add_kaslr_setup_data(struct boot_params *params, __u8 enabled)
-{
- struct setup_data *data;
-
- kaslr_setup_data.type = SETUP_KASLR;
- kaslr_setup_data.len = 1;
- kaslr_setup_data.next = 0;
- kaslr_setup_data.data[0] = enabled;
-
- data = (struct setup_data *)(unsigned long)params->hdr.setup_data;
-
- while (data && data->next)
- data = (struct setup_data *)(unsigned long)data->next;
-
- if (data)
- data->next = (unsigned long)&kaslr_setup_data;
- else
- params->hdr.setup_data = (unsigned long)&kaslr_setup_data;
-
-}
-
-unsigned char *choose_kernel_location(struct boot_params *params,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
unsigned char *input,
unsigned long input_size,
unsigned char *output,
@@ -335,17 +307,16 @@ unsigned char *choose_kernel_location(struct boot_params *params,
#ifdef CONFIG_HIBERNATION
if (!cmdline_find_option_bool("kaslr")) {
debug_putstr("KASLR disabled by default...\n");
- add_kaslr_setup_data(params, 0);
goto out;
}
#else
if (cmdline_find_option_bool("nokaslr")) {
debug_putstr("KASLR disabled by cmdline...\n");
- add_kaslr_setup_data(params, 0);
goto out;
}
#endif
- add_kaslr_setup_data(params, 1);
+
+ boot_params->hdr.loadflags |= KASLR_FLAG;
/* Record the various known unsafe memory ranges. */
mem_avoid_init((unsigned long)input, input_size,
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 1d7fbbcc196d..8ef964ddc18e 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -29,6 +29,7 @@
#include <asm/page_types.h>
#include <asm/boot.h>
#include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
__HEAD
ENTRY(startup_32)
@@ -102,7 +103,7 @@ preferred_addr:
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments
*/
- testb $(1<<6), BP_loadflags(%esi)
+ testb $KEEP_SEGMENTS, BP_loadflags(%esi)
jnz 1f
cli
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index 6b1766c6c082..b0c0d16ef58d 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -31,6 +31,7 @@
#include <asm/msr.h>
#include <asm/processor-flags.h>
#include <asm/asm-offsets.h>
+#include <asm/bootparam.h>
__HEAD
.code32
@@ -46,7 +47,7 @@ ENTRY(startup_32)
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
* us to not reload segments
*/
- testb $(1<<6), BP_loadflags(%esi)
+ testb $KEEP_SEGMENTS, BP_loadflags(%esi)
jnz 1f
cli
@@ -164,7 +165,7 @@ ENTRY(startup_32)
/* After gdt is loaded */
xorl %eax, %eax
lldt %ax
- movl $0x20, %eax
+ movl $__BOOT_TSS, %eax
ltr %ax
/*
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 5903089c818f..a107b935e22f 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -377,6 +377,9 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
real_mode = rmode;
+ /* Clear it for solely in-kernel use */
+ real_mode->hdr.loadflags &= ~KASLR_FLAG;
+
sanitize_boot_params(real_mode);
if (real_mode->screen_info.orig_video_mode == 7) {
@@ -401,8 +404,7 @@ asmlinkage __visible void *decompress_kernel(void *rmode, memptr heap,
* the entire decompressed kernel plus relocation table, or the
* entire decompressed kernel plus .bss and .brk sections.
*/
- output = choose_kernel_location(real_mode, input_data, input_len,
- output,
+ output = choose_kernel_location(real_mode, input_data, input_len, output,
output_len > run_size ? output_len
: run_size);
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index ee3576b2666b..89dd0d78013a 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -57,7 +57,7 @@ int cmdline_find_option_bool(const char *option);
#if CONFIG_RANDOMIZE_BASE
/* aslr.c */
-unsigned char *choose_kernel_location(struct boot_params *params,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
unsigned char *input,
unsigned long input_size,
unsigned char *output,
@@ -66,7 +66,7 @@ unsigned char *choose_kernel_location(struct boot_params *params,
bool has_cpuflag(int flag);
#else
static inline
-unsigned char *choose_kernel_location(struct boot_params *params,
+unsigned char *choose_kernel_location(struct boot_params *boot_params,
unsigned char *input,
unsigned long input_size,
unsigned char *output,
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 493f3fd9f139..318b8465d302 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -30,7 +30,7 @@ int strcmp(const char *str1, const char *str2)
int delta = 0;
while (*s1 || *s2) {
- delta = *s2 - *s1;
+ delta = *s1 - *s2;
if (delta)
return delta;
s1++;
diff --git a/arch/x86/boot/video-mode.c b/arch/x86/boot/video-mode.c
index 748e8d06290a..aa8a96b052e3 100644
--- a/arch/x86/boot/video-mode.c
+++ b/arch/x86/boot/video-mode.c
@@ -22,10 +22,8 @@
/*
* Common variables
*/
-int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
-u16 video_segment;
+int adapter; /* 0=CGA/MDA/HGC, 1=EGA, 2=VGA+ */
int force_x, force_y; /* Don't query the BIOS for cols/rows */
-
int do_restore; /* Screen contents changed during mode flip */
int graphic_mode; /* Graphic mode with linear frame buffer */
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c
index 43eda284d27f..05111bb8d018 100644
--- a/arch/x86/boot/video.c
+++ b/arch/x86/boot/video.c
@@ -17,6 +17,8 @@
#include "video.h"
#include "vesa.h"
+static u16 video_segment;
+
static void store_cursor_position(void)
{
struct biosregs ireg, oreg;
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h
index 0bb25491262d..b54e0328c449 100644
--- a/arch/x86/boot/video.h
+++ b/arch/x86/boot/video.h
@@ -91,7 +91,6 @@ int mode_defined(u16 mode); /* video.c */
#define ADAPTER_VGA 2
extern int adapter;
-extern u16 video_segment;
extern int force_x, force_y; /* Don't query the BIOS for cols/rows */
extern int do_restore; /* Restore screen contents */
extern int graphic_mode; /* Graphics mode with linear frame buffer */
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 419819d6dab3..aaa1118bf01e 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -248,7 +248,7 @@ CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_TT_NEWSCHED=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_UHCI_HCD=y
CONFIG_USB_PRINTER=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 4c311ddd973b..315b86106572 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -243,7 +243,7 @@ CONFIG_USB=y
CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
CONFIG_USB_MON=y
CONFIG_USB_EHCI_HCD=y
-# CONFIG_USB_EHCI_TT_NEWSCHED is not set
+CONFIG_USB_EHCI_TT_NEWSCHED=y
CONFIG_USB_OHCI_HCD=y
CONFIG_USB_UHCI_HCD=y
CONFIG_USB_PRINTER=y
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 947c6bf52c33..54f60ab41c63 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1155,7 +1155,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
src = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
if (!src)
return -ENOMEM;
- assoc = (src + req->cryptlen + auth_tag_len);
+ assoc = (src + req->cryptlen);
scatterwalk_map_and_copy(src, req->src, 0, req->cryptlen, 0);
scatterwalk_map_and_copy(assoc, req->assoc, 0,
req->assoclen, 0);
@@ -1180,7 +1180,7 @@ static int __driver_rfc4106_decrypt(struct aead_request *req)
scatterwalk_done(&src_sg_walk, 0, 0);
scatterwalk_done(&assoc_sg_walk, 0, 0);
} else {
- scatterwalk_map_and_copy(dst, req->dst, 0, req->cryptlen, 1);
+ scatterwalk_map_and_copy(dst, req->dst, 0, tempCipherLen, 1);
kfree(src);
}
return retval;
diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
index 26d49ebae040..225be06edc80 100644
--- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
+++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S
@@ -178,7 +178,7 @@ continue_block:
## 2a) PROCESS FULL BLOCKS:
################################################################
full_block:
- movq $128,%rax
+ movl $128,%eax
lea 128*8*2(block_0), block_1
lea 128*8*3(block_0), block_2
add $128*8*1, block_0
diff --git a/arch/x86/crypto/twofish-x86_64-asm_64.S b/arch/x86/crypto/twofish-x86_64-asm_64.S
index a039d21986a2..a350c990dc86 100644
--- a/arch/x86/crypto/twofish-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-x86_64-asm_64.S
@@ -264,7 +264,7 @@ ENTRY(twofish_enc_blk)
movq R1, 8(%rsi)
popq R1
- movq $1,%rax
+ movl $1,%eax
ret
ENDPROC(twofish_enc_blk)
@@ -316,6 +316,6 @@ ENTRY(twofish_dec_blk)
movq R1, 8(%rsi)
popq R1
- movq $1,%rax
+ movl $1,%eax
ret
ENDPROC(twofish_dec_blk)
diff --git a/arch/x86/ia32/Makefile b/arch/x86/ia32/Makefile
index e785b422b766..bb635c641869 100644
--- a/arch/x86/ia32/Makefile
+++ b/arch/x86/ia32/Makefile
@@ -3,7 +3,6 @@
#
obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o
-obj-$(CONFIG_IA32_EMULATION) += nosyscall.o syscall_ia32.o
obj-$(CONFIG_IA32_AOUT) += ia32_aout.o
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index d0165c9a2932..c81d35e6c7f1 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -161,8 +161,7 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from)
}
static int ia32_restore_sigcontext(struct pt_regs *regs,
- struct sigcontext_ia32 __user *sc,
- unsigned int *pax)
+ struct sigcontext_ia32 __user *sc)
{
unsigned int tmpflags, err = 0;
void __user *buf;
@@ -184,7 +183,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
RELOAD_SEG(es);
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
- COPY(dx); COPY(cx); COPY(ip);
+ COPY(dx); COPY(cx); COPY(ip); COPY(ax);
/* Don't touch extended registers */
COPY_SEG_CPL3(cs);
@@ -197,12 +196,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
get_user_ex(tmp, &sc->fpstate);
buf = compat_ptr(tmp);
-
- get_user_ex(*pax, &sc->ax);
} get_user_catch(err);
err |= restore_xstate_sig(buf, 1);
+ force_iret();
+
return err;
}
@@ -211,7 +210,6 @@ asmlinkage long sys32_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8);
sigset_t set;
- unsigned int ax;
if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
goto badframe;
@@ -224,9 +222,9 @@ asmlinkage long sys32_sigreturn(void)
set_current_blocked(&set);
- if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
+ if (ia32_restore_sigcontext(regs, &frame->sc))
goto badframe;
- return ax;
+ return regs->ax;
badframe:
signal_fault(regs, frame, "32bit sigreturn");
@@ -238,7 +236,6 @@ asmlinkage long sys32_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_ia32 __user *frame;
sigset_t set;
- unsigned int ax;
frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4);
@@ -249,13 +246,13 @@ asmlinkage long sys32_rt_sigreturn(void)
set_current_blocked(&set);
- if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+ if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
- return ax;
+ return regs->ax;
badframe:
signal_fault(regs, frame, "32bit rt sigreturn");
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 156ebcab4ada..a821b1cd4fa7 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -30,24 +30,13 @@
.section .entry.text, "ax"
- .macro IA32_ARG_FIXUP noebp=0
- movl %edi,%r8d
- .if \noebp
- .else
- movl %ebp,%r9d
- .endif
- xchg %ecx,%esi
- movl %ebx,%edi
- movl %edx,%edx /* zero extension */
- .endm
-
- /* clobbers %eax */
- .macro CLEAR_RREGS offset=0, _r9=rax
+ /* clobbers %rax */
+ .macro CLEAR_RREGS _r9=rax
xorl %eax,%eax
- movq %rax,\offset+R11(%rsp)
- movq %rax,\offset+R10(%rsp)
- movq %\_r9,\offset+R9(%rsp)
- movq %rax,\offset+R8(%rsp)
+ movq %rax,R11(%rsp)
+ movq %rax,R10(%rsp)
+ movq %\_r9,R9(%rsp)
+ movq %rax,R8(%rsp)
.endm
/*
@@ -60,14 +49,14 @@
* If it's -1 to make us punt the syscall, then (u32)-1 is still
* an appropriately invalid value.
*/
- .macro LOAD_ARGS32 offset, _r9=0
+ .macro LOAD_ARGS32 _r9=0
.if \_r9
- movl \offset+16(%rsp),%r9d
+ movl R9(%rsp),%r9d
.endif
- movl \offset+40(%rsp),%ecx
- movl \offset+48(%rsp),%edx
- movl \offset+56(%rsp),%esi
- movl \offset+64(%rsp),%edi
+ movl RCX(%rsp),%ecx
+ movl RDX(%rsp),%edx
+ movl RSI(%rsp),%esi
+ movl RDI(%rsp),%edi
movl %eax,%eax /* zero extension */
.endm
@@ -99,54 +88,69 @@ ENDPROC(native_irq_enable_sysexit)
/*
* 32bit SYSENTER instruction entry.
*
+ * SYSENTER loads ss, rsp, cs, and rip from previously programmed MSRs.
+ * IF and VM in rflags are cleared (IOW: interrupts are off).
+ * SYSENTER does not save anything on the stack,
+ * and does not save old rip (!!!) and rflags.
+ *
* Arguments:
- * %eax System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp user stack
- * 0(%ebp) Arg6
- *
- * Interrupts off.
- *
+ * eax system call number
+ * ebx arg1
+ * ecx arg2
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * ebp user stack
+ * 0(%ebp) arg6
+ *
* This is purely a fast path. For anything complicated we use the int 0x80
- * path below. Set up a complete hardware stack frame to share code
+ * path below. We set up a complete hardware stack frame to share code
* with the int 0x80 path.
- */
+ */
ENTRY(ia32_sysenter_target)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
CFI_DEF_CFA rsp,0
CFI_REGISTER rsp,rbp
- SWAPGS_UNSAFE_STACK
- movq PER_CPU_VAR(kernel_stack), %rsp
- addq $(KERNEL_STACK_OFFSET),%rsp
+
/*
- * No need to follow this irqs on/off section: the syscall
- * disabled irqs, here we enable it straight after entry:
+ * Interrupts are off on entry.
+ * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+ * it is too small to ever cause noticeable irq latency.
*/
+ SWAPGS_UNSAFE_STACK
+ movq PER_CPU_VAR(cpu_tss + TSS_sp0), %rsp
ENABLE_INTERRUPTS(CLBR_NONE)
- movl %ebp,%ebp /* zero extension */
- pushq_cfi $__USER32_DS
- /*CFI_REL_OFFSET ss,0*/
- pushq_cfi %rbp
- CFI_REL_OFFSET rsp,0
- pushfq_cfi
- /*CFI_REL_OFFSET rflags,0*/
- movl TI_sysenter_return+THREAD_INFO(%rsp,3*8-KERNEL_STACK_OFFSET),%r10d
- CFI_REGISTER rip,r10
- pushq_cfi $__USER32_CS
- /*CFI_REL_OFFSET cs,0*/
+
+ /* Zero-extending 32-bit regs, do not remove */
+ movl %ebp, %ebp
movl %eax, %eax
- pushq_cfi %r10
- CFI_REL_OFFSET rip,0
- pushq_cfi %rax
+
+ movl ASM_THREAD_INFO(TI_sysenter_return, %rsp, 0), %r10d
+ CFI_REGISTER rip,r10
+
+ /* Construct struct pt_regs on stack */
+ pushq_cfi $__USER32_DS /* pt_regs->ss */
+ pushq_cfi %rbp /* pt_regs->sp */
+ CFI_REL_OFFSET rsp,0
+ pushfq_cfi /* pt_regs->flags */
+ pushq_cfi $__USER32_CS /* pt_regs->cs */
+ pushq_cfi %r10 /* pt_regs->ip = thread_info->sysenter_return */
+ CFI_REL_OFFSET rip,0
+ pushq_cfi_reg rax /* pt_regs->orig_ax */
+ pushq_cfi_reg rdi /* pt_regs->di */
+ pushq_cfi_reg rsi /* pt_regs->si */
+ pushq_cfi_reg rdx /* pt_regs->dx */
+ pushq_cfi_reg rcx /* pt_regs->cx */
+ pushq_cfi_reg rax /* pt_regs->ax */
cld
- SAVE_ARGS 0,1,0
- /* no need to do an access_ok check here because rbp has been
- 32bit zero extended */
+ sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+ CFI_ADJUST_CFA_OFFSET 10*8
+
+ /*
+ * no need to do an access_ok check here because rbp has been
+ * 32bit zero extended
+ */
ASM_STAC
1: movl (%rbp),%ebp
_ASM_EXTABLE(1b,ia32_badarg)
@@ -157,42 +161,80 @@ ENTRY(ia32_sysenter_target)
* ourselves. To save a few cycles, we can check whether
* NT was set instead of doing an unconditional popfq.
*/
- testl $X86_EFLAGS_NT,EFLAGS-ARGOFFSET(%rsp)
+ testl $X86_EFLAGS_NT,EFLAGS(%rsp)
jnz sysenter_fix_flags
sysenter_flags_fixed:
- orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+ testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE
jnz sysenter_tracesys
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
sysenter_do_call:
- IA32_ARG_FIXUP
+ /* 32bit syscall -> 64bit C ABI argument conversion */
+ movl %edi,%r8d /* arg5 */
+ movl %ebp,%r9d /* arg6 */
+ xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
+ movl %ebx,%edi /* arg1 */
+ movl %edx,%edx /* arg3 (zero extension) */
sysenter_dispatch:
call *ia32_sys_call_table(,%rax,8)
- movq %rax,RAX-ARGOFFSET(%rsp)
+ movq %rax,RAX(%rsp)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz sysexit_audit
sysexit_from_sys_call:
- andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
- /* clear IF, that popfq doesn't enable interrupts early */
- andl $~0x200,EFLAGS-ARGOFFSET(%rsp)
- movl RIP-ARGOFFSET(%rsp),%edx /* User %eip */
- CFI_REGISTER rip,rdx
- RESTORE_ARGS 0,24,0,0,0,0
+ /*
+ * NB: SYSEXIT is not obviously safe for 64-bit kernels -- an
+ * NMI between STI and SYSEXIT has poorly specified behavior,
+ * and and NMI followed by an IRQ with usergs is fatal. So
+ * we just pretend we're using SYSEXIT but we really use
+ * SYSRETL instead.
+ *
+ * This code path is still called 'sysexit' because it pairs
+ * with 'sysenter' and it uses the SYSENTER calling convention.
+ */
+ andl $~TS_COMPAT,ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+ movl RIP(%rsp),%ecx /* User %eip */
+ CFI_REGISTER rip,rcx
+ RESTORE_RSI_RDI
+ xorl %edx,%edx /* avoid info leaks */
xorq %r8,%r8
xorq %r9,%r9
xorq %r10,%r10
- xorq %r11,%r11
- popfq_cfi
+ movl EFLAGS(%rsp),%r11d /* User eflags */
/*CFI_RESTORE rflags*/
- popq_cfi %rcx /* User %esp */
- CFI_REGISTER rsp,rcx
TRACE_IRQS_ON
- ENABLE_INTERRUPTS_SYSEXIT32
+
+ /*
+ * SYSRETL works even on Intel CPUs. Use it in preference to SYSEXIT,
+ * since it avoids a dicey window with interrupts enabled.
+ */
+ movl RSP(%rsp),%esp
+
+ /*
+ * USERGS_SYSRET32 does:
+ * gsbase = user's gs base
+ * eip = ecx
+ * rflags = r11
+ * cs = __USER32_CS
+ * ss = __USER_DS
+ *
+ * The prologue set RIP(%rsp) to VDSO32_SYSENTER_RETURN, which does:
+ *
+ * pop %ebp
+ * pop %edx
+ * pop %ecx
+ *
+ * Therefore, we invoke SYSRETL with EDX and R8-R10 zeroed to
+ * avoid info leaks. R11 ends up with VDSO32_SYSENTER_RETURN's
+ * address (already known to user code), and R12-R15 are
+ * callee-saved and therefore don't contain any interesting
+ * kernel data.
+ */
+ USERGS_SYSRET32
CFI_RESTORE_STATE
@@ -205,18 +247,18 @@ sysexit_from_sys_call:
movl %ebx,%esi /* 2nd arg: 1st syscall arg */
movl %eax,%edi /* 1st arg: syscall number */
call __audit_syscall_entry
- movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
+ movl RAX(%rsp),%eax /* reload syscall number */
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
movl %ebx,%edi /* reload 1st syscall arg */
- movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
- movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */
- movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */
- movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */
+ movl RCX(%rsp),%esi /* reload 2nd syscall arg */
+ movl RDX(%rsp),%edx /* reload 3rd syscall arg */
+ movl RSI(%rsp),%ecx /* reload 4th syscall arg */
+ movl RDI(%rsp),%r8d /* reload 5th syscall arg */
.endm
.macro auditsys_exit exit
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz ia32_ret_from_sys_call
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
@@ -227,13 +269,13 @@ sysexit_from_sys_call:
1: setbe %al /* 1 if error, 0 if not */
movzbl %al,%edi /* zero-extend that into %edi */
call __audit_syscall_exit
- movq RAX-ARGOFFSET(%rsp),%rax /* reload syscall return value */
+ movq RAX(%rsp),%rax /* reload syscall return value */
movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- testl %edi,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl %edi, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jz \exit
- CLEAR_RREGS -ARGOFFSET
+ CLEAR_RREGS
jmp int_with_check
.endm
@@ -253,16 +295,16 @@ sysenter_fix_flags:
sysenter_tracesys:
#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jz sysenter_auditsys
#endif
- SAVE_REST
+ SAVE_EXTRA_REGS
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp)/* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
- LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
- RESTORE_REST
+ LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
+ RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
jmp sysenter_do_call
@@ -272,94 +314,128 @@ ENDPROC(ia32_sysenter_target)
/*
* 32bit SYSCALL instruction entry.
*
+ * 32bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
+ *
+ * Note: rflags saving+masking-with-MSR happens only in Long mode
+ * (in legacy 32bit mode, IF, RF and VM bits are cleared and that's it).
+ * Don't get confused: rflags saving+masking depends on Long Mode Active bit
+ * (EFER.LMA=1), NOT on bitness of userspace where SYSCALL executes
+ * or target CS descriptor's L bit (SYSCALL does not read segment descriptors).
+ *
* Arguments:
- * %eax System call number.
- * %ebx Arg1
- * %ecx return EIP
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg2 [note: not saved in the stack frame, should not be touched]
- * %esp user stack
- * 0(%esp) Arg6
- *
- * Interrupts off.
- *
+ * eax system call number
+ * ecx return address
+ * ebx arg1
+ * ebp arg2 (note: not saved in the stack frame, should not be touched)
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * esp user stack
+ * 0(%esp) arg6
+ *
* This is purely a fast path. For anything complicated we use the int 0x80
- * path below. Set up a complete hardware stack frame to share code
- * with the int 0x80 path.
- */
+ * path below. We set up a complete hardware stack frame to share code
+ * with the int 0x80 path.
+ */
ENTRY(ia32_cstar_target)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
+ CFI_DEF_CFA rsp,0
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
+
+ /*
+ * Interrupts are off on entry.
+ * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+ * it is too small to ever cause noticeable irq latency.
+ */
SWAPGS_UNSAFE_STACK
movl %esp,%r8d
CFI_REGISTER rsp,r8
movq PER_CPU_VAR(kernel_stack),%rsp
- /*
- * No need to follow this irqs on/off section: the syscall
- * disabled irqs and here we enable it straight after entry:
- */
ENABLE_INTERRUPTS(CLBR_NONE)
- SAVE_ARGS 8,0,0
- movl %eax,%eax /* zero extension */
- movq %rax,ORIG_RAX-ARGOFFSET(%rsp)
- movq %rcx,RIP-ARGOFFSET(%rsp)
- CFI_REL_OFFSET rip,RIP-ARGOFFSET
- movq %rbp,RCX-ARGOFFSET(%rsp) /* this lies slightly to ptrace */
+
+ /* Zero-extending 32-bit regs, do not remove */
+ movl %eax,%eax
+
+ /* Construct struct pt_regs on stack */
+ pushq_cfi $__USER32_DS /* pt_regs->ss */
+ pushq_cfi %r8 /* pt_regs->sp */
+ CFI_REL_OFFSET rsp,0
+ pushq_cfi %r11 /* pt_regs->flags */
+ pushq_cfi $__USER32_CS /* pt_regs->cs */
+ pushq_cfi %rcx /* pt_regs->ip */
+ CFI_REL_OFFSET rip,0
+ pushq_cfi_reg rax /* pt_regs->orig_ax */
+ pushq_cfi_reg rdi /* pt_regs->di */
+ pushq_cfi_reg rsi /* pt_regs->si */
+ pushq_cfi_reg rdx /* pt_regs->dx */
+ pushq_cfi_reg rbp /* pt_regs->cx */
movl %ebp,%ecx
- movq $__USER32_CS,CS-ARGOFFSET(%rsp)
- movq $__USER32_DS,SS-ARGOFFSET(%rsp)
- movq %r11,EFLAGS-ARGOFFSET(%rsp)
- /*CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
- movq %r8,RSP-ARGOFFSET(%rsp)
- CFI_REL_OFFSET rsp,RSP-ARGOFFSET
- /* no need to do an access_ok check here because r8 has been
- 32bit zero extended */
- /* hardware stack frame is complete now */
+ pushq_cfi_reg rax /* pt_regs->ax */
+ sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+ CFI_ADJUST_CFA_OFFSET 10*8
+
+ /*
+ * no need to do an access_ok check here because r8 has been
+ * 32bit zero extended
+ */
ASM_STAC
1: movl (%r8),%r9d
_ASM_EXTABLE(1b,ia32_badarg)
ASM_CLAC
- orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+ testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
CFI_REMEMBER_STATE
jnz cstar_tracesys
cmpq $IA32_NR_syscalls-1,%rax
ja ia32_badsys
cstar_do_call:
- IA32_ARG_FIXUP 1
+ /* 32bit syscall -> 64bit C ABI argument conversion */
+ movl %edi,%r8d /* arg5 */
+ /* r9 already loaded */ /* arg6 */
+ xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
+ movl %ebx,%edi /* arg1 */
+ movl %edx,%edx /* arg3 (zero extension) */
cstar_dispatch:
call *ia32_sys_call_table(,%rax,8)
- movq %rax,RAX-ARGOFFSET(%rsp)
+ movq %rax,RAX(%rsp)
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
- testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz sysretl_audit
sysretl_from_sys_call:
- andl $~TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
- RESTORE_ARGS 0,-ARG_SKIP,0,0,0
- movl RIP-ARGOFFSET(%rsp),%ecx
+ andl $~TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+ RESTORE_RSI_RDI_RDX
+ movl RIP(%rsp),%ecx
CFI_REGISTER rip,rcx
- movl EFLAGS-ARGOFFSET(%rsp),%r11d
+ movl EFLAGS(%rsp),%r11d
/*CFI_REGISTER rflags,r11*/
xorq %r10,%r10
xorq %r9,%r9
xorq %r8,%r8
TRACE_IRQS_ON
- movl RSP-ARGOFFSET(%rsp),%esp
+ movl RSP(%rsp),%esp
CFI_RESTORE rsp
+ /*
+ * 64bit->32bit SYSRET restores eip from ecx,
+ * eflags from r11 (but RF and VM bits are forced to 0),
+ * cs and ss are loaded from MSRs.
+ * (Note: 32bit->32bit SYSRET is different: since r11
+ * does not exist, it merely sets eflags.IF=1).
+ */
USERGS_SYSRET32
-
+
#ifdef CONFIG_AUDITSYSCALL
cstar_auditsys:
CFI_RESTORE_STATE
- movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */
+ movl %r9d,R9(%rsp) /* register to be clobbered by call */
auditsys_entry_common
- movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */
+ movl R9(%rsp),%r9d /* reload 6th syscall arg */
jmp cstar_dispatch
sysretl_audit:
@@ -368,17 +444,17 @@ sysretl_audit:
cstar_tracesys:
#ifdef CONFIG_AUDITSYSCALL
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jz cstar_auditsys
#endif
xchgl %r9d,%ebp
- SAVE_REST
- CLEAR_RREGS 0, r9
+ SAVE_EXTRA_REGS
+ CLEAR_RREGS r9
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
- LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
- RESTORE_REST
+ LOAD_ARGS32 1 /* reload args from stack in case ptrace changed it */
+ RESTORE_EXTRA_REGS
xchgl %ebp,%r9d
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
@@ -391,78 +467,94 @@ ia32_badarg:
jmp ia32_sysret
CFI_ENDPROC
-/*
- * Emulated IA32 system calls via int 0x80.
+/*
+ * Emulated IA32 system calls via int 0x80.
*
- * Arguments:
- * %eax System call number.
- * %ebx Arg1
- * %ecx Arg2
- * %edx Arg3
- * %esi Arg4
- * %edi Arg5
- * %ebp Arg6 [note: not saved in the stack frame, should not be touched]
+ * Arguments:
+ * eax system call number
+ * ebx arg1
+ * ecx arg2
+ * edx arg3
+ * esi arg4
+ * edi arg5
+ * ebp arg6 (note: not saved in the stack frame, should not be touched)
*
* Notes:
- * Uses the same stack frame as the x86-64 version.
- * All registers except %eax must be saved (but ptrace may violate that)
+ * Uses the same stack frame as the x86-64 version.
+ * All registers except eax must be saved (but ptrace may violate that).
* Arguments are zero extended. For system calls that want sign extension and
* take long arguments a wrapper is needed. Most calls can just be called
* directly.
- * Assumes it is only called from user space and entered with interrupts off.
- */
+ * Assumes it is only called from user space and entered with interrupts off.
+ */
ENTRY(ia32_syscall)
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8-RIP
- /*CFI_REL_OFFSET ss,SS-RIP*/
- CFI_REL_OFFSET rsp,RSP-RIP
- /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/
- /*CFI_REL_OFFSET cs,CS-RIP*/
- CFI_REL_OFFSET rip,RIP-RIP
- PARAVIRT_ADJUST_EXCEPTION_FRAME
- SWAPGS
+ CFI_DEF_CFA rsp,5*8
+ /*CFI_REL_OFFSET ss,4*8 */
+ CFI_REL_OFFSET rsp,3*8
+ /*CFI_REL_OFFSET rflags,2*8 */
+ /*CFI_REL_OFFSET cs,1*8 */
+ CFI_REL_OFFSET rip,0*8
+
/*
- * No need to follow this irqs on/off section: the syscall
- * disabled irqs and here we enable it straight after entry:
+ * Interrupts are off on entry.
+ * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+ * it is too small to ever cause noticeable irq latency.
*/
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ SWAPGS
ENABLE_INTERRUPTS(CLBR_NONE)
- movl %eax,%eax
- pushq_cfi %rax
+
+ /* Zero-extending 32-bit regs, do not remove */
+ movl %eax,%eax
+
+ /* Construct struct pt_regs on stack (iret frame is already on stack) */
+ pushq_cfi_reg rax /* pt_regs->orig_ax */
+ pushq_cfi_reg rdi /* pt_regs->di */
+ pushq_cfi_reg rsi /* pt_regs->si */
+ pushq_cfi_reg rdx /* pt_regs->dx */
+ pushq_cfi_reg rcx /* pt_regs->cx */
+ pushq_cfi_reg rax /* pt_regs->ax */
cld
- /* note the registers are not zero extended to the sf.
- this could be a problem. */
- SAVE_ARGS 0,1,0
- orl $TS_COMPAT,TI_status+THREAD_INFO(%rsp,RIP-ARGOFFSET)
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ sub $(10*8),%rsp /* pt_regs->r8-11,bp,bx,r12-15 not saved */
+ CFI_ADJUST_CFA_OFFSET 10*8
+
+ orl $TS_COMPAT, ASM_THREAD_INFO(TI_status, %rsp, SIZEOF_PTREGS)
+ testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz ia32_tracesys
cmpq $(IA32_NR_syscalls-1),%rax
ja ia32_badsys
ia32_do_call:
- IA32_ARG_FIXUP
+ /* 32bit syscall -> 64bit C ABI argument conversion */
+ movl %edi,%r8d /* arg5 */
+ movl %ebp,%r9d /* arg6 */
+ xchg %ecx,%esi /* rsi:arg2, rcx:arg4 */
+ movl %ebx,%edi /* arg1 */
+ movl %edx,%edx /* arg3 (zero extension) */
call *ia32_sys_call_table(,%rax,8) # xxx: rip relative
ia32_sysret:
- movq %rax,RAX-ARGOFFSET(%rsp)
+ movq %rax,RAX(%rsp)
ia32_ret_from_sys_call:
- CLEAR_RREGS -ARGOFFSET
- jmp int_ret_from_sys_call
+ CLEAR_RREGS
+ jmp int_ret_from_sys_call
-ia32_tracesys:
- SAVE_REST
+ia32_tracesys:
+ SAVE_EXTRA_REGS
CLEAR_RREGS
movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */
movq %rsp,%rdi /* &pt_regs -> arg1 */
call syscall_trace_enter
- LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
- RESTORE_REST
+ LOAD_ARGS32 /* reload args from stack in case ptrace changed it */
+ RESTORE_EXTRA_REGS
cmpq $(IA32_NR_syscalls-1),%rax
ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
jmp ia32_do_call
END(ia32_syscall)
ia32_badsys:
- movq $0,ORIG_RAX-ARGOFFSET(%rsp)
+ movq $0,ORIG_RAX(%rsp)
movq $-ENOSYS,%rax
jmp ia32_sysret
@@ -479,8 +571,6 @@ GLOBAL(\label)
PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn
PTREGSCALL stub32_sigreturn, sys32_sigreturn
- PTREGSCALL stub32_execve, compat_sys_execve
- PTREGSCALL stub32_execveat, compat_sys_execveat
PTREGSCALL stub32_fork, sys_fork
PTREGSCALL stub32_vfork, sys_vfork
@@ -492,24 +582,23 @@ GLOBAL(stub32_clone)
ALIGN
ia32_ptregs_common:
- popq %r11
CFI_ENDPROC
CFI_STARTPROC32 simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,SS+8-ARGOFFSET
- CFI_REL_OFFSET rax,RAX-ARGOFFSET
- CFI_REL_OFFSET rcx,RCX-ARGOFFSET
- CFI_REL_OFFSET rdx,RDX-ARGOFFSET
- CFI_REL_OFFSET rsi,RSI-ARGOFFSET
- CFI_REL_OFFSET rdi,RDI-ARGOFFSET
- CFI_REL_OFFSET rip,RIP-ARGOFFSET
-/* CFI_REL_OFFSET cs,CS-ARGOFFSET*/
-/* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/
- CFI_REL_OFFSET rsp,RSP-ARGOFFSET
-/* CFI_REL_OFFSET ss,SS-ARGOFFSET*/
- SAVE_REST
+ CFI_DEF_CFA rsp,SIZEOF_PTREGS
+ CFI_REL_OFFSET rax,RAX
+ CFI_REL_OFFSET rcx,RCX
+ CFI_REL_OFFSET rdx,RDX
+ CFI_REL_OFFSET rsi,RSI
+ CFI_REL_OFFSET rdi,RDI
+ CFI_REL_OFFSET rip,RIP
+/* CFI_REL_OFFSET cs,CS*/
+/* CFI_REL_OFFSET rflags,EFLAGS*/
+ CFI_REL_OFFSET rsp,RSP
+/* CFI_REL_OFFSET ss,SS*/
+ SAVE_EXTRA_REGS 8
call *%rax
- RESTORE_REST
- jmp ia32_sysret /* misbalances the return cache */
+ RESTORE_EXTRA_REGS 8
+ ret
CFI_ENDPROC
END(ia32_ptregs_common)
diff --git a/arch/x86/ia32/nosyscall.c b/arch/x86/ia32/nosyscall.c
deleted file mode 100644
index 51ecd5b4e787..000000000000
--- a/arch/x86/ia32/nosyscall.c
+++ /dev/null
@@ -1,7 +0,0 @@
-#include <linux/kernel.h>
-#include <linux/errno.h>
-
-long compat_ni_syscall(void)
-{
- return -ENOSYS;
-}
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 8e0ceecdc957..719cd702b0a4 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -201,20 +201,6 @@ long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high,
advice);
}
-long sys32_vm86_warning(void)
-{
- struct task_struct *me = current;
- static char lastcomm[sizeof(me->comm)];
-
- if (strncmp(lastcomm, me->comm, sizeof(lastcomm))) {
- compat_printk(KERN_INFO
- "%s: vm86 mode not supported on 64 bit kernel\n",
- me->comm);
- strncpy(lastcomm, me->comm, sizeof(lastcomm));
- }
- return -ENOSYS;
-}
-
asmlinkage ssize_t sys32_readahead(int fd, unsigned off_lo, unsigned off_hi,
size_t count)
{
diff --git a/arch/x86/ia32/syscall_ia32.c b/arch/x86/ia32/syscall_ia32.c
deleted file mode 100644
index 4754ba0f5d9f..000000000000
--- a/arch/x86/ia32/syscall_ia32.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* System call table for ia32 emulation. */
-
-#include <linux/linkage.h>
-#include <linux/sys.h>
-#include <linux/cache.h>
-#include <asm/asm-offsets.h>
-
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void compat(void) ;
-#include <asm/syscalls_32.h>
-#undef __SYSCALL_I386
-
-#define __SYSCALL_I386(nr, sym, compat) [nr] = compat,
-
-typedef void (*sys_call_ptr_t)(void);
-
-extern void compat_ni_syscall(void);
-
-const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
- /*
- * Smells like a compiler bug -- it doesn't work
- * when the & below is removed.
- */
- [0 ... __NR_ia32_syscall_max] = &compat_ni_syscall,
-#include <asm/syscalls_32.h>
-};
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index 372231c22a47..bdf02eeee765 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -18,12 +18,63 @@
.endm
#endif
-.macro altinstruction_entry orig alt feature orig_len alt_len
+.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
.long \orig - .
.long \alt - .
.word \feature
.byte \orig_len
.byte \alt_len
+ .byte \pad_len
+.endm
+
+.macro ALTERNATIVE oldinstr, newinstr, feature
+140:
+ \oldinstr
+141:
+ .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
+142:
+
+ .pushsection .altinstructions,"a"
+ altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
+ .popsection
+
+ .pushsection .altinstr_replacement,"ax"
+143:
+ \newinstr
+144:
+ .popsection
+.endm
+
+#define old_len 141b-140b
+#define new_len1 144f-143f
+#define new_len2 145f-144f
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ */
+#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140:
+ \oldinstr
+141:
+ .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+ (alt_max_short(new_len1, new_len2) - (old_len)),0x90
+142:
+
+ .pushsection .altinstructions,"a"
+ altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
+ altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
+ .popsection
+
+ .pushsection .altinstr_replacement,"ax"
+143:
+ \newinstr1
+144:
+ \newinstr2
+145:
+ .popsection
.endm
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 473bdbee378a..ba32af062f61 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -48,8 +48,9 @@ struct alt_instr {
s32 repl_offset; /* offset to replacement instruction */
u16 cpuid; /* cpuid bit set for replacement */
u8 instrlen; /* length of original instruction */
- u8 replacementlen; /* length of new instruction, <= instrlen */
-};
+ u8 replacementlen; /* length of new instruction */
+ u8 padlen; /* length of build-time padding */
+} __packed;
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
@@ -76,50 +77,69 @@ static inline int alternatives_text_reserved(void *start, void *end)
}
#endif /* CONFIG_SMP */
-#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n"
+#define b_replacement(num) "664"#num
+#define e_replacement(num) "665"#num
-#define b_replacement(number) "663"#number
-#define e_replacement(number) "664"#number
+#define alt_end_marker "663"
+#define alt_slen "662b-661b"
+#define alt_pad_len alt_end_marker"b-662b"
+#define alt_total_slen alt_end_marker"b-661b"
+#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
-#define alt_slen "662b-661b"
-#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
+#define __OLDINSTR(oldinstr, num) \
+ "661:\n\t" oldinstr "\n662:\n" \
+ ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
+ "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
-#define ALTINSTR_ENTRY(feature, number) \
+#define OLDINSTR(oldinstr, num) \
+ __OLDINSTR(oldinstr, num) \
+ alt_end_marker ":\n"
+
+/*
+ * max without conditionals. Idea adapted from:
+ * http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas works with s32s.
+ */
+#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+
+/*
+ * Pad the second replacement alternative with additional NOPs if it is
+ * additionally longer than the first replacement alternative.
+ */
+#define OLDINSTR_2(oldinstr, num1, num2) \
+ "661:\n\t" oldinstr "\n662:\n" \
+ ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \
+ "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \
+ alt_end_marker ":\n"
+
+#define ALTINSTR_ENTRY(feature, num) \
" .long 661b - .\n" /* label */ \
- " .long " b_replacement(number)"f - .\n" /* new instruction */ \
+ " .long " b_replacement(num)"f - .\n" /* new instruction */ \
" .word " __stringify(feature) "\n" /* feature bit */ \
- " .byte " alt_slen "\n" /* source len */ \
- " .byte " alt_rlen(number) "\n" /* replacement len */
-
-#define DISCARD_ENTRY(number) /* rlen <= slen */ \
- " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
+ " .byte " alt_total_slen "\n" /* source len */ \
+ " .byte " alt_rlen(num) "\n" /* replacement len */ \
+ " .byte " alt_pad_len "\n" /* pad len */
-#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \
- b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
+#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
+ b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
/* alternative assembly primitive: */
#define ALTERNATIVE(oldinstr, newinstr, feature) \
- OLDINSTR(oldinstr) \
+ OLDINSTR(oldinstr, 1) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(feature, 1) \
".popsection\n" \
- ".pushsection .discard,\"aw\",@progbits\n" \
- DISCARD_ENTRY(1) \
- ".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
".popsection"
#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
- OLDINSTR(oldinstr) \
+ OLDINSTR_2(oldinstr, 1, 2) \
".pushsection .altinstructions,\"a\"\n" \
ALTINSTR_ENTRY(feature1, 1) \
ALTINSTR_ENTRY(feature2, 2) \
".popsection\n" \
- ".pushsection .discard,\"aw\",@progbits\n" \
- DISCARD_ENTRY(1) \
- DISCARD_ENTRY(2) \
- ".popsection\n" \
".pushsection .altinstr_replacement, \"ax\"\n" \
ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
@@ -146,6 +166,9 @@ static inline int alternatives_text_reserved(void *start, void *end)
#define alternative(oldinstr, newinstr, feature) \
asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
+#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+ asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
+
/*
* Alternative inline assembly with input.
*
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index efc3b22d896e..976b86a325e5 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -91,7 +91,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v)
{
volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
- alternative_io("movl %0, %1", "xchgl %0, %1", X86_BUG_11AP,
+ alternative_io("movl %0, %P1", "xchgl %0, %P1", X86_BUG_11AP,
ASM_OUTPUT2("=r" (v), "=m" (*addr)),
ASM_OUTPUT2("0" (v), "m" (*addr)));
}
@@ -204,7 +204,6 @@ extern void clear_local_APIC(void);
extern void disconnect_bsp_APIC(int virt_wire_setup);
extern void disable_local_APIC(void);
extern void lapic_shutdown(void);
-extern int verify_local_APIC(void);
extern void sync_Arb_IDs(void);
extern void init_bsp_APIC(void);
extern void setup_local_APIC(void);
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h
index 2ab1eb33106e..959e45b81fe2 100644
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@@ -95,13 +95,11 @@ do { \
* Stop RDTSC speculation. This is needed when you need to use RDTSC
* (or get_cycles or vread that possibly accesses the TSC) in a defined
* code region.
- *
- * (Could use an alternative three way for this if there was one.)
*/
static __always_inline void rdtsc_barrier(void)
{
- alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
- alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+ alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+ "lfence", X86_FEATURE_LFENCE_RDTSC);
}
#endif /* _ASM_X86_BARRIER_H */
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 1f1297b46f83..1c8b50edb2db 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -55,143 +55,157 @@ For 32-bit we have the following conventions - kernel is built with
* for assembly code:
*/
-#define R15 0
-#define R14 8
-#define R13 16
-#define R12 24
-#define RBP 32
-#define RBX 40
-
-/* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11 48
-#define R10 56
-#define R9 64
-#define R8 72
-#define RAX 80
-#define RCX 88
-#define RDX 96
-#define RSI 104
-#define RDI 112
-#define ORIG_RAX 120 /* + error_code */
-/* end of arguments */
-
-/* cpu exception frame or undefined in case of fast syscall: */
-#define RIP 128
-#define CS 136
-#define EFLAGS 144
-#define RSP 152
-#define SS 160
-
-#define ARGOFFSET R11
-
- .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1, rax_enosys=0
- subq $9*8+\addskip, %rsp
- CFI_ADJUST_CFA_OFFSET 9*8+\addskip
- movq_cfi rdi, 8*8
- movq_cfi rsi, 7*8
- movq_cfi rdx, 6*8
-
- .if \save_rcx
- movq_cfi rcx, 5*8
- .endif
+/* The layout forms the "struct pt_regs" on the stack: */
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
+#define R15 0*8
+#define R14 1*8
+#define R13 2*8
+#define R12 3*8
+#define RBP 4*8
+#define RBX 5*8
+/* These regs are callee-clobbered. Always saved on kernel entry. */
+#define R11 6*8
+#define R10 7*8
+#define R9 8*8
+#define R8 9*8
+#define RAX 10*8
+#define RCX 11*8
+#define RDX 12*8
+#define RSI 13*8
+#define RDI 14*8
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX 15*8
+/* Return frame for iretq */
+#define RIP 16*8
+#define CS 17*8
+#define EFLAGS 18*8
+#define RSP 19*8
+#define SS 20*8
+
+#define SIZEOF_PTREGS 21*8
+
+ .macro ALLOC_PT_GPREGS_ON_STACK addskip=0
+ subq $15*8+\addskip, %rsp
+ CFI_ADJUST_CFA_OFFSET 15*8+\addskip
+ .endm
- .if \rax_enosys
- movq $-ENOSYS, 4*8(%rsp)
- .else
- movq_cfi rax, 4*8
+ .macro SAVE_C_REGS_HELPER offset=0 rax=1 rcx=1 r8910=1 r11=1
+ .if \r11
+ movq_cfi r11, 6*8+\offset
.endif
-
- .if \save_r891011
- movq_cfi r8, 3*8
- movq_cfi r9, 2*8
- movq_cfi r10, 1*8
- movq_cfi r11, 0*8
+ .if \r8910
+ movq_cfi r10, 7*8+\offset
+ movq_cfi r9, 8*8+\offset
+ movq_cfi r8, 9*8+\offset
+ .endif
+ .if \rax
+ movq_cfi rax, 10*8+\offset
+ .endif
+ .if \rcx
+ movq_cfi rcx, 11*8+\offset
.endif
+ movq_cfi rdx, 12*8+\offset
+ movq_cfi rsi, 13*8+\offset
+ movq_cfi rdi, 14*8+\offset
+ .endm
+ .macro SAVE_C_REGS offset=0
+ SAVE_C_REGS_HELPER \offset, 1, 1, 1, 1
+ .endm
+ .macro SAVE_C_REGS_EXCEPT_RAX_RCX offset=0
+ SAVE_C_REGS_HELPER \offset, 0, 0, 1, 1
+ .endm
+ .macro SAVE_C_REGS_EXCEPT_R891011
+ SAVE_C_REGS_HELPER 0, 1, 1, 0, 0
+ .endm
+ .macro SAVE_C_REGS_EXCEPT_RCX_R891011
+ SAVE_C_REGS_HELPER 0, 1, 0, 0, 0
+ .endm
+ .macro SAVE_C_REGS_EXCEPT_RAX_RCX_R11
+ SAVE_C_REGS_HELPER 0, 0, 0, 1, 0
+ .endm
+
+ .macro SAVE_EXTRA_REGS offset=0
+ movq_cfi r15, 0*8+\offset
+ movq_cfi r14, 1*8+\offset
+ movq_cfi r13, 2*8+\offset
+ movq_cfi r12, 3*8+\offset
+ movq_cfi rbp, 4*8+\offset
+ movq_cfi rbx, 5*8+\offset
+ .endm
+ .macro SAVE_EXTRA_REGS_RBP offset=0
+ movq_cfi rbp, 4*8+\offset
+ .endm
+ .macro RESTORE_EXTRA_REGS offset=0
+ movq_cfi_restore 0*8+\offset, r15
+ movq_cfi_restore 1*8+\offset, r14
+ movq_cfi_restore 2*8+\offset, r13
+ movq_cfi_restore 3*8+\offset, r12
+ movq_cfi_restore 4*8+\offset, rbp
+ movq_cfi_restore 5*8+\offset, rbx
.endm
-#define ARG_SKIP (9*8)
+ .macro ZERO_EXTRA_REGS
+ xorl %r15d, %r15d
+ xorl %r14d, %r14d
+ xorl %r13d, %r13d
+ xorl %r12d, %r12d
+ xorl %ebp, %ebp
+ xorl %ebx, %ebx
+ .endm
- .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \
- rstor_r8910=1, rstor_rdx=1
+ .macro RESTORE_C_REGS_HELPER rstor_rax=1, rstor_rcx=1, rstor_r11=1, rstor_r8910=1, rstor_rdx=1
.if \rstor_r11
- movq_cfi_restore 0*8, r11
+ movq_cfi_restore 6*8, r11
.endif
-
.if \rstor_r8910
- movq_cfi_restore 1*8, r10
- movq_cfi_restore 2*8, r9
- movq_cfi_restore 3*8, r8
+ movq_cfi_restore 7*8, r10
+ movq_cfi_restore 8*8, r9
+ movq_cfi_restore 9*8, r8
.endif
-
.if \rstor_rax
- movq_cfi_restore 4*8, rax
+ movq_cfi_restore 10*8, rax
.endif
-
.if \rstor_rcx
- movq_cfi_restore 5*8, rcx
+ movq_cfi_restore 11*8, rcx
.endif
-
.if \rstor_rdx
- movq_cfi_restore 6*8, rdx
- .endif
-
- movq_cfi_restore 7*8, rsi
- movq_cfi_restore 8*8, rdi
-
- .if ARG_SKIP+\addskip > 0
- addq $ARG_SKIP+\addskip, %rsp
- CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
+ movq_cfi_restore 12*8, rdx
.endif
+ movq_cfi_restore 13*8, rsi
+ movq_cfi_restore 14*8, rdi
.endm
-
- .macro LOAD_ARGS offset, skiprax=0
- movq \offset(%rsp), %r11
- movq \offset+8(%rsp), %r10
- movq \offset+16(%rsp), %r9
- movq \offset+24(%rsp), %r8
- movq \offset+40(%rsp), %rcx
- movq \offset+48(%rsp), %rdx
- movq \offset+56(%rsp), %rsi
- movq \offset+64(%rsp), %rdi
- .if \skiprax
- .else
- movq \offset+72(%rsp), %rax
- .endif
+ .macro RESTORE_C_REGS
+ RESTORE_C_REGS_HELPER 1,1,1,1,1
.endm
-
-#define REST_SKIP (6*8)
-
- .macro SAVE_REST
- subq $REST_SKIP, %rsp
- CFI_ADJUST_CFA_OFFSET REST_SKIP
- movq_cfi rbx, 5*8
- movq_cfi rbp, 4*8
- movq_cfi r12, 3*8
- movq_cfi r13, 2*8
- movq_cfi r14, 1*8
- movq_cfi r15, 0*8
+ .macro RESTORE_C_REGS_EXCEPT_RAX
+ RESTORE_C_REGS_HELPER 0,1,1,1,1
.endm
-
- .macro RESTORE_REST
- movq_cfi_restore 0*8, r15
- movq_cfi_restore 1*8, r14
- movq_cfi_restore 2*8, r13
- movq_cfi_restore 3*8, r12
- movq_cfi_restore 4*8, rbp
- movq_cfi_restore 5*8, rbx
- addq $REST_SKIP, %rsp
- CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
+ .macro RESTORE_C_REGS_EXCEPT_RCX
+ RESTORE_C_REGS_HELPER 1,0,1,1,1
.endm
-
- .macro SAVE_ALL
- SAVE_ARGS
- SAVE_REST
+ .macro RESTORE_C_REGS_EXCEPT_R11
+ RESTORE_C_REGS_HELPER 1,1,0,1,1
+ .endm
+ .macro RESTORE_C_REGS_EXCEPT_RCX_R11
+ RESTORE_C_REGS_HELPER 1,0,0,1,1
+ .endm
+ .macro RESTORE_RSI_RDI
+ RESTORE_C_REGS_HELPER 0,0,0,0,0
+ .endm
+ .macro RESTORE_RSI_RDI_RDX
+ RESTORE_C_REGS_HELPER 0,0,0,0,1
.endm
- .macro RESTORE_ALL addskip=0
- RESTORE_REST
- RESTORE_ARGS 1, \addskip
+ .macro REMOVE_PT_GPREGS_FROM_STACK addskip=0
+ addq $15*8+\addskip, %rsp
+ CFI_ADJUST_CFA_OFFSET -(15*8+\addskip)
.endm
.macro icebp
@@ -210,37 +224,23 @@ For 32-bit we have the following conventions - kernel is built with
*/
.macro SAVE_ALL
- pushl_cfi %eax
- CFI_REL_OFFSET eax, 0
- pushl_cfi %ebp
- CFI_REL_OFFSET ebp, 0
- pushl_cfi %edi
- CFI_REL_OFFSET edi, 0
- pushl_cfi %esi
- CFI_REL_OFFSET esi, 0
- pushl_cfi %edx
- CFI_REL_OFFSET edx, 0
- pushl_cfi %ecx
- CFI_REL_OFFSET ecx, 0
- pushl_cfi %ebx
- CFI_REL_OFFSET ebx, 0
+ pushl_cfi_reg eax
+ pushl_cfi_reg ebp
+ pushl_cfi_reg edi
+ pushl_cfi_reg esi
+ pushl_cfi_reg edx
+ pushl_cfi_reg ecx
+ pushl_cfi_reg ebx
.endm
.macro RESTORE_ALL
- popl_cfi %ebx
- CFI_RESTORE ebx
- popl_cfi %ecx
- CFI_RESTORE ecx
- popl_cfi %edx
- CFI_RESTORE edx
- popl_cfi %esi
- CFI_RESTORE esi
- popl_cfi %edi
- CFI_RESTORE edi
- popl_cfi %ebp
- CFI_RESTORE ebp
- popl_cfi %eax
- CFI_RESTORE eax
+ popl_cfi_reg ebx
+ popl_cfi_reg ecx
+ popl_cfi_reg edx
+ popl_cfi_reg esi
+ popl_cfi_reg edi
+ popl_cfi_reg ebp
+ popl_cfi_reg eax
.endm
#endif /* CONFIG_X86_64 */
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index 59c6c401f79f..acdee09228b3 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -301,7 +301,7 @@ static inline void __user *arch_compat_alloc_user_space(long len)
sp = task_pt_regs(current)->sp;
} else {
/* -128 for the x32 ABI redzone */
- sp = this_cpu_read(old_rsp) - 128;
+ sp = task_pt_regs(current)->sp - 128;
}
return (void __user *)round_down(sp - len, 16);
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 90a54851aedc..854c04b3c9c2 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -231,7 +231,9 @@
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
#define X86_FEATURE_ADX ( 9*32+19) /* The ADCX and ADOX instructions */
#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
+#define X86_FEATURE_PCOMMIT ( 9*32+22) /* PCOMMIT instruction */
#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
@@ -418,6 +420,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P0\n" /* 1: do replace */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
+ " .byte 0\n" /* pad len */
".previous\n"
/* skipping size check since replacement size = 0 */
: : "i" (X86_FEATURE_ALWAYS) : : t_warn);
@@ -432,6 +435,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P0\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 0\n" /* replacement len */
+ " .byte 0\n" /* pad len */
".previous\n"
/* skipping size check since replacement size = 0 */
: : "i" (bit) : : t_no);
@@ -457,6 +461,7 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
" .word %P1\n" /* feature bit */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
+ " .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -483,31 +488,30 @@ static __always_inline __pure bool __static_cpu_has(u16 bit)
static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
{
#ifdef CC_HAVE_ASM_GOTO
-/*
- * We need to spell the jumps to the compiler because, depending on the offset,
- * the replacement jump can be bigger than the original jump, and this we cannot
- * have. Thus, we force the jump to the widest, 4-byte, signed relative
- * offset even though the last would often fit in less bytes.
- */
- asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+ asm_volatile_goto("1: jmp %l[t_dynamic]\n"
"2:\n"
+ ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+ "((5f-4f) - (2b-1b)),0x90\n"
+ "3:\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
- " .long 3f - .\n" /* repl offset */
+ " .long 4f - .\n" /* repl offset */
" .word %P1\n" /* always replace */
- " .byte 2b - 1b\n" /* src len */
- " .byte 4f - 3f\n" /* repl len */
+ " .byte 3b - 1b\n" /* src len */
+ " .byte 5f - 4f\n" /* repl len */
+ " .byte 3b - 2b\n" /* pad len */
".previous\n"
".section .altinstr_replacement,\"ax\"\n"
- "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
- "4:\n"
+ "4: jmp %l[t_no]\n"
+ "5:\n"
".previous\n"
".section .altinstructions,\"a\"\n"
" .long 1b - .\n" /* src offset */
" .long 0\n" /* no replacement */
" .word %P0\n" /* feature bit */
- " .byte 2b - 1b\n" /* src len */
+ " .byte 3b - 1b\n" /* src len */
" .byte 0\n" /* repl len */
+ " .byte 0\n" /* pad len */
".previous\n"
: : "i" (bit), "i" (X86_FEATURE_ALWAYS)
: : t_dynamic, t_no);
@@ -527,6 +531,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .word %P2\n" /* always replace */
" .byte 2b - 1b\n" /* source len */
" .byte 4f - 3f\n" /* replacement len */
+ " .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
@@ -541,6 +546,7 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
" .word %P1\n" /* feature bit */
" .byte 4b - 3b\n" /* src len */
" .byte 6f - 5f\n" /* repl len */
+ " .byte 0\n" /* pad len */
".previous\n"
".section .discard,\"aw\",@progbits\n"
" .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index a94b82e8f156..a0bf89fd2647 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -376,11 +376,16 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
* Pentium F0 0F bugfix can have resulted in the mapped
* IDT being write-protected.
*/
-#define set_intr_gate(n, addr) \
+#define set_intr_gate_notrace(n, addr) \
do { \
BUG_ON((unsigned)n > 0xFF); \
_set_gate(n, GATE_INTERRUPT, (void *)addr, 0, 0, \
__KERNEL_CS); \
+ } while (0)
+
+#define set_intr_gate(n, addr) \
+ do { \
+ set_intr_gate_notrace(n, addr); \
_trace_set_gate(n, GATE_INTERRUPT, (void *)trace_##addr,\
0, 0, __KERNEL_CS); \
} while (0)
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h
index f6f15986df6c..de1cdaf4d743 100644
--- a/arch/x86/include/asm/dwarf2.h
+++ b/arch/x86/include/asm/dwarf2.h
@@ -86,11 +86,23 @@
CFI_ADJUST_CFA_OFFSET 8
.endm
+ .macro pushq_cfi_reg reg
+ pushq %\reg
+ CFI_ADJUST_CFA_OFFSET 8
+ CFI_REL_OFFSET \reg, 0
+ .endm
+
.macro popq_cfi reg
popq \reg
CFI_ADJUST_CFA_OFFSET -8
.endm
+ .macro popq_cfi_reg reg
+ popq %\reg
+ CFI_ADJUST_CFA_OFFSET -8
+ CFI_RESTORE \reg
+ .endm
+
.macro pushfq_cfi
pushfq
CFI_ADJUST_CFA_OFFSET 8
@@ -116,11 +128,23 @@
CFI_ADJUST_CFA_OFFSET 4
.endm
+ .macro pushl_cfi_reg reg
+ pushl %\reg
+ CFI_ADJUST_CFA_OFFSET 4
+ CFI_REL_OFFSET \reg, 0
+ .endm
+
.macro popl_cfi reg
popl \reg
CFI_ADJUST_CFA_OFFSET -4
.endm
+ .macro popl_cfi_reg reg
+ popl %\reg
+ CFI_ADJUST_CFA_OFFSET -4
+ CFI_RESTORE \reg
+ .endm
+
.macro pushfl_cfi
pushfl
CFI_ADJUST_CFA_OFFSET 4
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h
index 25bce45c6fc4..3738b138b843 100644
--- a/arch/x86/include/asm/efi.h
+++ b/arch/x86/include/asm/efi.h
@@ -2,6 +2,8 @@
#define _ASM_X86_EFI_H
#include <asm/i387.h>
+#include <asm/pgtable.h>
+
/*
* We map the EFI regions needed for runtime services non-contiguously,
* with preserved alignment on virtual addresses starting from -4G down
@@ -89,8 +91,8 @@ extern void __iomem *__init efi_ioremap(unsigned long addr, unsigned long size,
extern struct efi_scratch efi_scratch;
extern void __init efi_set_executable(efi_memory_desc_t *md, bool executable);
extern int __init efi_memblock_x86_reserve_range(void);
-extern void __init efi_call_phys_prolog(void);
-extern void __init efi_call_phys_epilog(void);
+extern pgd_t * __init efi_call_phys_prolog(void);
+extern void __init efi_call_phys_epilog(pgd_t *save_pgd);
extern void __init efi_unmap_memmap(void);
extern void __init efi_memory_uc(u64 addr, unsigned long size);
extern void __init efi_map_region(efi_memory_desc_t *md);
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index ca3347a9dab5..3563107b5060 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -171,10 +171,11 @@ do { \
static inline void elf_common_init(struct thread_struct *t,
struct pt_regs *regs, const u16 ds)
{
- regs->ax = regs->bx = regs->cx = regs->dx = 0;
- regs->si = regs->di = regs->bp = 0;
+ /* Commented-out registers are cleared in stub_execve */
+ /*regs->ax = regs->bx =*/ regs->cx = regs->dx = 0;
+ regs->si = regs->di /*= regs->bp*/ = 0;
regs->r8 = regs->r9 = regs->r10 = regs->r11 = 0;
- regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
+ /*regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;*/
t->fs = t->gs = 0;
t->fsindex = t->gsindex = 0;
t->ds = t->es = ds;
diff --git a/arch/x86/include/asm/fpu-internal.h b/arch/x86/include/asm/fpu-internal.h
index 0dbc08282291..72ba21a8b5fc 100644
--- a/arch/x86/include/asm/fpu-internal.h
+++ b/arch/x86/include/asm/fpu-internal.h
@@ -370,7 +370,7 @@ static inline void drop_fpu(struct task_struct *tsk)
preempt_disable();
tsk->thread.fpu_counter = 0;
__drop_fpu(tsk);
- clear_used_math();
+ clear_stopped_child_used_math(tsk);
preempt_enable();
}
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 9662290e0b20..e9571ddabc4f 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h
@@ -181,10 +181,9 @@ extern __visible void smp_call_function_single_interrupt(struct pt_regs *);
extern __visible void smp_invalidate_interrupt(struct pt_regs *);
#endif
-extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
- - FIRST_EXTERNAL_VECTOR])(void);
+extern char irq_entries_start[];
#ifdef CONFIG_TRACING
-#define trace_interrupt interrupt
+#define trace_irq_entries_start irq_entries_start
#endif
#define VECTOR_UNDEFINED (-1)
diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h
index 47f29b1d1846..e7814b74caf8 100644
--- a/arch/x86/include/asm/insn.h
+++ b/arch/x86/include/asm/insn.h
@@ -69,7 +69,7 @@ struct insn {
const insn_byte_t *next_byte;
};
-#define MAX_INSN_SIZE 16
+#define MAX_INSN_SIZE 15
#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
diff --git a/arch/x86/include/asm/irqflags.h b/arch/x86/include/asm/irqflags.h
index 0a8b519226b8..b77f5edb03b0 100644
--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -136,10 +136,6 @@ static inline notrace unsigned long arch_local_irq_save(void)
#define USERGS_SYSRET32 \
swapgs; \
sysretl
-#define ENABLE_INTERRUPTS_SYSEXIT32 \
- swapgs; \
- sti; \
- sysexit
#else
#define INTERRUPT_RETURN iret
@@ -163,22 +159,27 @@ static inline int arch_irqs_disabled(void)
return arch_irqs_disabled_flags(flags);
}
+#endif /* !__ASSEMBLY__ */
+#ifdef __ASSEMBLY__
+#ifdef CONFIG_TRACE_IRQFLAGS
+# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
+# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
#else
-
-#ifdef CONFIG_X86_64
-#define ARCH_LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ \
+# define TRACE_IRQS_ON
+# define TRACE_IRQS_OFF
+#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+# ifdef CONFIG_X86_64
+# define LOCKDEP_SYS_EXIT call lockdep_sys_exit_thunk
+# define LOCKDEP_SYS_EXIT_IRQ \
TRACE_IRQS_ON; \
sti; \
- SAVE_REST; \
- LOCKDEP_SYS_EXIT; \
- RESTORE_REST; \
+ call lockdep_sys_exit_thunk; \
cli; \
TRACE_IRQS_OFF;
-
-#else
-#define ARCH_LOCKDEP_SYS_EXIT \
+# else
+# define LOCKDEP_SYS_EXIT \
pushl %eax; \
pushl %ecx; \
pushl %edx; \
@@ -186,24 +187,12 @@ static inline int arch_irqs_disabled(void)
popl %edx; \
popl %ecx; \
popl %eax;
-
-#define ARCH_LOCKDEP_SYS_EXIT_IRQ
-#endif
-
-#ifdef CONFIG_TRACE_IRQFLAGS
-# define TRACE_IRQS_ON call trace_hardirqs_on_thunk;
-# define TRACE_IRQS_OFF call trace_hardirqs_off_thunk;
+# define LOCKDEP_SYS_EXIT_IRQ
+# endif
#else
-# define TRACE_IRQS_ON
-# define TRACE_IRQS_OFF
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# define LOCKDEP_SYS_EXIT ARCH_LOCKDEP_SYS_EXIT
-# define LOCKDEP_SYS_EXIT_IRQ ARCH_LOCKDEP_SYS_EXIT_IRQ
-# else
# define LOCKDEP_SYS_EXIT
# define LOCKDEP_SYS_EXIT_IRQ
-# endif
-
+#endif
#endif /* __ASSEMBLY__ */
+
#endif
diff --git a/arch/x86/include/asm/jump_label.h b/arch/x86/include/asm/jump_label.h
index 6a2cefb4395a..a4c1cf7e93f8 100644
--- a/arch/x86/include/asm/jump_label.h
+++ b/arch/x86/include/asm/jump_label.h
@@ -1,7 +1,7 @@
#ifndef _ASM_X86_JUMP_LABEL_H
#define _ASM_X86_JUMP_LABEL_H
-#ifdef __KERNEL__
+#ifndef __ASSEMBLY__
#include <linux/stringify.h>
#include <linux/types.h>
@@ -30,8 +30,6 @@ l_yes:
return true;
}
-#endif /* __KERNEL__ */
-
#ifdef CONFIG_X86_64
typedef u64 jump_label_t;
#else
@@ -44,4 +42,5 @@ struct jump_entry {
jump_label_t key;
};
+#endif /* __ASSEMBLY__ */
#endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a236e39cc385..dea2e7e962e3 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -81,11 +81,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
}
-#define SELECTOR_TI_MASK (1 << 2)
-#define SELECTOR_RPL_MASK 0x03
-
-#define IOPL_SHIFT 12
-
#define KVM_PERMILLE_MMU_PAGES 20
#define KVM_MIN_ALLOC_MMU_PAGES 64
#define KVM_MMU_HASH_SHIFT 10
@@ -345,6 +340,7 @@ struct kvm_pmu {
enum {
KVM_DEBUGREG_BP_ENABLED = 1,
KVM_DEBUGREG_WONT_EXIT = 2,
+ KVM_DEBUGREG_RELOAD = 4,
};
struct kvm_vcpu_arch {
@@ -431,6 +427,9 @@ struct kvm_vcpu_arch {
int cpuid_nent;
struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+ int maxphyaddr;
+
/* emulate context */
struct x86_emulate_ctxt emulate_ctxt;
@@ -550,11 +549,20 @@ struct kvm_arch_memory_slot {
struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
};
+/*
+ * We use as the mode the number of bits allocated in the LDR for the
+ * logical processor ID. It happens that these are all powers of two.
+ * This makes it is very easy to detect cases where the APICs are
+ * configured for multiple modes; in that case, we cannot use the map and
+ * hence cannot use kvm_irq_delivery_to_apic_fast either.
+ */
+#define KVM_APIC_MODE_XAPIC_CLUSTER 4
+#define KVM_APIC_MODE_XAPIC_FLAT 8
+#define KVM_APIC_MODE_X2APIC 16
+
struct kvm_apic_map {
struct rcu_head rcu;
- u8 ldr_bits;
- /* fields bellow are used to decode ldr values in different modes */
- u32 cid_shift, cid_mask, lid_mask, broadcast;
+ u8 mode;
struct kvm_lapic *phys_map[256];
/* first index is cluster id second is cpu id in a cluster */
struct kvm_lapic *logical_map[16][16];
@@ -859,6 +867,8 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
struct kvm_memory_slot *memslot);
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+ struct kvm_memory_slot *memslot);
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot);
void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
@@ -933,6 +943,7 @@ struct x86_emulate_ctxt;
int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, unsigned short port);
void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
int kvm_emulate_halt(struct kvm_vcpu *vcpu);
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu);
int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
@@ -1128,7 +1139,6 @@ int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu);
int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index e62cf897f781..c1adf33fdd0d 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -115,7 +115,7 @@ static inline void kvm_spinlock_init(void)
static inline bool kvm_para_available(void)
{
- return 0;
+ return false;
}
static inline unsigned int kvm_arch_para_features(void)
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h
index a1410db38a1a..653dfa7662e1 100644
--- a/arch/x86/include/asm/mwait.h
+++ b/arch/x86/include/asm/mwait.h
@@ -30,6 +30,14 @@ static inline void __mwait(unsigned long eax, unsigned long ecx)
:: "a" (eax), "c" (ecx));
}
+static inline void __sti_mwait(unsigned long eax, unsigned long ecx)
+{
+ trace_hardirqs_on();
+ /* "mwait %eax, %ecx;" */
+ asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
+ :: "a" (eax), "c" (ecx));
+}
+
/*
* This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
* which can obviate IPI to trigger checking of need_resched.
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index 95e11f79f123..f97fbe3abb67 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -51,8 +51,6 @@ extern int devmem_is_allowed(unsigned long pagenr);
extern unsigned long max_low_pfn_mapped;
extern unsigned long max_pfn_mapped;
-extern bool kaslr_enabled;
-
static inline phys_addr_t get_max_mapped(void)
{
return (phys_addr_t)max_pfn_mapped << PAGE_SHIFT;
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 965c47d254aa..5f6051d5d139 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -976,11 +976,6 @@ extern void default_banner(void);
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_usergs_sysret64))
-
-#define ENABLE_INTERRUPTS_SYSEXIT32 \
- PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_irq_enable_sysexit), \
- CLBR_NONE, \
- jmp PARA_INDIRECT(pv_cpu_ops+PV_CPU_irq_enable_sysexit))
#endif /* CONFIG_X86_32 */
#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index fa1195dae425..164e3f8d3c3d 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h
@@ -93,6 +93,8 @@ extern raw_spinlock_t pci_config_lock;
extern int (*pcibios_enable_irq)(struct pci_dev *dev);
extern void (*pcibios_disable_irq)(struct pci_dev *dev);
+extern bool mp_should_keep_irq(struct device *dev);
+
struct pci_raw_ops {
int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
int reg, int len, u32 *val);
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index ec1c93588cef..d2203b5d9538 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -210,8 +210,23 @@ struct x86_hw_tss {
unsigned long sp0;
unsigned short ss0, __ss0h;
unsigned long sp1;
- /* ss1 caches MSR_IA32_SYSENTER_CS: */
- unsigned short ss1, __ss1h;
+
+ /*
+ * We don't use ring 1, so ss1 is a convenient scratch space in
+ * the same cacheline as sp0. We use ss1 to cache the value in
+ * MSR_IA32_SYSENTER_CS. When we context switch
+ * MSR_IA32_SYSENTER_CS, we first check if the new value being
+ * written matches ss1, and, if it's not, then we wrmsr the new
+ * value and update ss1.
+ *
+ * The only reason we context switch MSR_IA32_SYSENTER_CS is
+ * that we set it to zero in vm86 tasks to avoid corrupting the
+ * stack if we were to go through the sysenter path from vm86
+ * mode.
+ */
+ unsigned short ss1; /* MSR_IA32_SYSENTER_CS */
+
+ unsigned short __ss1h;
unsigned long sp2;
unsigned short ss2, __ss2h;
unsigned long __cr3;
@@ -276,13 +291,17 @@ struct tss_struct {
unsigned long io_bitmap[IO_BITMAP_LONGS + 1];
/*
- * .. and then another 0x100 bytes for the emergency kernel stack:
+ * Space for the temporary SYSENTER stack:
*/
- unsigned long stack[64];
+ unsigned long SYSENTER_stack[64];
} ____cacheline_aligned;
-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
+DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss);
+
+#ifdef CONFIG_X86_32
+DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
+#endif
/*
* Save the original ist values for checking stack pointers during debugging
@@ -474,7 +493,6 @@ struct thread_struct {
#ifdef CONFIG_X86_32
unsigned long sysenter_cs;
#else
- unsigned long usersp; /* Copy from PDA */
unsigned short es;
unsigned short ds;
unsigned short fsindex;
@@ -564,6 +582,16 @@ static inline void native_swapgs(void)
#endif
}
+static inline unsigned long current_top_of_stack(void)
+{
+#ifdef CONFIG_X86_64
+ return this_cpu_read_stable(cpu_tss.x86_tss.sp0);
+#else
+ /* sp0 on x86_32 is special in and around vm86 mode. */
+ return this_cpu_read_stable(cpu_current_top_of_stack);
+#endif
+}
+
#ifdef CONFIG_PARAVIRT
#include <asm/paravirt.h>
#else
@@ -761,10 +789,10 @@ extern char ignore_fpu_irq;
#define ARCH_HAS_SPINLOCK_PREFETCH
#ifdef CONFIG_X86_32
-# define BASE_PREFETCH ASM_NOP4
+# define BASE_PREFETCH ""
# define ARCH_HAS_PREFETCH
#else
-# define BASE_PREFETCH "prefetcht0 (%1)"
+# define BASE_PREFETCH "prefetcht0 %P1"
#endif
/*
@@ -775,10 +803,9 @@ extern char ignore_fpu_irq;
*/
static inline void prefetch(const void *x)
{
- alternative_input(BASE_PREFETCH,
- "prefetchnta (%1)",
+ alternative_input(BASE_PREFETCH, "prefetchnta %P1",
X86_FEATURE_XMM,
- "r" (x));
+ "m" (*(const char *)x));
}
/*
@@ -788,10 +815,9 @@ static inline void prefetch(const void *x)
*/
static inline void prefetchw(const void *x)
{
- alternative_input(BASE_PREFETCH,
- "prefetchw (%1)",
- X86_FEATURE_3DNOW,
- "r" (x));
+ alternative_input(BASE_PREFETCH, "prefetchw %P1",
+ X86_FEATURE_3DNOWPREFETCH,
+ "m" (*(const char *)x));
}
static inline void spin_lock_prefetch(const void *x)
@@ -799,6 +825,9 @@ static inline void spin_lock_prefetch(const void *x)
prefetchw(x);
}
+#define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \
+ TOP_OF_KERNEL_STACK_PADDING)
+
#ifdef CONFIG_X86_32
/*
* User space process size: 3GB (default).
@@ -809,39 +838,16 @@ static inline void spin_lock_prefetch(const void *x)
#define STACK_TOP_MAX STACK_TOP
#define INIT_THREAD { \
- .sp0 = sizeof(init_stack) + (long)&init_stack, \
+ .sp0 = TOP_OF_INIT_STACK, \
.vm86_info = NULL, \
.sysenter_cs = __KERNEL_CS, \
.io_bitmap_ptr = NULL, \
}
-/*
- * Note that the .io_bitmap member must be extra-big. This is because
- * the CPU will access an additional byte beyond the end of the IO
- * permission bitmap. The extra byte must be all 1 bits, and must
- * be within the limit.
- */
-#define INIT_TSS { \
- .x86_tss = { \
- .sp0 = sizeof(init_stack) + (long)&init_stack, \
- .ss0 = __KERNEL_DS, \
- .ss1 = __KERNEL_CS, \
- .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, \
- }, \
- .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 }, \
-}
-
extern unsigned long thread_saved_pc(struct task_struct *tsk);
-#define THREAD_SIZE_LONGS (THREAD_SIZE/sizeof(unsigned long))
-#define KSTK_TOP(info) \
-({ \
- unsigned long *__ptr = (unsigned long *)(info); \
- (unsigned long)(&__ptr[THREAD_SIZE_LONGS]); \
-})
-
/*
- * The below -8 is to reserve 8 bytes on top of the ring0 stack.
+ * TOP_OF_KERNEL_STACK_PADDING reserves 8 bytes on top of the ring0 stack.
* This is necessary to guarantee that the entire "struct pt_regs"
* is accessible even if the CPU haven't stored the SS/ESP registers
* on the stack (interrupt gate does not save these registers
@@ -850,11 +856,11 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
* "struct pt_regs" is possible, but they may contain the
* completely wrong values.
*/
-#define task_pt_regs(task) \
-({ \
- struct pt_regs *__regs__; \
- __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \
- __regs__ - 1; \
+#define task_pt_regs(task) \
+({ \
+ unsigned long __ptr = (unsigned long)task_stack_page(task); \
+ __ptr += THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; \
+ ((struct pt_regs *)__ptr) - 1; \
})
#define KSTK_ESP(task) (task_pt_regs(task)->sp)
@@ -886,11 +892,7 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
#define STACK_TOP_MAX TASK_SIZE_MAX
#define INIT_THREAD { \
- .sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
-}
-
-#define INIT_TSS { \
- .x86_tss.sp0 = (unsigned long)&init_stack + sizeof(init_stack) \
+ .sp0 = TOP_OF_INIT_STACK \
}
/*
@@ -902,11 +904,6 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk);
#define task_pt_regs(tsk) ((struct pt_regs *)(tsk)->thread.sp0 - 1)
extern unsigned long KSTK_ESP(struct task_struct *task);
-/*
- * User space RSP while inside the SYSCALL fast path
- */
-DECLARE_PER_CPU(unsigned long, old_rsp);
-
#endif /* CONFIG_X86_64 */
extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 86fc2bb82287..19507ffa5d28 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -31,13 +31,17 @@ struct pt_regs {
#else /* __i386__ */
struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long bp;
unsigned long bx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@@ -47,9 +51,12 @@ struct pt_regs {
unsigned long dx;
unsigned long si;
unsigned long di;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
unsigned long orig_ax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
unsigned long ip;
unsigned long cs;
unsigned long flags;
@@ -89,11 +96,13 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
}
/*
- * user_mode_vm(regs) determines whether a register set came from user mode.
- * This is true if V8086 mode was enabled OR if the register set was from
- * protected mode with RPL-3 CS value. This tricky test checks that with
- * one comparison. Many places in the kernel can bypass this full check
- * if they have already ruled out V8086 mode, so user_mode(regs) can be used.
+ * user_mode(regs) determines whether a register set came from user
+ * mode. On x86_32, this is true if V8086 mode was enabled OR if the
+ * register set was from protected mode with RPL-3 CS value. This
+ * tricky test checks that with one comparison.
+ *
+ * On x86_64, vm86 mode is mercifully nonexistent, and we don't need
+ * the extra check.
*/
static inline int user_mode(struct pt_regs *regs)
{
@@ -104,16 +113,6 @@ static inline int user_mode(struct pt_regs *regs)
#endif
}
-static inline int user_mode_vm(struct pt_regs *regs)
-{
-#ifdef CONFIG_X86_32
- return ((regs->cs & SEGMENT_RPL_MASK) | (regs->flags & X86_VM_MASK)) >=
- USER_RPL;
-#else
- return user_mode(regs);
-#endif
-}
-
static inline int v8086_mode(struct pt_regs *regs)
{
#ifdef CONFIG_X86_32
@@ -138,12 +137,8 @@ static inline bool user_64bit_mode(struct pt_regs *regs)
#endif
}
-#define current_user_stack_pointer() this_cpu_read(old_rsp)
-/* ia32 vs. x32 difference */
-#define compat_user_stack_pointer() \
- (test_thread_flag(TIF_IA32) \
- ? current_pt_regs()->sp \
- : this_cpu_read(old_rsp))
+#define current_user_stack_pointer() current_pt_regs()->sp
+#define compat_user_stack_pointer() current_pt_regs()->sp
#endif
#ifdef CONFIG_X86_32
@@ -248,7 +243,7 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
*/
#define arch_ptrace_stop_needed(code, info) \
({ \
- set_thread_flag(TIF_NOTIFY_RESUME); \
+ force_iret(); \
false; \
})
diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h
index d6b078e9fa28..25b1cc07d496 100644
--- a/arch/x86/include/asm/pvclock.h
+++ b/arch/x86/include/asm/pvclock.h
@@ -95,6 +95,7 @@ unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
struct pvclock_vsyscall_time_info {
struct pvclock_vcpu_time_info pvti;
+ u32 migrate_count;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define PVTI_SIZE sizeof(struct pvclock_vsyscall_time_info)
diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h
index db257a58571f..5a9856eb12ba 100644
--- a/arch/x86/include/asm/segment.h
+++ b/arch/x86/include/asm/segment.h
@@ -3,8 +3,10 @@
#include <linux/const.h>
-/* Constructor for a conventional segment GDT (or LDT) entry */
-/* This is a macro so it can be used in initializers */
+/*
+ * Constructor for a conventional segment GDT (or LDT) entry.
+ * This is a macro so it can be used in initializers.
+ */
#define GDT_ENTRY(flags, base, limit) \
((((base) & _AC(0xff000000,ULL)) << (56-24)) | \
(((flags) & _AC(0x0000f0ff,ULL)) << 40) | \
@@ -12,198 +14,228 @@
(((base) & _AC(0x00ffffff,ULL)) << 16) | \
(((limit) & _AC(0x0000ffff,ULL))))
-/* Simple and small GDT entries for booting only */
+/* Simple and small GDT entries for booting only: */
#define GDT_ENTRY_BOOT_CS 2
-#define __BOOT_CS (GDT_ENTRY_BOOT_CS * 8)
+#define GDT_ENTRY_BOOT_DS 3
+#define GDT_ENTRY_BOOT_TSS 4
+#define __BOOT_CS (GDT_ENTRY_BOOT_CS*8)
+#define __BOOT_DS (GDT_ENTRY_BOOT_DS*8)
+#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS*8)
+
+/*
+ * Bottom two bits of selector give the ring
+ * privilege level
+ */
+#define SEGMENT_RPL_MASK 0x3
-#define GDT_ENTRY_BOOT_DS (GDT_ENTRY_BOOT_CS + 1)
-#define __BOOT_DS (GDT_ENTRY_BOOT_DS * 8)
+/* User mode is privilege level 3: */
+#define USER_RPL 0x3
-#define GDT_ENTRY_BOOT_TSS (GDT_ENTRY_BOOT_CS + 2)
-#define __BOOT_TSS (GDT_ENTRY_BOOT_TSS * 8)
+/* Bit 2 is Table Indicator (TI): selects between LDT or GDT */
+#define SEGMENT_TI_MASK 0x4
+/* LDT segment has TI set ... */
+#define SEGMENT_LDT 0x4
+/* ... GDT has it cleared */
+#define SEGMENT_GDT 0x0
-#define SEGMENT_RPL_MASK 0x3 /*
- * Bottom two bits of selector give the ring
- * privilege level
- */
-#define SEGMENT_TI_MASK 0x4 /* Bit 2 is table indicator (LDT/GDT) */
-#define USER_RPL 0x3 /* User mode is privilege level 3 */
-#define SEGMENT_LDT 0x4 /* LDT segment has TI set... */
-#define SEGMENT_GDT 0x0 /* ... GDT has it cleared */
+#define GDT_ENTRY_INVALID_SEG 0
#ifdef CONFIG_X86_32
/*
* The layout of the per-CPU GDT under Linux:
*
- * 0 - null
+ * 0 - null <=== cacheline #1
* 1 - reserved
* 2 - reserved
* 3 - reserved
*
- * 4 - unused <==== new cacheline
+ * 4 - unused <=== cacheline #2
* 5 - unused
*
* ------- start of TLS (Thread-Local Storage) segments:
*
* 6 - TLS segment #1 [ glibc's TLS segment ]
* 7 - TLS segment #2 [ Wine's %fs Win32 segment ]
- * 8 - TLS segment #3
+ * 8 - TLS segment #3 <=== cacheline #3
* 9 - reserved
* 10 - reserved
* 11 - reserved
*
* ------- start of kernel segments:
*
- * 12 - kernel code segment <==== new cacheline
+ * 12 - kernel code segment <=== cacheline #4
* 13 - kernel data segment
* 14 - default user CS
* 15 - default user DS
- * 16 - TSS
+ * 16 - TSS <=== cacheline #5
* 17 - LDT
* 18 - PNPBIOS support (16->32 gate)
* 19 - PNPBIOS support
- * 20 - PNPBIOS support
+ * 20 - PNPBIOS support <=== cacheline #6
* 21 - PNPBIOS support
* 22 - PNPBIOS support
* 23 - APM BIOS support
- * 24 - APM BIOS support
+ * 24 - APM BIOS support <=== cacheline #7
* 25 - APM BIOS support
*
* 26 - ESPFIX small SS
* 27 - per-cpu [ offset to per-cpu data area ]
- * 28 - stack_canary-20 [ for stack protector ]
+ * 28 - stack_canary-20 [ for stack protector ] <=== cacheline #8
* 29 - unused
* 30 - unused
* 31 - TSS for double fault handler
*/
-#define GDT_ENTRY_TLS_MIN 6
-#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+#define GDT_ENTRY_TLS_MIN 6
+#define GDT_ENTRY_TLS_MAX (GDT_ENTRY_TLS_MIN + GDT_ENTRY_TLS_ENTRIES - 1)
+#define GDT_ENTRY_KERNEL_CS 12
+#define GDT_ENTRY_KERNEL_DS 13
#define GDT_ENTRY_DEFAULT_USER_CS 14
-
#define GDT_ENTRY_DEFAULT_USER_DS 15
+#define GDT_ENTRY_TSS 16
+#define GDT_ENTRY_LDT 17
+#define GDT_ENTRY_PNPBIOS_CS32 18
+#define GDT_ENTRY_PNPBIOS_CS16 19
+#define GDT_ENTRY_PNPBIOS_DS 20
+#define GDT_ENTRY_PNPBIOS_TS1 21
+#define GDT_ENTRY_PNPBIOS_TS2 22
+#define GDT_ENTRY_APMBIOS_BASE 23
+
+#define GDT_ENTRY_ESPFIX_SS 26
+#define GDT_ENTRY_PERCPU 27
+#define GDT_ENTRY_STACK_CANARY 28
+
+#define GDT_ENTRY_DOUBLEFAULT_TSS 31
-#define GDT_ENTRY_KERNEL_BASE (12)
+/*
+ * Number of entries in the GDT table:
+ */
+#define GDT_ENTRIES 32
-#define GDT_ENTRY_KERNEL_CS (GDT_ENTRY_KERNEL_BASE+0)
+/*
+ * Segment selector values corresponding to the above entries:
+ */
-#define GDT_ENTRY_KERNEL_DS (GDT_ENTRY_KERNEL_BASE+1)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
-#define GDT_ENTRY_TSS (GDT_ENTRY_KERNEL_BASE+4)
-#define GDT_ENTRY_LDT (GDT_ENTRY_KERNEL_BASE+5)
+/* segment for calling fn: */
+#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32*8)
+/* code segment for BIOS: */
+#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16*8)
-#define GDT_ENTRY_PNPBIOS_BASE (GDT_ENTRY_KERNEL_BASE+6)
-#define GDT_ENTRY_APMBIOS_BASE (GDT_ENTRY_KERNEL_BASE+11)
+/* "Is this PNP code selector (PNP_CS32 or PNP_CS16)?" */
+#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == PNP_CS32)
-#define GDT_ENTRY_ESPFIX_SS (GDT_ENTRY_KERNEL_BASE+14)
-#define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8)
+/* data segment for BIOS: */
+#define PNP_DS (GDT_ENTRY_PNPBIOS_DS*8)
+/* transfer data segment: */
+#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1*8)
+/* another data segment: */
+#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2*8)
-#define GDT_ENTRY_PERCPU (GDT_ENTRY_KERNEL_BASE+15)
#ifdef CONFIG_SMP
-#define __KERNEL_PERCPU (GDT_ENTRY_PERCPU * 8)
+# define __KERNEL_PERCPU (GDT_ENTRY_PERCPU*8)
#else
-#define __KERNEL_PERCPU 0
+# define __KERNEL_PERCPU 0
#endif
-#define GDT_ENTRY_STACK_CANARY (GDT_ENTRY_KERNEL_BASE+16)
#ifdef CONFIG_CC_STACKPROTECTOR
-#define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
+# define __KERNEL_STACK_CANARY (GDT_ENTRY_STACK_CANARY*8)
#else
-#define __KERNEL_STACK_CANARY 0
+# define __KERNEL_STACK_CANARY 0
#endif
-#define GDT_ENTRY_DOUBLEFAULT_TSS 31
-
-/*
- * The GDT has 32 entries
- */
-#define GDT_ENTRIES 32
+#else /* 64-bit: */
-/* The PnP BIOS entries in the GDT */
-#define GDT_ENTRY_PNPBIOS_CS32 (GDT_ENTRY_PNPBIOS_BASE + 0)
-#define GDT_ENTRY_PNPBIOS_CS16 (GDT_ENTRY_PNPBIOS_BASE + 1)
-#define GDT_ENTRY_PNPBIOS_DS (GDT_ENTRY_PNPBIOS_BASE + 2)
-#define GDT_ENTRY_PNPBIOS_TS1 (GDT_ENTRY_PNPBIOS_BASE + 3)
-#define GDT_ENTRY_PNPBIOS_TS2 (GDT_ENTRY_PNPBIOS_BASE + 4)
-
-/* The PnP BIOS selectors */
-#define PNP_CS32 (GDT_ENTRY_PNPBIOS_CS32 * 8) /* segment for calling fn */
-#define PNP_CS16 (GDT_ENTRY_PNPBIOS_CS16 * 8) /* code segment for BIOS */
-#define PNP_DS (GDT_ENTRY_PNPBIOS_DS * 8) /* data segment for BIOS */
-#define PNP_TS1 (GDT_ENTRY_PNPBIOS_TS1 * 8) /* transfer data segment */
-#define PNP_TS2 (GDT_ENTRY_PNPBIOS_TS2 * 8) /* another data segment */
+#include <asm/cache.h>
+#define GDT_ENTRY_KERNEL32_CS 1
+#define GDT_ENTRY_KERNEL_CS 2
+#define GDT_ENTRY_KERNEL_DS 3
/*
- * Matching rules for certain types of segments.
+ * We cannot use the same code segment descriptor for user and kernel mode,
+ * not even in long flat mode, because of different DPL.
+ *
+ * GDT layout to get 64-bit SYSCALL/SYSRET support right. SYSRET hardcodes
+ * selectors:
+ *
+ * if returning to 32-bit userspace: cs = STAR.SYSRET_CS,
+ * if returning to 64-bit userspace: cs = STAR.SYSRET_CS+16,
+ *
+ * ss = STAR.SYSRET_CS+8 (in either case)
+ *
+ * thus USER_DS should be between 32-bit and 64-bit code selectors:
*/
+#define GDT_ENTRY_DEFAULT_USER32_CS 4
+#define GDT_ENTRY_DEFAULT_USER_DS 5
+#define GDT_ENTRY_DEFAULT_USER_CS 6
-/* Matches PNP_CS32 and PNP_CS16 (they must be consecutive) */
-#define SEGMENT_IS_PNP_CODE(x) (((x) & 0xf4) == GDT_ENTRY_PNPBIOS_BASE * 8)
-
+/* Needs two entries */
+#define GDT_ENTRY_TSS 8
+/* Needs two entries */
+#define GDT_ENTRY_LDT 10
-#else
-#include <asm/cache.h>
-
-#define GDT_ENTRY_KERNEL32_CS 1
-#define GDT_ENTRY_KERNEL_CS 2
-#define GDT_ENTRY_KERNEL_DS 3
+#define GDT_ENTRY_TLS_MIN 12
+#define GDT_ENTRY_TLS_MAX 14
-#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS * 8)
+/* Abused to load per CPU data from limit */
+#define GDT_ENTRY_PER_CPU 15
/*
- * we cannot use the same code segment descriptor for user and kernel
- * -- not even in the long flat mode, because of different DPL /kkeil
- * The segment offset needs to contain a RPL. Grr. -AK
- * GDT layout to get 64bit syscall right (sysret hardcodes gdt offsets)
+ * Number of entries in the GDT table:
*/
-#define GDT_ENTRY_DEFAULT_USER32_CS 4
-#define GDT_ENTRY_DEFAULT_USER_DS 5
-#define GDT_ENTRY_DEFAULT_USER_CS 6
-#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3)
-#define __USER32_DS __USER_DS
-
-#define GDT_ENTRY_TSS 8 /* needs two entries */
-#define GDT_ENTRY_LDT 10 /* needs two entries */
-#define GDT_ENTRY_TLS_MIN 12
-#define GDT_ENTRY_TLS_MAX 14
-
-#define GDT_ENTRY_PER_CPU 15 /* Abused to load per CPU data from limit */
-#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU * 8 + 3)
+#define GDT_ENTRIES 16
-/* TLS indexes for 64bit - hardcoded in arch_prctl */
-#define FS_TLS 0
-#define GS_TLS 1
-
-#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
-#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
-
-#define GDT_ENTRIES 16
+/*
+ * Segment selector values corresponding to the above entries:
+ *
+ * Note, selectors also need to have a correct RPL,
+ * expressed with the +3 value for user-space selectors:
+ */
+#define __KERNEL32_CS (GDT_ENTRY_KERNEL32_CS*8)
+#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
+#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
+#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8 + 3)
+#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3)
+#define __USER32_DS __USER_DS
+#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3)
+#define __PER_CPU_SEG (GDT_ENTRY_PER_CPU*8 + 3)
+
+/* TLS indexes for 64-bit - hardcoded in arch_prctl(): */
+#define FS_TLS 0
+#define GS_TLS 1
+
+#define GS_TLS_SEL ((GDT_ENTRY_TLS_MIN+GS_TLS)*8 + 3)
+#define FS_TLS_SEL ((GDT_ENTRY_TLS_MIN+FS_TLS)*8 + 3)
#endif
-#define __KERNEL_CS (GDT_ENTRY_KERNEL_CS*8)
-#define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8)
-#define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8+3)
-#define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8+3)
#ifndef CONFIG_PARAVIRT
-#define get_kernel_rpl() 0
+# define get_kernel_rpl() 0
#endif
-#define IDT_ENTRIES 256
-#define NUM_EXCEPTION_VECTORS 32
-/* Bitmask of exception vectors which push an error code on the stack */
-#define EXCEPTION_ERRCODE_MASK 0x00027d00
-#define GDT_SIZE (GDT_ENTRIES * 8)
-#define GDT_ENTRY_TLS_ENTRIES 3
-#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES * 8)
+#define IDT_ENTRIES 256
+#define NUM_EXCEPTION_VECTORS 32
+
+/* Bitmask of exception vectors which push an error code on the stack: */
+#define EXCEPTION_ERRCODE_MASK 0x00027d00
+
+#define GDT_SIZE (GDT_ENTRIES*8)
+#define GDT_ENTRY_TLS_ENTRIES 3
+#define TLS_SIZE (GDT_ENTRY_TLS_ENTRIES* 8)
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
+
extern const char early_idt_handlers[NUM_EXCEPTION_VECTORS][2+2+5];
#ifdef CONFIG_TRACING
-#define trace_early_idt_handlers early_idt_handlers
+# define trace_early_idt_handlers early_idt_handlers
#endif
/*
@@ -228,37 +260,30 @@ do { \
} while (0)
/*
- * Save a segment register away
+ * Save a segment register away:
*/
#define savesegment(seg, value) \
asm("mov %%" #seg ",%0":"=r" (value) : : "memory")
/*
- * x86_32 user gs accessors.
+ * x86-32 user GS accessors:
*/
#ifdef CONFIG_X86_32
-#ifdef CONFIG_X86_32_LAZY_GS
-#define get_user_gs(regs) (u16)({unsigned long v; savesegment(gs, v); v;})
-#define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
-#define task_user_gs(tsk) ((tsk)->thread.gs)
-#define lazy_save_gs(v) savesegment(gs, (v))
-#define lazy_load_gs(v) loadsegment(gs, (v))
-#else /* X86_32_LAZY_GS */
-#define get_user_gs(regs) (u16)((regs)->gs)
-#define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
-#define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
-#define lazy_save_gs(v) do { } while (0)
-#define lazy_load_gs(v) do { } while (0)
-#endif /* X86_32_LAZY_GS */
+# ifdef CONFIG_X86_32_LAZY_GS
+# define get_user_gs(regs) (u16)({ unsigned long v; savesegment(gs, v); v; })
+# define set_user_gs(regs, v) loadsegment(gs, (unsigned long)(v))
+# define task_user_gs(tsk) ((tsk)->thread.gs)
+# define lazy_save_gs(v) savesegment(gs, (v))
+# define lazy_load_gs(v) loadsegment(gs, (v))
+# else /* X86_32_LAZY_GS */
+# define get_user_gs(regs) (u16)((regs)->gs)
+# define set_user_gs(regs, v) do { (regs)->gs = (v); } while (0)
+# define task_user_gs(tsk) (task_pt_regs(tsk)->gs)
+# define lazy_save_gs(v) do { } while (0)
+# define lazy_load_gs(v) do { } while (0)
+# endif /* X86_32_LAZY_GS */
#endif /* X86_32 */
-static inline unsigned long get_limit(unsigned long segment)
-{
- unsigned long __limit;
- asm("lsll %1,%0" : "=r" (__limit) : "r" (segment));
- return __limit + 1;
-}
-
#endif /* !__ASSEMBLY__ */
#endif /* __KERNEL__ */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ff4e7b236e21..f69e06b283fb 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -66,6 +66,11 @@ static inline void x86_ce4100_early_setup(void) { }
*/
extern struct boot_params boot_params;
+static inline bool kaslr_enabled(void)
+{
+ return !!(boot_params.hdr.loadflags & KASLR_FLAG);
+}
+
/*
* Do NOT EVER look at the BIOS memory size location.
* It does not work on many machines.
diff --git a/arch/x86/include/asm/sigcontext.h b/arch/x86/include/asm/sigcontext.h
index 9dfce4e0417d..6fe6b182c998 100644
--- a/arch/x86/include/asm/sigcontext.h
+++ b/arch/x86/include/asm/sigcontext.h
@@ -57,9 +57,9 @@ struct sigcontext {
unsigned long ip;
unsigned long flags;
unsigned short cs;
- unsigned short gs;
- unsigned short fs;
- unsigned short __pad0;
+ unsigned short __pad2; /* Was called gs, but was always zero. */
+ unsigned short __pad1; /* Was called fs, but was always zero. */
+ unsigned short ss;
unsigned long err;
unsigned long trapno;
unsigned long oldmask;
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 7a958164088c..89db46752a8f 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,9 +13,7 @@
X86_EFLAGS_CF | X86_EFLAGS_RF)
void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
- unsigned long *pax);
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask);
diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index 8d3120f4e270..ba665ebd17bb 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -27,23 +27,11 @@
#ifdef CONFIG_X86_SMAP
-#define ASM_CLAC \
- 661: ASM_NOP3 ; \
- .pushsection .altinstr_replacement, "ax" ; \
- 662: __ASM_CLAC ; \
- .popsection ; \
- .pushsection .altinstructions, "a" ; \
- altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
- .popsection
-
-#define ASM_STAC \
- 661: ASM_NOP3 ; \
- .pushsection .altinstr_replacement, "ax" ; \
- 662: __ASM_STAC ; \
- .popsection ; \
- .pushsection .altinstructions, "a" ; \
- altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
- .popsection
+#define ASM_CLAC \
+ ALTERNATIVE "", __stringify(__ASM_CLAC), X86_FEATURE_SMAP
+
+#define ASM_STAC \
+ ALTERNATIVE "", __stringify(__ASM_STAC), X86_FEATURE_SMAP
#else /* CONFIG_X86_SMAP */
@@ -61,20 +49,20 @@
static __always_inline void clac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
+ alternative("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP);
}
static __always_inline void stac(void)
{
/* Note: a barrier is implicit in alternative() */
- alternative(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP);
+ alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
}
/* These macros can be used in asm() statements */
#define ASM_CLAC \
- ALTERNATIVE(ASM_NOP3, __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
+ ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
#define ASM_STAC \
- ALTERNATIVE(ASM_NOP3, __stringify(__ASM_STAC), X86_FEATURE_SMAP)
+ ALTERNATIVE("", __stringify(__ASM_STAC), X86_FEATURE_SMAP)
#else /* CONFIG_X86_SMAP */
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 8cd1cc3bc835..81d02fc7dafa 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -154,6 +154,7 @@ void cpu_die_common(unsigned int cpu);
void native_smp_prepare_boot_cpu(void);
void native_smp_prepare_cpus(unsigned int max_cpus);
void native_smp_cpus_done(unsigned int max_cpus);
+void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
int native_cpu_disable(void);
void native_cpu_die(unsigned int cpu);
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h
index 6a4b00fafb00..aeb4666e0c0a 100644
--- a/arch/x86/include/asm/special_insns.h
+++ b/arch/x86/include/asm/special_insns.h
@@ -4,6 +4,8 @@
#ifdef __KERNEL__
+#include <asm/nops.h>
+
static inline void native_clts(void)
{
asm volatile("clts");
@@ -199,6 +201,28 @@ static inline void clflushopt(volatile void *__p)
"+m" (*(volatile char __force *)__p));
}
+static inline void clwb(volatile void *__p)
+{
+ volatile struct { char x[64]; } *p = __p;
+
+ asm volatile(ALTERNATIVE_2(
+ ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
+ ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
+ X86_FEATURE_CLFLUSHOPT,
+ ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
+ X86_FEATURE_CLWB)
+ : [p] "+m" (*p)
+ : [pax] "a" (p));
+}
+
+static inline void pcommit_sfence(void)
+{
+ alternative(ASM_NOP7,
+ ".byte 0x66, 0x0f, 0xae, 0xf8\n\t" /* pcommit */
+ "sfence",
+ X86_FEATURE_PCOMMIT);
+}
+
#define nop() asm volatile ("nop")
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 1d4e4f279a32..ea2dbe82cba3 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -13,6 +13,33 @@
#include <asm/types.h>
/*
+ * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we
+ * reserve at the top of the kernel stack. We do it because of a nasty
+ * 32-bit corner case. On x86_32, the hardware stack frame is
+ * variable-length. Except for vm86 mode, struct pt_regs assumes a
+ * maximum-length frame. If we enter from CPL 0, the top 8 bytes of
+ * pt_regs don't actually exist. Ordinarily this doesn't matter, but it
+ * does in at least one case:
+ *
+ * If we take an NMI early enough in SYSENTER, then we can end up with
+ * pt_regs that extends above sp0. On the way out, in the espfix code,
+ * we can read the saved SS value, but that value will be above sp0.
+ * Without this offset, that can result in a page fault. (We are
+ * careful that, in this case, the value we read doesn't matter.)
+ *
+ * In vm86 mode, the hardware frame is much longer still, but we neither
+ * access the extra members from NMI context, nor do we write such a
+ * frame at sp0 at all.
+ *
+ * x86_64 has a fixed-length stack frame.
+ */
+#ifdef CONFIG_X86_32
+# define TOP_OF_KERNEL_STACK_PADDING 8
+#else
+# define TOP_OF_KERNEL_STACK_PADDING 0
+#endif
+
+/*
* low level task data that entry.S needs immediate access to
* - this struct should fit entirely inside of one cache line
* - this struct shares the supervisor stack pages
@@ -145,7 +172,6 @@ struct thread_info {
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
#define STACK_WARN (THREAD_SIZE/8)
-#define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8))
/*
* macros/functions for gaining access to the thread information structure
@@ -158,10 +184,7 @@ DECLARE_PER_CPU(unsigned long, kernel_stack);
static inline struct thread_info *current_thread_info(void)
{
- struct thread_info *ti;
- ti = (void *)(this_cpu_read_stable(kernel_stack) +
- KERNEL_STACK_OFFSET - THREAD_SIZE);
- return ti;
+ return (struct thread_info *)(current_top_of_stack() - THREAD_SIZE);
}
static inline unsigned long current_stack_pointer(void)
@@ -177,16 +200,37 @@ static inline unsigned long current_stack_pointer(void)
#else /* !__ASSEMBLY__ */
-/* how to get the thread information struct from ASM */
+/* Load thread_info address into "reg" */
#define GET_THREAD_INFO(reg) \
_ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \
- _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ;
+ _ASM_SUB $(THREAD_SIZE),reg ;
/*
- * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in
- * a certain register (to be used in assembler memory operands).
+ * ASM operand which evaluates to a 'thread_info' address of
+ * the current task, if it is known that "reg" is exactly "off"
+ * bytes below the top of the stack currently.
+ *
+ * ( The kernel stack's size is known at build time, it is usually
+ * 2 or 4 pages, and the bottom of the kernel stack contains
+ * the thread_info structure. So to access the thread_info very
+ * quickly from assembly code we can calculate down from the
+ * top of the kernel stack to the bottom, using constant,
+ * build-time calculations only. )
+ *
+ * For example, to fetch the current thread_info->flags value into %eax
+ * on x86-64 defconfig kernels, in syscall entry code where RSP is
+ * currently at exactly SIZEOF_PTREGS bytes away from the top of the
+ * stack:
+ *
+ * mov ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS), %eax
+ *
+ * will translate to:
+ *
+ * 8b 84 24 b8 c0 ff ff mov -0x3f48(%rsp), %eax
+ *
+ * which is below the current RSP by almost 16K.
*/
-#define THREAD_INFO(reg, off) KERNEL_STACK_OFFSET+(off)-THREAD_SIZE(reg)
+#define ASM_THREAD_INFO(field, reg, off) ((field)+(off)-THREAD_SIZE)(reg)
#endif
@@ -236,6 +280,16 @@ static inline bool is_ia32_task(void)
#endif
return false;
}
+
+/*
+ * Force syscall return via IRET by making it look as if there was
+ * some work pending. IRET is our most capable (but slowest) syscall
+ * return path, which is able to restore modified SS, CS and certain
+ * EFLAGS values that other (fast) syscall return instructions
+ * are not able to restore properly.
+ */
+#define force_iret() set_thread_flag(TIF_NOTIFY_RESUME)
+
#endif /* !__ASSEMBLY__ */
#ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 12a26b979bf1..f2f9b39b274a 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -231,6 +231,6 @@ __copy_from_user_inatomic_nocache(void *dst, const void __user *src,
}
unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest);
+copy_user_handle_tail(char *to, char *from, unsigned len);
#endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 5fa9770035dc..c9a6d68b8d62 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h
@@ -82,18 +82,15 @@ static inline int xsave_state_booting(struct xsave_struct *fx, u64 mask)
if (boot_cpu_has(X86_FEATURE_XSAVES))
asm volatile("1:"XSAVES"\n\t"
"2:\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ xstate_fault
+ : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
else
asm volatile("1:"XSAVE"\n\t"
"2:\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ xstate_fault
+ : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
-
- asm volatile(xstate_fault
- : "0" (0)
- : "memory");
-
return err;
}
@@ -112,18 +109,15 @@ static inline int xrstor_state_booting(struct xsave_struct *fx, u64 mask)
if (boot_cpu_has(X86_FEATURE_XSAVES))
asm volatile("1:"XRSTORS"\n\t"
"2:\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ xstate_fault
+ : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
else
asm volatile("1:"XRSTOR"\n\t"
"2:\n\t"
- : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
+ xstate_fault
+ : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
-
- asm volatile(xstate_fault
- : "0" (0)
- : "memory");
-
return err;
}
@@ -149,9 +143,9 @@ static inline int xsave_state(struct xsave_struct *fx, u64 mask)
*/
alternative_input_2(
"1:"XSAVE,
- "1:"XSAVEOPT,
+ XSAVEOPT,
X86_FEATURE_XSAVEOPT,
- "1:"XSAVES,
+ XSAVES,
X86_FEATURE_XSAVES,
[fx] "D" (fx), "a" (lmask), "d" (hmask) :
"memory");
@@ -178,7 +172,7 @@ static inline int xrstor_state(struct xsave_struct *fx, u64 mask)
*/
alternative_input(
"1: " XRSTOR,
- "1: " XRSTORS,
+ XRSTORS,
X86_FEATURE_XSAVES,
"D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h
index 44e6dd7e36a2..ab456dc233b5 100644
--- a/arch/x86/include/uapi/asm/bootparam.h
+++ b/arch/x86/include/uapi/asm/bootparam.h
@@ -7,7 +7,6 @@
#define SETUP_DTB 2
#define SETUP_PCI 3
#define SETUP_EFI 4
-#define SETUP_KASLR 5
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
@@ -16,6 +15,7 @@
/* loadflags */
#define LOADED_HIGH (1<<0)
+#define KASLR_FLAG (1<<1)
#define QUIET_FLAG (1<<5)
#define KEEP_SEGMENTS (1<<6)
#define CAN_USE_HEAP (1<<7)
diff --git a/arch/x86/include/uapi/asm/ptrace-abi.h b/arch/x86/include/uapi/asm/ptrace-abi.h
index 7b0a55a88851..580aee3072e0 100644
--- a/arch/x86/include/uapi/asm/ptrace-abi.h
+++ b/arch/x86/include/uapi/asm/ptrace-abi.h
@@ -25,13 +25,17 @@
#else /* __i386__ */
#if defined(__ASSEMBLY__) || defined(__FRAME_OFFSETS)
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
#define R15 0
#define R14 8
#define R13 16
#define R12 24
#define RBP 32
#define RBX 40
-/* arguments: interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
#define R11 48
#define R10 56
#define R9 64
@@ -41,15 +45,17 @@
#define RDX 96
#define RSI 104
#define RDI 112
-#define ORIG_RAX 120 /* = ERROR */
-/* end of arguments */
-/* cpu exception frame or undefined in case of fast syscall. */
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
+#define ORIG_RAX 120
+/* Return frame for iretq */
#define RIP 128
#define CS 136
#define EFLAGS 144
#define RSP 152
#define SS 160
-#define ARGOFFSET R11
#endif /* __ASSEMBLY__ */
/* top of stack page */
diff --git a/arch/x86/include/uapi/asm/ptrace.h b/arch/x86/include/uapi/asm/ptrace.h
index ac4b9aa4d999..bc16115af39b 100644
--- a/arch/x86/include/uapi/asm/ptrace.h
+++ b/arch/x86/include/uapi/asm/ptrace.h
@@ -41,13 +41,17 @@ struct pt_regs {
#ifndef __KERNEL__
struct pt_regs {
+/*
+ * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
+ * unless syscall needs a complete, fully filled "struct pt_regs".
+ */
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long rbp;
unsigned long rbx;
-/* arguments: non interrupts/non tracing syscalls only save up to here*/
+/* These regs are callee-clobbered. Always saved on kernel entry. */
unsigned long r11;
unsigned long r10;
unsigned long r9;
@@ -57,9 +61,12 @@ struct pt_regs {
unsigned long rdx;
unsigned long rsi;
unsigned long rdi;
+/*
+ * On syscall entry, this is syscall#. On CPU exception, this is error code.
+ * On hw interrupt, it's IRQ number:
+ */
unsigned long orig_rax;
-/* end of arguments */
-/* cpu exception frame or undefined */
+/* Return frame for iretq */
unsigned long rip;
unsigned long cs;
unsigned long eflags;
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index d8b9f9081e86..16dc4e8a2cd3 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -177,9 +177,24 @@ struct sigcontext {
__u64 rip;
__u64 eflags; /* RFLAGS */
__u16 cs;
- __u16 gs;
- __u16 fs;
- __u16 __pad0;
+
+ /*
+ * Prior to 2.5.64 ("[PATCH] x86-64 updates for 2.5.64-bk3"),
+ * Linux saved and restored fs and gs in these slots. This
+ * was counterproductive, as fsbase and gsbase were never
+ * saved, so arch_prctl was presumably unreliable.
+ *
+ * If these slots are ever needed for any other purpose, there
+ * is some risk that very old 64-bit binaries could get
+ * confused. I doubt that many such binaries still work,
+ * though, since the same patch in 2.5.64 also removed the
+ * 64-bit set_thread_area syscall, so it appears that there is
+ * no TLS API that works in both pre- and post-2.5.64 kernels.
+ */
+ __u16 __pad2; /* Was gs. */
+ __u16 __pad1; /* Was fs. */
+
+ __u16 ss;
__u64 err;
__u64 trapno;
__u64 oldmask;
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index c5f1a1deb91a..1fe92181ee9e 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -67,6 +67,7 @@
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
+#define EXIT_REASON_RDTSCP 51
#define EXIT_REASON_PREEMPTION_TIMER 52
#define EXIT_REASON_INVVPID 53
#define EXIT_REASON_WBINVD 54
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index cdb1b70ddad0..c887cd944f0c 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_X86_32) += i386_ksyms_32.o
obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
obj-$(CONFIG_X86_64) += mcount_64.o
obj-y += syscall_$(BITS).o vsyscall_gtod.o
+obj-$(CONFIG_IA32_EMULATION) += syscall_32.o
obj-$(CONFIG_X86_VSYSCALL_EMULATION) += vsyscall_64.o vsyscall_emu_64.o
obj-$(CONFIG_X86_ESPFIX64) += espfix_64.o
obj-$(CONFIG_SYSFS) += ksysfs.o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 3d525c6124f6..803b684676ff 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -1338,6 +1338,26 @@ static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
}
/*
+ * ACPI offers an alternative platform interface model that removes
+ * ACPI hardware requirements for platforms that do not implement
+ * the PC Architecture.
+ *
+ * We initialize the Hardware-reduced ACPI model here:
+ */
+static void __init acpi_reduced_hw_init(void)
+{
+ if (acpi_gbl_reduced_hardware) {
+ /*
+ * Override x86_init functions and bypass legacy pic
+ * in Hardware-reduced ACPI mode
+ */
+ x86_init.timers.timer_init = x86_init_noop;
+ x86_init.irqs.pre_vector_init = x86_init_noop;
+ legacy_pic = &null_legacy_pic;
+ }
+}
+
+/*
* If your system is blacklisted here, but you find that acpi=force
* works for you, please contact linux-acpi@vger.kernel.org
*/
@@ -1536,6 +1556,11 @@ int __init early_acpi_boot_init(void)
*/
early_acpi_process_madt();
+ /*
+ * Hardware-reduced ACPI mode initialization:
+ */
+ acpi_reduced_hw_init();
+
return 0;
}
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 703130f469ec..aef653193160 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -52,10 +52,25 @@ static int __init setup_noreplace_paravirt(char *str)
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif
-#define DPRINTK(fmt, ...) \
-do { \
- if (debug_alternative) \
- printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+#define DPRINTK(fmt, args...) \
+do { \
+ if (debug_alternative) \
+ printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \
+} while (0)
+
+#define DUMP_BYTES(buf, len, fmt, args...) \
+do { \
+ if (unlikely(debug_alternative)) { \
+ int j; \
+ \
+ if (!(len)) \
+ break; \
+ \
+ printk(KERN_DEBUG fmt, ##args); \
+ for (j = 0; j < (len) - 1; j++) \
+ printk(KERN_CONT "%02hhx ", buf[j]); \
+ printk(KERN_CONT "%02hhx\n", buf[j]); \
+ } \
} while (0)
/*
@@ -243,12 +258,89 @@ extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
void *text_poke_early(void *addr, const void *opcode, size_t len);
-/* Replace instructions with better alternatives for this CPU type.
- This runs before SMP is initialized to avoid SMP problems with
- self modifying code. This implies that asymmetric systems where
- APs have less capabilities than the boot processor are not handled.
- Tough. Make sure you disable such features by hand. */
+/*
+ * Are we looking at a near JMP with a 1 or 4-byte displacement.
+ */
+static inline bool is_jmp(const u8 opcode)
+{
+ return opcode == 0xeb || opcode == 0xe9;
+}
+
+static void __init_or_module
+recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
+{
+ u8 *next_rip, *tgt_rip;
+ s32 n_dspl, o_dspl;
+ int repl_len;
+
+ if (a->replacementlen != 5)
+ return;
+
+ o_dspl = *(s32 *)(insnbuf + 1);
+
+ /* next_rip of the replacement JMP */
+ next_rip = repl_insn + a->replacementlen;
+ /* target rip of the replacement JMP */
+ tgt_rip = next_rip + o_dspl;
+ n_dspl = tgt_rip - orig_insn;
+
+ DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
+
+ if (tgt_rip - orig_insn >= 0) {
+ if (n_dspl - 2 <= 127)
+ goto two_byte_jmp;
+ else
+ goto five_byte_jmp;
+ /* negative offset */
+ } else {
+ if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
+ goto two_byte_jmp;
+ else
+ goto five_byte_jmp;
+ }
+
+two_byte_jmp:
+ n_dspl -= 2;
+
+ insnbuf[0] = 0xeb;
+ insnbuf[1] = (s8)n_dspl;
+ add_nops(insnbuf + 2, 3);
+
+ repl_len = 2;
+ goto done;
+
+five_byte_jmp:
+ n_dspl -= 5;
+
+ insnbuf[0] = 0xe9;
+ *(s32 *)&insnbuf[1] = n_dspl;
+ repl_len = 5;
+
+done:
+
+ DPRINTK("final displ: 0x%08x, JMP 0x%lx",
+ n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
+}
+
+static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
+{
+ if (instr[0] != 0x90)
+ return;
+
+ add_nops(instr + (a->instrlen - a->padlen), a->padlen);
+
+ DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
+ instr, a->instrlen - a->padlen, a->padlen);
+}
+
+/*
+ * Replace instructions with better alternatives for this CPU type. This runs
+ * before SMP is initialized to avoid SMP problems with self modifying code.
+ * This implies that asymmetric systems where APs have less capabilities than
+ * the boot processor are not handled. Tough. Make sure you disable such
+ * features by hand.
+ */
void __init_or_module apply_alternatives(struct alt_instr *start,
struct alt_instr *end)
{
@@ -256,10 +348,10 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
u8 *instr, *replacement;
u8 insnbuf[MAX_PATCH_LEN];
- DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
+ DPRINTK("alt table %p -> %p", start, end);
/*
* The scan order should be from start to end. A later scanned
- * alternative code can overwrite a previous scanned alternative code.
+ * alternative code can overwrite previously scanned alternative code.
* Some kernel functions (e.g. memcpy, memset, etc) use this order to
* patch code.
*
@@ -267,29 +359,54 @@ void __init_or_module apply_alternatives(struct alt_instr *start,
* order.
*/
for (a = start; a < end; a++) {
+ int insnbuf_sz = 0;
+
instr = (u8 *)&a->instr_offset + a->instr_offset;
replacement = (u8 *)&a->repl_offset + a->repl_offset;
- BUG_ON(a->replacementlen > a->instrlen);
BUG_ON(a->instrlen > sizeof(insnbuf));
BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
- if (!boot_cpu_has(a->cpuid))
+ if (!boot_cpu_has(a->cpuid)) {
+ if (a->padlen > 1)
+ optimize_nops(a, instr);
+
continue;
+ }
+
+ DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d), pad: %d",
+ a->cpuid >> 5,
+ a->cpuid & 0x1f,
+ instr, a->instrlen,
+ replacement, a->replacementlen, a->padlen);
+
+ DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
+ DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
memcpy(insnbuf, replacement, a->replacementlen);
+ insnbuf_sz = a->replacementlen;
/* 0xe8 is a relative jump; fix the offset. */
- if (*insnbuf == 0xe8 && a->replacementlen == 5)
- *(s32 *)(insnbuf + 1) += replacement - instr;
+ if (*insnbuf == 0xe8 && a->replacementlen == 5) {
+ *(s32 *)(insnbuf + 1) += replacement - instr;
+ DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
+ *(s32 *)(insnbuf + 1),
+ (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
+ }
+
+ if (a->replacementlen && is_jmp(replacement[0]))
+ recompute_jump(a, instr, replacement, insnbuf);
- add_nops(insnbuf + a->replacementlen,
- a->instrlen - a->replacementlen);
+ if (a->instrlen > a->replacementlen) {
+ add_nops(insnbuf + a->replacementlen,
+ a->instrlen - a->replacementlen);
+ insnbuf_sz += a->instrlen - a->replacementlen;
+ }
+ DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
- text_poke_early(instr, insnbuf, a->instrlen);
+ text_poke_early(instr, insnbuf, insnbuf_sz);
}
}
#ifdef CONFIG_SMP
-
static void alternatives_smp_lock(const s32 *start, const s32 *end,
u8 *text, u8 *text_end)
{
@@ -371,8 +488,8 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
smp->locks_end = locks_end;
smp->text = text;
smp->text_end = text_end;
- DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
- __func__, smp->locks, smp->locks_end,
+ DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
+ smp->locks, smp->locks_end,
smp->text, smp->text_end, smp->name);
list_add_tail(&smp->next, &smp_alt_modules);
@@ -440,7 +557,7 @@ int alternatives_text_reserved(void *start, void *end)
return 0;
}
-#endif
+#endif /* CONFIG_SMP */
#ifdef CONFIG_PARAVIRT
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
@@ -601,7 +718,7 @@ int poke_int3_handler(struct pt_regs *regs)
if (likely(!bp_patching_in_progress))
return 0;
- if (user_mode_vm(regs) || regs->ip != (unsigned long)bp_int3_addr)
+ if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr)
return 0;
/* set up the specified breakpoint handler */
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ad3639ae1b9b..dcb52850a28f 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1084,67 +1084,6 @@ void lapic_shutdown(void)
local_irq_restore(flags);
}
-/*
- * This is to verify that we're looking at a real local APIC.
- * Check these against your board if the CPUs aren't getting
- * started for no apparent reason.
- */
-int __init verify_local_APIC(void)
-{
- unsigned int reg0, reg1;
-
- /*
- * The version register is read-only in a real APIC.
- */
- reg0 = apic_read(APIC_LVR);
- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg0);
- apic_write(APIC_LVR, reg0 ^ APIC_LVR_MASK);
- reg1 = apic_read(APIC_LVR);
- apic_printk(APIC_DEBUG, "Getting VERSION: %x\n", reg1);
-
- /*
- * The two version reads above should print the same
- * numbers. If the second one is different, then we
- * poke at a non-APIC.
- */
- if (reg1 != reg0)
- return 0;
-
- /*
- * Check if the version looks reasonably.
- */
- reg1 = GET_APIC_VERSION(reg0);
- if (reg1 == 0x00 || reg1 == 0xff)
- return 0;
- reg1 = lapic_get_maxlvt();
- if (reg1 < 0x02 || reg1 == 0xff)
- return 0;
-
- /*
- * The ID register is read/write in a real APIC.
- */
- reg0 = apic_read(APIC_ID);
- apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
- apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
- reg1 = apic_read(APIC_ID);
- apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
- apic_write(APIC_ID, reg0);
- if (reg1 != (reg0 ^ apic->apic_id_mask))
- return 0;
-
- /*
- * The next two are just to see if we have sane values.
- * They're only really relevant if we're in Virtual Wire
- * compatibility mode, but most boxes are anymore.
- */
- reg0 = apic_read(APIC_LVT0);
- apic_printk(APIC_DEBUG, "Getting LVT0: %x\n", reg0);
- reg1 = apic_read(APIC_LVT1);
- apic_printk(APIC_DEBUG, "Getting LVT1: %x\n", reg1);
-
- return 1;
-}
-
/**
* sync_Arb_IDs - synchronize APIC bus arbitration IDs
*/
@@ -2283,7 +2222,6 @@ int __init APIC_init_uniprocessor(void)
disable_ioapic_support();
default_setup_apic_routing();
- verify_local_APIC();
apic_bsp_setup(true);
return 0;
}
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index c2fd21fed002..017149cded07 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -37,10 +37,12 @@ static const struct apic apic_numachip;
static unsigned int get_apic_id(unsigned long x)
{
unsigned long value;
- unsigned int id;
+ unsigned int id = (x >> 24) & 0xff;
- rdmsrl(MSR_FAM10H_NODE_ID, value);
- id = ((x >> 24) & 0xffU) | ((value << 2) & 0xff00U);
+ if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ rdmsrl(MSR_FAM10H_NODE_ID, value);
+ id |= (value << 2) & 0xff00;
+ }
return id;
}
@@ -155,10 +157,18 @@ static int __init numachip_probe(void)
static void fixup_cpu_id(struct cpuinfo_x86 *c, int node)
{
- if (c->phys_proc_id != node) {
- c->phys_proc_id = node;
- per_cpu(cpu_llc_id, smp_processor_id()) = node;
+ u64 val;
+ u32 nodes = 1;
+
+ this_cpu_write(cpu_llc_id, node);
+
+ /* Account for nodes per socket in multi-core-module processors */
+ if (static_cpu_has_safe(X86_FEATURE_NODEID_MSR)) {
+ rdmsrl(MSR_FAM10H_NODE_ID, val);
+ nodes = ((val >> 3) & 7) + 1;
}
+
+ c->phys_proc_id = node / nodes;
}
static int __init numachip_system_init(void)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 8e9dcfd630e4..c8d92950bc04 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -144,33 +144,60 @@ static void __init uv_set_apicid_hibit(void)
static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
{
- int pnodeid, is_uv1, is_uv2, is_uv3;
-
- is_uv1 = !strcmp(oem_id, "SGI");
- is_uv2 = !strcmp(oem_id, "SGI2");
- is_uv3 = !strncmp(oem_id, "SGI3", 4); /* there are varieties of UV3 */
- if (is_uv1 || is_uv2 || is_uv3) {
- uv_hub_info->hub_revision =
- (is_uv1 ? UV1_HUB_REVISION_BASE :
- (is_uv2 ? UV2_HUB_REVISION_BASE :
- UV3_HUB_REVISION_BASE));
- pnodeid = early_get_pnodeid();
- early_get_apic_pnode_shift();
- x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
- x86_platform.nmi_init = uv_nmi_init;
- if (!strcmp(oem_table_id, "UVL"))
- uv_system_type = UV_LEGACY_APIC;
- else if (!strcmp(oem_table_id, "UVX"))
- uv_system_type = UV_X2APIC;
- else if (!strcmp(oem_table_id, "UVH")) {
- __this_cpu_write(x2apic_extra_bits,
- pnodeid << uvh_apicid.s.pnode_shift);
- uv_system_type = UV_NON_UNIQUE_APIC;
- uv_set_apicid_hibit();
- return 1;
- }
+ int pnodeid;
+ int uv_apic;
+
+ if (strncmp(oem_id, "SGI", 3) != 0)
+ return 0;
+
+ /*
+ * Determine UV arch type.
+ * SGI: UV100/1000
+ * SGI2: UV2000/3000
+ * SGI3: UV300 (truncated to 4 chars because of different varieties)
+ */
+ uv_hub_info->hub_revision =
+ !strncmp(oem_id, "SGI3", 4) ? UV3_HUB_REVISION_BASE :
+ !strcmp(oem_id, "SGI2") ? UV2_HUB_REVISION_BASE :
+ !strcmp(oem_id, "SGI") ? UV1_HUB_REVISION_BASE : 0;
+
+ if (uv_hub_info->hub_revision == 0)
+ goto badbios;
+
+ pnodeid = early_get_pnodeid();
+ early_get_apic_pnode_shift();
+ x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range;
+ x86_platform.nmi_init = uv_nmi_init;
+
+ if (!strcmp(oem_table_id, "UVX")) { /* most common */
+ uv_system_type = UV_X2APIC;
+ uv_apic = 0;
+
+ } else if (!strcmp(oem_table_id, "UVH")) { /* only UV1 systems */
+ uv_system_type = UV_NON_UNIQUE_APIC;
+ __this_cpu_write(x2apic_extra_bits,
+ pnodeid << uvh_apicid.s.pnode_shift);
+ uv_set_apicid_hibit();
+ uv_apic = 1;
+
+ } else if (!strcmp(oem_table_id, "UVL")) { /* only used for */
+ uv_system_type = UV_LEGACY_APIC; /* very small systems */
+ uv_apic = 0;
+
+ } else {
+ goto badbios;
}
- return 0;
+
+ pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n",
+ oem_id, oem_table_id, uv_system_type,
+ uv_min_hub_revision_id, uv_apic);
+
+ return uv_apic;
+
+badbios:
+ pr_err("UV: OEM_ID:%s OEM_TABLE_ID:%s\n", oem_id, oem_table_id);
+ pr_err("Current BIOS not supported, update kernel and/or BIOS\n");
+ BUG();
}
enum uv_system_type get_uv_system_type(void)
@@ -854,10 +881,14 @@ void __init uv_system_init(void)
unsigned long mmr_base, present, paddr;
unsigned short pnode_mask;
unsigned char n_lshift;
- char *hub = (is_uv1_hub() ? "UV1" :
- (is_uv2_hub() ? "UV2" :
- "UV3"));
+ char *hub = (is_uv1_hub() ? "UV100/1000" :
+ (is_uv2_hub() ? "UV2000/3000" :
+ (is_uv3_hub() ? "UV300" : NULL)));
+ if (!hub) {
+ pr_err("UV: Unknown/unsupported UV hub\n");
+ return;
+ }
pr_info("UV: Found %s hub\n", hub);
map_low_mmrs();
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index 3b3b9d33ac1d..47703aed74cf 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -68,7 +68,7 @@ void foo(void)
/* Offset from the sysenter stack to tss.sp0 */
DEFINE(TSS_sysenter_sp0, offsetof(struct tss_struct, x86_tss.sp0) -
- sizeof(struct tss_struct));
+ offsetofend(struct tss_struct, SYSENTER_stack));
#if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE)
BLANK();
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index fdcbb4d27c9f..5ce6f2da8763 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -81,6 +81,7 @@ int main(void)
#undef ENTRY
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
+ OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
BLANK();
DEFINE(__NR_syscall_max, sizeof(syscalls_64) - 1);
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index a220239cea65..dd9e50500297 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -711,6 +711,11 @@ static void init_amd(struct cpuinfo_x86 *c)
set_cpu_bug(c, X86_BUG_AMD_APIC_C1E);
rdmsr_safe(MSR_AMD64_PATCH_LEVEL, &c->microcode, &dummy);
+
+ /* 3DNow or LM implies PREFETCHW */
+ if (!cpu_has(c, X86_FEATURE_3DNOWPREFETCH))
+ if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
+ set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2346c95c6ab1..3f70538012e2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -959,38 +959,37 @@ static void identify_cpu(struct cpuinfo_x86 *c)
#endif
}
-#ifdef CONFIG_X86_64
-#ifdef CONFIG_IA32_EMULATION
-/* May not be __init: called during resume */
-static void syscall32_cpu_init(void)
-{
- /* Load these always in case some future AMD CPU supports
- SYSENTER from compat mode too. */
- wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
- wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
- wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
-
- wrmsrl(MSR_CSTAR, ia32_cstar_target);
-}
-#endif /* CONFIG_IA32_EMULATION */
-#endif /* CONFIG_X86_64 */
-
+/*
+ * Set up the CPU state needed to execute SYSENTER/SYSEXIT instructions
+ * on 32-bit kernels:
+ */
#ifdef CONFIG_X86_32
void enable_sep_cpu(void)
{
- int cpu = get_cpu();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ struct tss_struct *tss;
+ int cpu;
- if (!boot_cpu_has(X86_FEATURE_SEP)) {
- put_cpu();
- return;
- }
+ cpu = get_cpu();
+ tss = &per_cpu(cpu_tss, cpu);
+
+ if (!boot_cpu_has(X86_FEATURE_SEP))
+ goto out;
+
+ /*
+ * We cache MSR_IA32_SYSENTER_CS's value in the TSS's ss1 field --
+ * see the big comment in struct x86_hw_tss's definition.
+ */
tss->x86_tss.ss1 = __KERNEL_CS;
- tss->x86_tss.sp1 = sizeof(struct tss_struct) + (unsigned long) tss;
- wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0);
- wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.sp1, 0);
- wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) ia32_sysenter_target, 0);
+ wrmsr(MSR_IA32_SYSENTER_CS, tss->x86_tss.ss1, 0);
+
+ wrmsr(MSR_IA32_SYSENTER_ESP,
+ (unsigned long)tss + offsetofend(struct tss_struct, SYSENTER_stack),
+ 0);
+
+ wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long)ia32_sysenter_target, 0);
+
+out:
put_cpu();
}
#endif
@@ -1118,7 +1117,7 @@ static __init int setup_disablecpuid(char *arg)
__setup("clearcpuid=", setup_disablecpuid);
DEFINE_PER_CPU(unsigned long, kernel_stack) =
- (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
+ (unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(kernel_stack);
#ifdef CONFIG_X86_64
@@ -1130,8 +1129,8 @@ DEFINE_PER_CPU_FIRST(union irq_stack_union,
irq_stack_union) __aligned(PAGE_SIZE) __visible;
/*
- * The following four percpu variables are hot. Align current_task to
- * cacheline size such that all four fall in the same cacheline.
+ * The following percpu variables are hot. Align current_task to
+ * cacheline size such that they fall in the same cacheline.
*/
DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
&init_task;
@@ -1171,10 +1170,23 @@ void syscall_init(void)
*/
wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
wrmsrl(MSR_LSTAR, system_call);
- wrmsrl(MSR_CSTAR, ignore_sysret);
#ifdef CONFIG_IA32_EMULATION
- syscall32_cpu_init();
+ wrmsrl(MSR_CSTAR, ia32_cstar_target);
+ /*
+ * This only works on Intel CPUs.
+ * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP.
+ * This does not cause SYSENTER to jump to the wrong location, because
+ * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit).
+ */
+ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS);
+ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+ wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)ia32_sysenter_target);
+#else
+ wrmsrl(MSR_CSTAR, ignore_sysret);
+ wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG);
+ wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL);
+ wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL);
#endif
/* Flags to clear on syscall */
@@ -1226,6 +1238,15 @@ DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
EXPORT_PER_CPU_SYMBOL(__preempt_count);
DEFINE_PER_CPU(struct task_struct *, fpu_owner_task);
+/*
+ * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
+ * the top of the kernel stack. Use an extra percpu variable to track the
+ * top of the kernel stack directly.
+ */
+DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
+ (unsigned long)&init_thread_union + THREAD_SIZE;
+EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
+
#ifdef CONFIG_CC_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
@@ -1307,7 +1328,7 @@ void cpu_init(void)
*/
load_ucode_ap();
- t = &per_cpu(init_tss, cpu);
+ t = &per_cpu(cpu_tss, cpu);
oist = &per_cpu(orig_ist, cpu);
#ifdef CONFIG_NUMA
@@ -1391,7 +1412,7 @@ void cpu_init(void)
{
int cpu = smp_processor_id();
struct task_struct *curr = current;
- struct tss_struct *t = &per_cpu(init_tss, cpu);
+ struct tss_struct *t = &per_cpu(cpu_tss, cpu);
struct thread_struct *thread = &curr->thread;
wait_for_master_cpu(cpu);
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
index 36d99a337b49..3f20710a5b23 100644
--- a/arch/x86/kernel/cpu/mkcapflags.sh
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -6,7 +6,7 @@
IN=$1
OUT=$2
-function dump_array()
+dump_array()
{
ARRAY=$1
SIZE=$2
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index b71a7f86d68a..e2888a3ad1e3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2147,24 +2147,24 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
static unsigned long code_segment_base(struct pt_regs *regs)
{
/*
+ * For IA32 we look at the GDT/LDT segment base to convert the
+ * effective IP to a linear address.
+ */
+
+#ifdef CONFIG_X86_32
+ /*
* If we are in VM86 mode, add the segment offset to convert to a
* linear address.
*/
if (regs->flags & X86_VM_MASK)
return 0x10 * regs->cs;
- /*
- * For IA32 we look at the GDT/LDT segment base to convert the
- * effective IP to a linear address.
- */
-#ifdef CONFIG_X86_32
if (user_mode(regs) && regs->cs != __USER_CS)
return get_segment_base(regs->cs);
#else
- if (test_thread_flag(TIF_IA32)) {
- if (user_mode(regs) && regs->cs != __USER32_CS)
- return get_segment_base(regs->cs);
- }
+ if (user_mode(regs) && !user_64bit_mode(regs) &&
+ regs->cs != __USER32_CS)
+ return get_segment_base(regs->cs);
#endif
return 0;
}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 498b6d967138..258990688a5e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -212,11 +212,11 @@ static struct event_constraint intel_hsw_event_constraints[] = {
INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
/* CYCLE_ACTIVITY.CYCLES_L1D_PENDING */
- INTEL_EVENT_CONSTRAINT(0x08a3, 0x4),
+ INTEL_UEVENT_CONSTRAINT(0x08a3, 0x4),
/* CYCLE_ACTIVITY.STALLS_L1D_PENDING */
- INTEL_EVENT_CONSTRAINT(0x0ca3, 0x4),
+ INTEL_UEVENT_CONSTRAINT(0x0ca3, 0x4),
/* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
- INTEL_EVENT_CONSTRAINT(0x04a3, 0xf),
+ INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf),
EVENT_CONSTRAINT_END
};
@@ -1649,11 +1649,11 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
if (c)
return c;
- c = intel_pebs_constraints(event);
+ c = intel_shared_regs_constraints(cpuc, event);
if (c)
return c;
- c = intel_shared_regs_constraints(cpuc, event);
+ c = intel_pebs_constraints(event);
if (c)
return c;
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index aceb2f90c716..c76d3e37c6e1 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -105,7 +105,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs)
#ifdef CONFIG_X86_32
struct pt_regs fixed_regs;
- if (!user_mode_vm(regs)) {
+ if (!user_mode(regs)) {
crash_fixup_ss_esp(&fixed_regs, regs);
regs = &fixed_regs;
}
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index cf3df1d8d039..ab3b65639a3e 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -278,7 +278,7 @@ int __die(const char *str, struct pt_regs *regs, long err)
print_modules();
show_regs(regs);
#ifdef CONFIG_X86_32
- if (user_mode_vm(regs)) {
+ if (user_mode(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
} else {
@@ -307,7 +307,7 @@ void die(const char *str, struct pt_regs *regs, long err)
unsigned long flags = oops_begin();
int sig = SIGSEGV;
- if (!user_mode_vm(regs))
+ if (!user_mode(regs))
report_bug(regs->ip, regs);
if (__die(str, regs, err))
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 5abd4cd4230c..39891ff50d03 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -123,13 +123,13 @@ void show_regs(struct pt_regs *regs)
int i;
show_regs_print_info(KERN_EMERG);
- __show_regs(regs, !user_mode_vm(regs));
+ __show_regs(regs, !user_mode(regs));
/*
* When in-kernel, we also print out the stack and code at the
* time of the fault..
*/
- if (!user_mode_vm(regs)) {
+ if (!user_mode(regs)) {
unsigned int code_prologue = code_bytes * 43 / 64;
unsigned int code_len = code_bytes;
unsigned char c;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 31e2d5bf3e38..1c309763e321 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -395,10 +395,13 @@ sysenter_past_esp:
/*CFI_REL_OFFSET cs, 0*/
/*
* Push current_thread_info()->sysenter_return to the stack.
- * A tiny bit of offset fixup is necessary - 4*4 means the 4 words
- * pushed above; +8 corresponds to copy_thread's esp0 setting.
+ * A tiny bit of offset fixup is necessary: TI_sysenter_return
+ * is relative to thread_info, which is at the bottom of the
+ * kernel stack page. 4*4 means the 4 words pushed above;
+ * TOP_OF_KERNEL_STACK_PADDING takes us to the top of the stack;
+ * and THREAD_SIZE takes us to the bottom.
*/
- pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp)
+ pushl_cfi ((TI_sysenter_return) - THREAD_SIZE + TOP_OF_KERNEL_STACK_PADDING + 4*4)(%esp)
CFI_REL_OFFSET eip, 0
pushl_cfi %eax
@@ -432,7 +435,7 @@ sysenter_after_call:
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testl $_TIF_ALLWORK_MASK, %ecx
- jne sysexit_audit
+ jnz sysexit_audit
sysenter_exit:
/* if something modifies registers it must also disable sysexit */
movl PT_EIP(%esp), %edx
@@ -460,7 +463,7 @@ sysenter_audit:
sysexit_audit:
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
- jne syscall_exit_work
+ jnz syscall_exit_work
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_ANY)
movl %eax,%edx /* second arg, syscall return value */
@@ -472,7 +475,7 @@ sysexit_audit:
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
- jne syscall_exit_work
+ jnz syscall_exit_work
movl PT_EAX(%esp),%eax /* reload syscall return value */
jmp sysenter_exit
#endif
@@ -510,7 +513,7 @@ syscall_exit:
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testl $_TIF_ALLWORK_MASK, %ecx # current->work
- jne syscall_exit_work
+ jnz syscall_exit_work
restore_all:
TRACE_IRQS_IRET
@@ -612,7 +615,7 @@ work_notifysig: # deal with pending signals and
#ifdef CONFIG_VM86
testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
movl %esp, %eax
- jne work_notifysig_v86 # returning to kernel-space or
+ jnz work_notifysig_v86 # returning to kernel-space or
# vm86-space
1:
#else
@@ -720,43 +723,22 @@ END(sysenter_badsys)
.endm
/*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
*/
-.section .init.rodata,"a"
-ENTRY(interrupt)
-.section .entry.text, "ax"
- .p2align 5
- .p2align CONFIG_X86_L1_CACHE_SHIFT
+ .align 8
ENTRY(irq_entries_start)
RING0_INT_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
- .balign 32
- .rept 7
- .if vector < FIRST_SYSTEM_VECTOR
- .if vector <> FIRST_EXTERNAL_VECTOR
+ vector=FIRST_EXTERNAL_VECTOR
+ .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+ pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
+ vector=vector+1
+ jmp common_interrupt
CFI_ADJUST_CFA_OFFSET -4
- .endif
-1: pushl_cfi $(~vector+0x80) /* Note: always in signed byte range */
- .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
- jmp 2f
- .endif
- .previous
- .long 1b
- .section .entry.text, "ax"
-vector=vector+1
- .endif
- .endr
-2: jmp common_interrupt
-.endr
+ .align 8
+ .endr
END(irq_entries_start)
-.previous
-END(interrupt)
-.previous
-
/*
* the CPU automatically disables interrupts when executing an IRQ vector,
* so IRQ-flags tracing has to follow that:
@@ -816,15 +798,9 @@ ENTRY(simd_coprocessor_error)
pushl_cfi $0
#ifdef CONFIG_X86_INVD_BUG
/* AMD 486 bug: invd from userspace calls exception 19 instead of #GP */
-661: pushl_cfi $do_general_protection
-662:
-.section .altinstructions,"a"
- altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
-.previous
-.section .altinstr_replacement,"ax"
-663: pushl $do_simd_coprocessor_error
-664:
-.previous
+ ALTERNATIVE "pushl_cfi $do_general_protection", \
+ "pushl $do_simd_coprocessor_error", \
+ X86_FEATURE_XMM
#else
pushl_cfi $do_simd_coprocessor_error
#endif
@@ -1240,20 +1216,13 @@ error_code:
/*CFI_REL_OFFSET es, 0*/
pushl_cfi %ds
/*CFI_REL_OFFSET ds, 0*/
- pushl_cfi %eax
- CFI_REL_OFFSET eax, 0
- pushl_cfi %ebp
- CFI_REL_OFFSET ebp, 0
- pushl_cfi %edi
- CFI_REL_OFFSET edi, 0
- pushl_cfi %esi
- CFI_REL_OFFSET esi, 0
- pushl_cfi %edx
- CFI_REL_OFFSET edx, 0
- pushl_cfi %ecx
- CFI_REL_OFFSET ecx, 0
- pushl_cfi %ebx
- CFI_REL_OFFSET ebx, 0
+ pushl_cfi_reg eax
+ pushl_cfi_reg ebp
+ pushl_cfi_reg edi
+ pushl_cfi_reg esi
+ pushl_cfi_reg edx
+ pushl_cfi_reg ecx
+ pushl_cfi_reg ebx
cld
movl $(__KERNEL_PERCPU), %ecx
movl %ecx, %fs
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 10074ad9ebf8..c7b238494b31 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -14,27 +14,14 @@
* NOTE: This code handles signal-recognition, which happens every time
* after an interrupt and after each system call.
*
- * Normal syscalls and interrupts don't save a full stack frame, this is
- * only done for syscall tracing, signals or fork/exec et.al.
- *
* A note on terminology:
- * - top of stack: Architecture defined interrupt frame from SS to RIP
+ * - iret frame: Architecture defined interrupt frame from SS to RIP
* at the top of the kernel process stack.
- * - partial stack frame: partially saved registers up to R11.
- * - full stack frame: Like partial stack frame, but all register saved.
*
* Some macro usage:
* - CFI macros are used to generate dwarf2 unwind information for better
* backtraces. They don't change any code.
- * - SAVE_ALL/RESTORE_ALL - Save/restore all registers
- * - SAVE_ARGS/RESTORE_ARGS - Save/restore registers that C functions modify.
- * There are unfortunately lots of special cases where some registers
- * not touched. The macro is a big mess that should be cleaned up.
- * - SAVE_REST/RESTORE_REST - Handle the registers not saved by SAVE_ARGS.
- * Gives a full stack frame.
* - ENTRY/END Define functions in the symbol table.
- * - FIXUP_TOP_OF_STACK/RESTORE_TOP_OF_STACK - Fix up the hardware stack
- * frame that is otherwise undefined after a SYSCALL
* - TRACE_IRQ_* - Trace hard interrupt state for lock debugging.
* - idtentry - Define exception entry points.
*/
@@ -70,10 +57,6 @@
.section .entry.text, "ax"
-#ifndef CONFIG_PREEMPT
-#define retint_kernel retint_restore_args
-#endif
-
#ifdef CONFIG_PARAVIRT
ENTRY(native_usergs_sysret64)
swapgs
@@ -82,9 +65,9 @@ ENDPROC(native_usergs_sysret64)
#endif /* CONFIG_PARAVIRT */
-.macro TRACE_IRQS_IRETQ offset=ARGOFFSET
+.macro TRACE_IRQS_IRETQ
#ifdef CONFIG_TRACE_IRQFLAGS
- bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
+ bt $9,EFLAGS(%rsp) /* interrupts off? */
jnc 1f
TRACE_IRQS_ON
1:
@@ -116,8 +99,8 @@ ENDPROC(native_usergs_sysret64)
call debug_stack_reset
.endm
-.macro TRACE_IRQS_IRETQ_DEBUG offset=ARGOFFSET
- bt $9,EFLAGS-\offset(%rsp) /* interrupts off? */
+.macro TRACE_IRQS_IRETQ_DEBUG
+ bt $9,EFLAGS(%rsp) /* interrupts off? */
jnc 1f
TRACE_IRQS_ON_DEBUG
1:
@@ -130,34 +113,7 @@ ENDPROC(native_usergs_sysret64)
#endif
/*
- * C code is not supposed to know about undefined top of stack. Every time
- * a C function with an pt_regs argument is called from the SYSCALL based
- * fast path FIXUP_TOP_OF_STACK is needed.
- * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs
- * manipulation.
- */
-
- /* %rsp:at FRAMEEND */
- .macro FIXUP_TOP_OF_STACK tmp offset=0
- movq PER_CPU_VAR(old_rsp),\tmp
- movq \tmp,RSP+\offset(%rsp)
- movq $__USER_DS,SS+\offset(%rsp)
- movq $__USER_CS,CS+\offset(%rsp)
- movq RIP+\offset(%rsp),\tmp /* get rip */
- movq \tmp,RCX+\offset(%rsp) /* copy it to rcx as sysret would do */
- movq R11+\offset(%rsp),\tmp /* get eflags */
- movq \tmp,EFLAGS+\offset(%rsp)
- .endm
-
- .macro RESTORE_TOP_OF_STACK tmp offset=0
- movq RSP+\offset(%rsp),\tmp
- movq \tmp,PER_CPU_VAR(old_rsp)
- movq EFLAGS+\offset(%rsp),\tmp
- movq \tmp,R11+\offset(%rsp)
- .endm
-
-/*
- * initial frame state for interrupts (and exceptions without error code)
+ * empty frame
*/
.macro EMPTY_FRAME start=1 offset=0
.if \start
@@ -173,12 +129,12 @@ ENDPROC(native_usergs_sysret64)
* initial frame state for interrupts (and exceptions without error code)
*/
.macro INTR_FRAME start=1 offset=0
- EMPTY_FRAME \start, SS+8+\offset-RIP
- /*CFI_REL_OFFSET ss, SS+\offset-RIP*/
- CFI_REL_OFFSET rsp, RSP+\offset-RIP
- /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/
- /*CFI_REL_OFFSET cs, CS+\offset-RIP*/
- CFI_REL_OFFSET rip, RIP+\offset-RIP
+ EMPTY_FRAME \start, 5*8+\offset
+ /*CFI_REL_OFFSET ss, 4*8+\offset*/
+ CFI_REL_OFFSET rsp, 3*8+\offset
+ /*CFI_REL_OFFSET rflags, 2*8+\offset*/
+ /*CFI_REL_OFFSET cs, 1*8+\offset*/
+ CFI_REL_OFFSET rip, 0*8+\offset
.endm
/*
@@ -186,30 +142,23 @@ ENDPROC(native_usergs_sysret64)
* with vector already pushed)
*/
.macro XCPT_FRAME start=1 offset=0
- INTR_FRAME \start, RIP+\offset-ORIG_RAX
- .endm
-
-/*
- * frame that enables calling into C.
- */
- .macro PARTIAL_FRAME start=1 offset=0
- XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET
- CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET
- CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET
- CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET
- CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET
- CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET
- CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET
- CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET
- CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET
- CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET
+ INTR_FRAME \start, 1*8+\offset
.endm
/*
* frame that enables passing a complete pt_regs to a C function.
*/
.macro DEFAULT_FRAME start=1 offset=0
- PARTIAL_FRAME \start, R11+\offset-R15
+ XCPT_FRAME \start, ORIG_RAX+\offset
+ CFI_REL_OFFSET rdi, RDI+\offset
+ CFI_REL_OFFSET rsi, RSI+\offset
+ CFI_REL_OFFSET rdx, RDX+\offset
+ CFI_REL_OFFSET rcx, RCX+\offset
+ CFI_REL_OFFSET rax, RAX+\offset
+ CFI_REL_OFFSET r8, R8+\offset
+ CFI_REL_OFFSET r9, R9+\offset
+ CFI_REL_OFFSET r10, R10+\offset
+ CFI_REL_OFFSET r11, R11+\offset
CFI_REL_OFFSET rbx, RBX+\offset
CFI_REL_OFFSET rbp, RBP+\offset
CFI_REL_OFFSET r12, R12+\offset
@@ -218,102 +167,30 @@ ENDPROC(native_usergs_sysret64)
CFI_REL_OFFSET r15, R15+\offset
.endm
-ENTRY(save_paranoid)
- XCPT_FRAME 1 RDI+8
- cld
- movq %rdi, RDI+8(%rsp)
- movq %rsi, RSI+8(%rsp)
- movq_cfi rdx, RDX+8
- movq_cfi rcx, RCX+8
- movq_cfi rax, RAX+8
- movq %r8, R8+8(%rsp)
- movq %r9, R9+8(%rsp)
- movq %r10, R10+8(%rsp)
- movq %r11, R11+8(%rsp)
- movq_cfi rbx, RBX+8
- movq %rbp, RBP+8(%rsp)
- movq %r12, R12+8(%rsp)
- movq %r13, R13+8(%rsp)
- movq %r14, R14+8(%rsp)
- movq %r15, R15+8(%rsp)
- movl $1,%ebx
- movl $MSR_GS_BASE,%ecx
- rdmsr
- testl %edx,%edx
- js 1f /* negative -> in kernel */
- SWAPGS
- xorl %ebx,%ebx
-1: ret
- CFI_ENDPROC
-END(save_paranoid)
-
/*
- * A newly forked process directly context switches into this address.
+ * 64bit SYSCALL instruction entry. Up to 6 arguments in registers.
*
- * rdi: prev task we switched from
- */
-ENTRY(ret_from_fork)
- DEFAULT_FRAME
-
- LOCK ; btr $TIF_FORK,TI_flags(%r8)
-
- pushq_cfi $0x0002
- popfq_cfi # reset kernel eflags
-
- call schedule_tail # rdi: 'prev' task parameter
-
- GET_THREAD_INFO(%rcx)
-
- RESTORE_REST
-
- testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread?
- jz 1f
-
- testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET
- jnz int_ret_from_sys_call
-
- RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
- jmp ret_from_sys_call # go to the SYSRET fastpath
-
-1:
- subq $REST_SKIP, %rsp # leave space for volatiles
- CFI_ADJUST_CFA_OFFSET REST_SKIP
- movq %rbp, %rdi
- call *%rbx
- movl $0, RAX(%rsp)
- RESTORE_REST
- jmp int_ret_from_sys_call
- CFI_ENDPROC
-END(ret_from_fork)
-
-/*
- * System call entry. Up to 6 arguments in registers are supported.
+ * 64bit SYSCALL saves rip to rcx, clears rflags.RF, then saves rflags to r11,
+ * then loads new ss, cs, and rip from previously programmed MSRs.
+ * rflags gets masked by a value from another MSR (so CLD and CLAC
+ * are not needed). SYSCALL does not save anything on the stack
+ * and does not change rsp.
*
- * SYSCALL does not save anything on the stack and does not change the
- * stack pointer. However, it does mask the flags register for us, so
- * CLD and CLAC are not needed.
- */
-
-/*
- * Register setup:
+ * Registers on entry:
* rax system call number
+ * rcx return address
+ * r11 saved rflags (note: r11 is callee-clobbered register in C ABI)
* rdi arg0
- * rcx return address for syscall/sysret, C arg3
* rsi arg1
* rdx arg2
- * r10 arg3 (--> moved to rcx for C)
+ * r10 arg3 (needs to be moved to rcx to conform to C ABI)
* r8 arg4
* r9 arg5
- * r11 eflags for syscall/sysret, temporary for C
- * r12-r15,rbp,rbx saved by C code, not touched.
+ * (note: r12-r15,rbp,rbx are callee-preserved in C ABI)
*
- * Interrupts are off on entry.
* Only called from user space.
*
- * XXX if we had a free scratch register we could save the RSP into the stack frame
- * and report it properly in ps. Unfortunately we haven't.
- *
- * When user can change the frames always force IRET. That is because
+ * When user can change pt_regs->foo always force IRET. That is because
* it deals with uncanonical addresses better. SYSRET has trouble
* with them due to bugs in both AMD and Intel CPUs.
*/
@@ -321,9 +198,15 @@ END(ret_from_fork)
ENTRY(system_call)
CFI_STARTPROC simple
CFI_SIGNAL_FRAME
- CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
+ CFI_DEF_CFA rsp,0
CFI_REGISTER rip,rcx
/*CFI_REGISTER rflags,r11*/
+
+ /*
+ * Interrupts are off on entry.
+ * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+ * it is too small to ever cause noticeable irq latency.
+ */
SWAPGS_UNSAFE_STACK
/*
* A hypervisor implementation might want to use a label
@@ -332,18 +215,38 @@ ENTRY(system_call)
*/
GLOBAL(system_call_after_swapgs)
- movq %rsp,PER_CPU_VAR(old_rsp)
+ movq %rsp,PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(kernel_stack),%rsp
+
+ /* Construct struct pt_regs on stack */
+ pushq_cfi $__USER_DS /* pt_regs->ss */
+ pushq_cfi PER_CPU_VAR(rsp_scratch) /* pt_regs->sp */
/*
- * No need to follow this irqs off/on section - it's straight
- * and short:
+ * Re-enable interrupts.
+ * We use 'rsp_scratch' as a scratch space, hence irq-off block above
+ * must execute atomically in the face of possible interrupt-driven
+ * task preemption. We must enable interrupts only after we're done
+ * with using rsp_scratch:
*/
ENABLE_INTERRUPTS(CLBR_NONE)
- SAVE_ARGS 8, 0, rax_enosys=1
- movq_cfi rax,(ORIG_RAX-ARGOFFSET)
- movq %rcx,RIP-ARGOFFSET(%rsp)
- CFI_REL_OFFSET rip,RIP-ARGOFFSET
- testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
+ pushq_cfi %r11 /* pt_regs->flags */
+ pushq_cfi $__USER_CS /* pt_regs->cs */
+ pushq_cfi %rcx /* pt_regs->ip */
+ CFI_REL_OFFSET rip,0
+ pushq_cfi_reg rax /* pt_regs->orig_ax */
+ pushq_cfi_reg rdi /* pt_regs->di */
+ pushq_cfi_reg rsi /* pt_regs->si */
+ pushq_cfi_reg rdx /* pt_regs->dx */
+ pushq_cfi_reg rcx /* pt_regs->cx */
+ pushq_cfi $-ENOSYS /* pt_regs->ax */
+ pushq_cfi_reg r8 /* pt_regs->r8 */
+ pushq_cfi_reg r9 /* pt_regs->r9 */
+ pushq_cfi_reg r10 /* pt_regs->r10 */
+ pushq_cfi_reg r11 /* pt_regs->r11 */
+ sub $(6*8),%rsp /* pt_regs->bp,bx,r12-15 not saved */
+ CFI_ADJUST_CFA_OFFSET 6*8
+
+ testl $_TIF_WORK_SYSCALL_ENTRY, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
jnz tracesys
system_call_fastpath:
#if __SYSCALL_MASK == ~0
@@ -352,82 +255,96 @@ system_call_fastpath:
andl $__SYSCALL_MASK,%eax
cmpl $__NR_syscall_max,%eax
#endif
- ja ret_from_sys_call /* and return regs->ax */
+ ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10,%rcx
- call *sys_call_table(,%rax,8) # XXX: rip relative
- movq %rax,RAX-ARGOFFSET(%rsp)
+ call *sys_call_table(,%rax,8)
+ movq %rax,RAX(%rsp)
+1:
/*
- * Syscall return path ending with SYSRET (fast path)
- * Has incomplete stack frame and undefined top of stack.
+ * Syscall return path ending with SYSRET (fast path).
+ * Has incompletely filled pt_regs.
*/
-ret_from_sys_call:
- testl $_TIF_ALLWORK_MASK,TI_flags+THREAD_INFO(%rsp,RIP-ARGOFFSET)
- jnz int_ret_from_sys_call_fixup /* Go the the slow path */
-
LOCKDEP_SYS_EXIT
+ /*
+ * We do not frame this tiny irq-off block with TRACE_IRQS_OFF/ON,
+ * it is too small to ever cause noticeable irq latency.
+ */
DISABLE_INTERRUPTS(CLBR_NONE)
- TRACE_IRQS_OFF
- CFI_REMEMBER_STATE
+
/*
- * sysretq will re-enable interrupts:
+ * We must check ti flags with interrupts (or at least preemption)
+ * off because we must *never* return to userspace without
+ * processing exit work that is enqueued if we're preempted here.
+ * In particular, returning to userspace with any of the one-shot
+ * flags (TIF_NOTIFY_RESUME, TIF_USER_RETURN_NOTIFY, etc) set is
+ * very bad.
*/
- TRACE_IRQS_ON
- movq RIP-ARGOFFSET(%rsp),%rcx
+ testl $_TIF_ALLWORK_MASK, ASM_THREAD_INFO(TI_flags, %rsp, SIZEOF_PTREGS)
+ jnz int_ret_from_sys_call_irqs_off /* Go to the slow path */
+
+ CFI_REMEMBER_STATE
+
+ RESTORE_C_REGS_EXCEPT_RCX_R11
+ movq RIP(%rsp),%rcx
CFI_REGISTER rip,rcx
- RESTORE_ARGS 1,-ARG_SKIP,0
+ movq EFLAGS(%rsp),%r11
/*CFI_REGISTER rflags,r11*/
- movq PER_CPU_VAR(old_rsp), %rsp
+ movq RSP(%rsp),%rsp
+ /*
+ * 64bit SYSRET restores rip from rcx,
+ * rflags from r11 (but RF and VM bits are forced to 0),
+ * cs and ss are loaded from MSRs.
+ * Restoration of rflags re-enables interrupts.
+ */
USERGS_SYSRET64
CFI_RESTORE_STATE
-int_ret_from_sys_call_fixup:
- FIXUP_TOP_OF_STACK %r11, -ARGOFFSET
- jmp int_ret_from_sys_call
-
- /* Do syscall tracing */
+ /* Do syscall entry tracing */
tracesys:
- leaq -REST_SKIP(%rsp), %rdi
- movq $AUDIT_ARCH_X86_64, %rsi
+ movq %rsp, %rdi
+ movl $AUDIT_ARCH_X86_64, %esi
call syscall_trace_enter_phase1
test %rax, %rax
jnz tracesys_phase2 /* if needed, run the slow path */
- LOAD_ARGS 0 /* else restore clobbered regs */
+ RESTORE_C_REGS_EXCEPT_RAX /* else restore clobbered regs */
+ movq ORIG_RAX(%rsp), %rax
jmp system_call_fastpath /* and return to the fast path */
tracesys_phase2:
- SAVE_REST
- FIXUP_TOP_OF_STACK %rdi
+ SAVE_EXTRA_REGS
movq %rsp, %rdi
- movq $AUDIT_ARCH_X86_64, %rsi
+ movl $AUDIT_ARCH_X86_64, %esi
movq %rax,%rdx
call syscall_trace_enter_phase2
/*
- * Reload arg registers from stack in case ptrace changed them.
+ * Reload registers from stack in case ptrace changed them.
* We don't reload %rax because syscall_trace_entry_phase2() returned
* the value it wants us to use in the table lookup.
*/
- LOAD_ARGS ARGOFFSET, 1
- RESTORE_REST
+ RESTORE_C_REGS_EXCEPT_RAX
+ RESTORE_EXTRA_REGS
#if __SYSCALL_MASK == ~0
cmpq $__NR_syscall_max,%rax
#else
andl $__SYSCALL_MASK,%eax
cmpl $__NR_syscall_max,%eax
#endif
- ja int_ret_from_sys_call /* RAX(%rsp) is already set */
+ ja 1f /* return -ENOSYS (already in pt_regs->ax) */
movq %r10,%rcx /* fixup for C */
call *sys_call_table(,%rax,8)
- movq %rax,RAX-ARGOFFSET(%rsp)
- /* Use IRET because user could have changed frame */
+ movq %rax,RAX(%rsp)
+1:
+ /* Use IRET because user could have changed pt_regs->foo */
/*
* Syscall return path ending with IRET.
- * Has correct top of stack, but partial stack frame.
+ * Has correct iret frame.
*/
GLOBAL(int_ret_from_sys_call)
DISABLE_INTERRUPTS(CLBR_NONE)
+int_ret_from_sys_call_irqs_off: /* jumps come here from the irqs-off SYSRET path */
TRACE_IRQS_OFF
movl $_TIF_ALLWORK_MASK,%edi
/* edi: mask to check */
@@ -437,8 +354,8 @@ GLOBAL(int_with_check)
movl TI_flags(%rcx),%edx
andl %edi,%edx
jnz int_careful
- andl $~TS_COMPAT,TI_status(%rcx)
- jmp retint_swapgs
+ andl $~TS_COMPAT,TI_status(%rcx)
+ jmp syscall_return
/* Either reschedule or signal or syscall exit tracking needed. */
/* First do a reschedule test. */
@@ -455,12 +372,11 @@ int_careful:
TRACE_IRQS_OFF
jmp int_with_check
- /* handle signals and tracing -- both require a full stack frame */
+ /* handle signals and tracing -- both require a full pt_regs */
int_very_careful:
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
-int_check_syscall_exit_work:
- SAVE_REST
+ SAVE_EXTRA_REGS
/* Check for syscall exit trace */
testl $_TIF_WORK_SYSCALL_EXIT,%edx
jz int_signal
@@ -479,86 +395,192 @@ int_signal:
call do_notify_resume
1: movl $_TIF_WORK_MASK,%edi
int_restore_rest:
- RESTORE_REST
+ RESTORE_EXTRA_REGS
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
jmp int_with_check
+
+syscall_return:
+ /* The IRETQ could re-enable interrupts: */
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_IRETQ
+
+ /*
+ * Try to use SYSRET instead of IRET if we're returning to
+ * a completely clean 64-bit userspace context.
+ */
+ movq RCX(%rsp),%rcx
+ cmpq %rcx,RIP(%rsp) /* RCX == RIP */
+ jne opportunistic_sysret_failed
+
+ /*
+ * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+ * in kernel space. This essentially lets the user take over
+ * the kernel, since userspace controls RSP. It's not worth
+ * testing for canonicalness exactly -- this check detects any
+ * of the 17 high bits set, which is true for non-canonical
+ * or kernel addresses. (This will pessimize vsyscall=native.
+ * Big deal.)
+ *
+ * If virtual addresses ever become wider, this will need
+ * to be updated to remain correct on both old and new CPUs.
+ */
+ .ifne __VIRTUAL_MASK_SHIFT - 47
+ .error "virtual address width changed -- SYSRET checks need update"
+ .endif
+ shr $__VIRTUAL_MASK_SHIFT, %rcx
+ jnz opportunistic_sysret_failed
+
+ cmpq $__USER_CS,CS(%rsp) /* CS must match SYSRET */
+ jne opportunistic_sysret_failed
+
+ movq R11(%rsp),%r11
+ cmpq %r11,EFLAGS(%rsp) /* R11 == RFLAGS */
+ jne opportunistic_sysret_failed
+
+ /*
+ * SYSRET can't restore RF. SYSRET can restore TF, but unlike IRET,
+ * restoring TF results in a trap from userspace immediately after
+ * SYSRET. This would cause an infinite loop whenever #DB happens
+ * with register state that satisfies the opportunistic SYSRET
+ * conditions. For example, single-stepping this user code:
+ *
+ * movq $stuck_here,%rcx
+ * pushfq
+ * popq %r11
+ * stuck_here:
+ *
+ * would never get past 'stuck_here'.
+ */
+ testq $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
+ jnz opportunistic_sysret_failed
+
+ /* nothing to check for RSP */
+
+ cmpq $__USER_DS,SS(%rsp) /* SS must match SYSRET */
+ jne opportunistic_sysret_failed
+
+ /*
+ * We win! This label is here just for ease of understanding
+ * perf profiles. Nothing jumps here.
+ */
+syscall_return_via_sysret:
+ CFI_REMEMBER_STATE
+ /* r11 is already restored (see code above) */
+ RESTORE_C_REGS_EXCEPT_R11
+ movq RSP(%rsp),%rsp
+ USERGS_SYSRET64
+ CFI_RESTORE_STATE
+
+opportunistic_sysret_failed:
+ SWAPGS
+ jmp restore_c_regs_and_iret
CFI_ENDPROC
END(system_call)
+
.macro FORK_LIKE func
ENTRY(stub_\func)
CFI_STARTPROC
- popq %r11 /* save return address */
- PARTIAL_FRAME 0
- SAVE_REST
- pushq %r11 /* put it back on stack */
- FIXUP_TOP_OF_STACK %r11, 8
- DEFAULT_FRAME 0 8 /* offset 8: return address */
- call sys_\func
- RESTORE_TOP_OF_STACK %r11, 8
- ret $REST_SKIP /* pop extended registers */
+ DEFAULT_FRAME 0, 8 /* offset 8: return address */
+ SAVE_EXTRA_REGS 8
+ jmp sys_\func
CFI_ENDPROC
END(stub_\func)
.endm
- .macro FIXED_FRAME label,func
-ENTRY(\label)
- CFI_STARTPROC
- PARTIAL_FRAME 0 8 /* offset 8: return address */
- FIXUP_TOP_OF_STACK %r11, 8-ARGOFFSET
- call \func
- RESTORE_TOP_OF_STACK %r11, 8-ARGOFFSET
- ret
- CFI_ENDPROC
-END(\label)
- .endm
-
FORK_LIKE clone
FORK_LIKE fork
FORK_LIKE vfork
- FIXED_FRAME stub_iopl, sys_iopl
ENTRY(stub_execve)
CFI_STARTPROC
- addq $8, %rsp
- PARTIAL_FRAME 0
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
- call sys_execve
- movq %rax,RAX(%rsp)
- RESTORE_REST
- jmp int_ret_from_sys_call
+ DEFAULT_FRAME 0, 8
+ call sys_execve
+return_from_execve:
+ testl %eax, %eax
+ jz 1f
+ /* exec failed, can use fast SYSRET code path in this case */
+ ret
+1:
+ /* must use IRET code path (pt_regs->cs may have changed) */
+ addq $8, %rsp
+ CFI_ADJUST_CFA_OFFSET -8
+ ZERO_EXTRA_REGS
+ movq %rax,RAX(%rsp)
+ jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_execve)
-
-ENTRY(stub_execveat)
+/*
+ * Remaining execve stubs are only 7 bytes long.
+ * ENTRY() often aligns to 16 bytes, which in this case has no benefits.
+ */
+ .align 8
+GLOBAL(stub_execveat)
CFI_STARTPROC
- addq $8, %rsp
- PARTIAL_FRAME 0
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
- call sys_execveat
- RESTORE_TOP_OF_STACK %r11
- movq %rax,RAX(%rsp)
- RESTORE_REST
- jmp int_ret_from_sys_call
+ DEFAULT_FRAME 0, 8
+ call sys_execveat
+ jmp return_from_execve
CFI_ENDPROC
END(stub_execveat)
+#ifdef CONFIG_X86_X32_ABI
+ .align 8
+GLOBAL(stub_x32_execve)
+ CFI_STARTPROC
+ DEFAULT_FRAME 0, 8
+ call compat_sys_execve
+ jmp return_from_execve
+ CFI_ENDPROC
+END(stub_x32_execve)
+ .align 8
+GLOBAL(stub_x32_execveat)
+ CFI_STARTPROC
+ DEFAULT_FRAME 0, 8
+ call compat_sys_execveat
+ jmp return_from_execve
+ CFI_ENDPROC
+END(stub_x32_execveat)
+#endif
+
+#ifdef CONFIG_IA32_EMULATION
+ .align 8
+GLOBAL(stub32_execve)
+ CFI_STARTPROC
+ call compat_sys_execve
+ jmp return_from_execve
+ CFI_ENDPROC
+END(stub32_execve)
+ .align 8
+GLOBAL(stub32_execveat)
+ CFI_STARTPROC
+ call compat_sys_execveat
+ jmp return_from_execve
+ CFI_ENDPROC
+END(stub32_execveat)
+#endif
+
/*
* sigreturn is special because it needs to restore all registers on return.
* This cannot be done with SYSRET, so use the IRET return path instead.
*/
ENTRY(stub_rt_sigreturn)
CFI_STARTPROC
- addq $8, %rsp
- PARTIAL_FRAME 0
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
+ DEFAULT_FRAME 0, 8
+ /*
+ * SAVE_EXTRA_REGS result is not normally needed:
+ * sigreturn overwrites all pt_regs->GPREGS.
+ * But sigreturn can fail (!), and there is no easy way to detect that.
+ * To make sure RESTORE_EXTRA_REGS doesn't restore garbage on error,
+ * we SAVE_EXTRA_REGS here.
+ */
+ SAVE_EXTRA_REGS 8
call sys_rt_sigreturn
- movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
- RESTORE_REST
+return_from_stub:
+ addq $8, %rsp
+ CFI_ADJUST_CFA_OFFSET -8
+ RESTORE_EXTRA_REGS
+ movq %rax,RAX(%rsp)
jmp int_ret_from_sys_call
CFI_ENDPROC
END(stub_rt_sigreturn)
@@ -566,86 +588,70 @@ END(stub_rt_sigreturn)
#ifdef CONFIG_X86_X32_ABI
ENTRY(stub_x32_rt_sigreturn)
CFI_STARTPROC
- addq $8, %rsp
- PARTIAL_FRAME 0
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
+ DEFAULT_FRAME 0, 8
+ SAVE_EXTRA_REGS 8
call sys32_x32_rt_sigreturn
- movq %rax,RAX(%rsp) # fixme, this could be done at the higher layer
- RESTORE_REST
- jmp int_ret_from_sys_call
+ jmp return_from_stub
CFI_ENDPROC
END(stub_x32_rt_sigreturn)
+#endif
-ENTRY(stub_x32_execve)
- CFI_STARTPROC
- addq $8, %rsp
- PARTIAL_FRAME 0
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
- call compat_sys_execve
- RESTORE_TOP_OF_STACK %r11
- movq %rax,RAX(%rsp)
- RESTORE_REST
- jmp int_ret_from_sys_call
- CFI_ENDPROC
-END(stub_x32_execve)
+/*
+ * A newly forked process directly context switches into this address.
+ *
+ * rdi: prev task we switched from
+ */
+ENTRY(ret_from_fork)
+ DEFAULT_FRAME
-ENTRY(stub_x32_execveat)
- CFI_STARTPROC
- addq $8, %rsp
- PARTIAL_FRAME 0
- SAVE_REST
- FIXUP_TOP_OF_STACK %r11
- call compat_sys_execveat
- RESTORE_TOP_OF_STACK %r11
- movq %rax,RAX(%rsp)
- RESTORE_REST
+ LOCK ; btr $TIF_FORK,TI_flags(%r8)
+
+ pushq_cfi $0x0002
+ popfq_cfi # reset kernel eflags
+
+ call schedule_tail # rdi: 'prev' task parameter
+
+ RESTORE_EXTRA_REGS
+
+ testl $3,CS(%rsp) # from kernel_thread?
+
+ /*
+ * By the time we get here, we have no idea whether our pt_regs,
+ * ti flags, and ti status came from the 64-bit SYSCALL fast path,
+ * the slow path, or one of the ia32entry paths.
+ * Use IRET code path to return, since it can safely handle
+ * all of the above.
+ */
+ jnz int_ret_from_sys_call
+
+ /* We came from kernel_thread */
+ /* nb: we depend on RESTORE_EXTRA_REGS above */
+ movq %rbp, %rdi
+ call *%rbx
+ movl $0, RAX(%rsp)
+ RESTORE_EXTRA_REGS
jmp int_ret_from_sys_call
CFI_ENDPROC
-END(stub_x32_execveat)
-
-#endif
+END(ret_from_fork)
/*
- * Build the entry stubs and pointer table with some assembler magic.
- * We pack 7 stubs into a single 32-byte chunk, which will fit in a
- * single cache line on all modern x86 implementations.
+ * Build the entry stubs with some assembler magic.
+ * We pack 1 stub into every 8-byte block.
*/
- .section .init.rodata,"a"
-ENTRY(interrupt)
- .section .entry.text
- .p2align 5
- .p2align CONFIG_X86_L1_CACHE_SHIFT
+ .align 8
ENTRY(irq_entries_start)
INTR_FRAME
-vector=FIRST_EXTERNAL_VECTOR
-.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
- .balign 32
- .rept 7
- .if vector < FIRST_SYSTEM_VECTOR
- .if vector <> FIRST_EXTERNAL_VECTOR
+ vector=FIRST_EXTERNAL_VECTOR
+ .rept (FIRST_SYSTEM_VECTOR - FIRST_EXTERNAL_VECTOR)
+ pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */
+ vector=vector+1
+ jmp common_interrupt
CFI_ADJUST_CFA_OFFSET -8
- .endif
-1: pushq_cfi $(~vector+0x80) /* Note: always in signed byte range */
- .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6
- jmp 2f
- .endif
- .previous
- .quad 1b
- .section .entry.text
-vector=vector+1
- .endif
- .endr
-2: jmp common_interrupt
-.endr
+ .align 8
+ .endr
CFI_ENDPROC
END(irq_entries_start)
-.previous
-END(interrupt)
-.previous
-
/*
* Interrupt entry/exit.
*
@@ -656,47 +662,45 @@ END(interrupt)
/* 0(%rsp): ~(interrupt number) */
.macro interrupt func
- /* reserve pt_regs for scratch regs and rbp */
- subq $ORIG_RAX-RBP, %rsp
- CFI_ADJUST_CFA_OFFSET ORIG_RAX-RBP
cld
- /* start from rbp in pt_regs and jump over */
- movq_cfi rdi, (RDI-RBP)
- movq_cfi rsi, (RSI-RBP)
- movq_cfi rdx, (RDX-RBP)
- movq_cfi rcx, (RCX-RBP)
- movq_cfi rax, (RAX-RBP)
- movq_cfi r8, (R8-RBP)
- movq_cfi r9, (R9-RBP)
- movq_cfi r10, (R10-RBP)
- movq_cfi r11, (R11-RBP)
-
- /* Save rbp so that we can unwind from get_irq_regs() */
- movq_cfi rbp, 0
-
- /* Save previous stack value */
- movq %rsp, %rsi
+ /*
+ * Since nothing in interrupt handling code touches r12...r15 members
+ * of "struct pt_regs", and since interrupts can nest, we can save
+ * four stack slots and simultaneously provide
+ * an unwind-friendly stack layout by saving "truncated" pt_regs
+ * exactly up to rbp slot, without these members.
+ */
+ ALLOC_PT_GPREGS_ON_STACK -RBP
+ SAVE_C_REGS -RBP
+ /* this goes to 0(%rsp) for unwinder, not for saving the value: */
+ SAVE_EXTRA_REGS_RBP -RBP
- leaq -RBP(%rsp),%rdi /* arg1 for handler */
- testl $3, CS-RBP(%rsi)
+ leaq -RBP(%rsp),%rdi /* arg1 for \func (pointer to pt_regs) */
+
+ testl $3, CS-RBP(%rsp)
je 1f
SWAPGS
+1:
/*
+ * Save previous stack pointer, optionally switch to interrupt stack.
* irq_count is used to check if a CPU is already on an interrupt stack
* or not. While this is essentially redundant with preempt_count it is
* a little cheaper to use a separate counter in the PDA (short of
* moving irq_enter into assembly, which would be too much work)
*/
-1: incl PER_CPU_VAR(irq_count)
+ movq %rsp, %rsi
+ incl PER_CPU_VAR(irq_count)
cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
CFI_DEF_CFA_REGISTER rsi
-
- /* Store previous stack value */
pushq %rsi
+ /*
+ * For debugger:
+ * "CFA (Current Frame Address) is the value on stack + offset"
+ */
CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
- 0x77 /* DW_OP_breg7 */, 0, \
+ 0x77 /* DW_OP_breg7 (rsp) */, 0, \
0x06 /* DW_OP_deref */, \
- 0x08 /* DW_OP_const1u */, SS+8-RBP, \
+ 0x08 /* DW_OP_const1u */, SIZEOF_PTREGS-RBP, \
0x22 /* DW_OP_plus */
/* We entered an interrupt context - irqs are off: */
TRACE_IRQS_OFF
@@ -714,7 +718,7 @@ common_interrupt:
ASM_CLAC
addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
interrupt do_IRQ
- /* 0(%rsp): old_rsp-ARGOFFSET */
+ /* 0(%rsp): old RSP */
ret_from_intr:
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
@@ -722,19 +726,18 @@ ret_from_intr:
/* Restore saved previous stack */
popq %rsi
- CFI_DEF_CFA rsi,SS+8-RBP /* reg/off reset after def_cfa_expr */
- leaq ARGOFFSET-RBP(%rsi), %rsp
+ CFI_DEF_CFA rsi,SIZEOF_PTREGS-RBP /* reg/off reset after def_cfa_expr */
+ /* return code expects complete pt_regs - adjust rsp accordingly: */
+ leaq -RBP(%rsi),%rsp
CFI_DEF_CFA_REGISTER rsp
- CFI_ADJUST_CFA_OFFSET RBP-ARGOFFSET
+ CFI_ADJUST_CFA_OFFSET RBP
-exit_intr:
- GET_THREAD_INFO(%rcx)
- testl $3,CS-ARGOFFSET(%rsp)
+ testl $3,CS(%rsp)
je retint_kernel
-
/* Interrupt came from user space */
+
+ GET_THREAD_INFO(%rcx)
/*
- * Has a correct top of stack, but a partial stack frame
* %rcx: thread info. Interrupts off.
*/
retint_with_reschedule:
@@ -753,70 +756,34 @@ retint_swapgs: /* return to user-space */
DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_IRETQ
- /*
- * Try to use SYSRET instead of IRET if we're returning to
- * a completely clean 64-bit userspace context.
- */
- movq (RCX-R11)(%rsp), %rcx
- cmpq %rcx,(RIP-R11)(%rsp) /* RCX == RIP */
- jne opportunistic_sysret_failed
-
- /*
- * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
- * in kernel space. This essentially lets the user take over
- * the kernel, since userspace controls RSP. It's not worth
- * testing for canonicalness exactly -- this check detects any
- * of the 17 high bits set, which is true for non-canonical
- * or kernel addresses. (This will pessimize vsyscall=native.
- * Big deal.)
- *
- * If virtual addresses ever become wider, this will need
- * to be updated to remain correct on both old and new CPUs.
- */
- .ifne __VIRTUAL_MASK_SHIFT - 47
- .error "virtual address width changed -- sysret checks need update"
- .endif
- shr $__VIRTUAL_MASK_SHIFT, %rcx
- jnz opportunistic_sysret_failed
-
- cmpq $__USER_CS,(CS-R11)(%rsp) /* CS must match SYSRET */
- jne opportunistic_sysret_failed
-
- movq (R11-ARGOFFSET)(%rsp), %r11
- cmpq %r11,(EFLAGS-ARGOFFSET)(%rsp) /* R11 == RFLAGS */
- jne opportunistic_sysret_failed
-
- testq $X86_EFLAGS_RF,%r11 /* sysret can't restore RF */
- jnz opportunistic_sysret_failed
-
- /* nothing to check for RSP */
-
- cmpq $__USER_DS,(SS-ARGOFFSET)(%rsp) /* SS must match SYSRET */
- jne opportunistic_sysret_failed
-
- /*
- * We win! This label is here just for ease of understanding
- * perf profiles. Nothing jumps here.
- */
-irq_return_via_sysret:
- CFI_REMEMBER_STATE
- RESTORE_ARGS 1,8,1
- movq (RSP-RIP)(%rsp),%rsp
- USERGS_SYSRET64
- CFI_RESTORE_STATE
-
-opportunistic_sysret_failed:
SWAPGS
- jmp restore_args
+ jmp restore_c_regs_and_iret
-retint_restore_args: /* return to kernel space */
- DISABLE_INTERRUPTS(CLBR_ANY)
+/* Returning to kernel space */
+retint_kernel:
+#ifdef CONFIG_PREEMPT
+ /* Interrupts are off */
+ /* Check if we need preemption */
+ bt $9,EFLAGS(%rsp) /* interrupts were off? */
+ jnc 1f
+0: cmpl $0,PER_CPU_VAR(__preempt_count)
+ jnz 1f
+ call preempt_schedule_irq
+ jmp 0b
+1:
+#endif
/*
* The iretq could re-enable interrupts:
*/
TRACE_IRQS_IRETQ
-restore_args:
- RESTORE_ARGS 1,8,1
+
+/*
+ * At this label, code paths which return to kernel and to user,
+ * which come from interrupts/exception and from syscalls, merge.
+ */
+restore_c_regs_and_iret:
+ RESTORE_C_REGS
+ REMOVE_PT_GPREGS_FROM_STACK 8
irq_return:
INTERRUPT_RETURN
@@ -887,28 +854,17 @@ retint_signal:
jz retint_swapgs
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- SAVE_REST
+ SAVE_EXTRA_REGS
movq $-1,ORIG_RAX(%rsp)
xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
- RESTORE_REST
+ RESTORE_EXTRA_REGS
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
jmp retint_with_reschedule
-#ifdef CONFIG_PREEMPT
- /* Returning to kernel space. Check if we need preemption */
- /* rcx: threadinfo. interrupts off. */
-ENTRY(retint_kernel)
- cmpl $0,PER_CPU_VAR(__preempt_count)
- jnz retint_restore_args
- bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
- jnc retint_restore_args
- call preempt_schedule_irq
- jmp exit_intr
-#endif
CFI_ENDPROC
END(common_interrupt)
@@ -997,7 +953,7 @@ apicinterrupt IRQ_WORK_VECTOR \
/*
* Exception entry points.
*/
-#define INIT_TSS_IST(x) PER_CPU_VAR(init_tss) + (TSS_ist + ((x) - 1) * 8)
+#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss) + (TSS_ist + ((x) - 1) * 8)
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
ENTRY(\sym)
@@ -1019,8 +975,7 @@ ENTRY(\sym)
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
.endif
- subq $ORIG_RAX-R15, %rsp
- CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+ ALLOC_PT_GPREGS_ON_STACK
.if \paranoid
.if \paranoid == 1
@@ -1028,10 +983,11 @@ ENTRY(\sym)
testl $3, CS(%rsp) /* If coming from userspace, switch */
jnz 1f /* stacks. */
.endif
- call save_paranoid
+ call paranoid_entry
.else
call error_entry
.endif
+ /* returned flag: ebx=0: need swapgs on exit, ebx=1: don't need it */
DEFAULT_FRAME 0
@@ -1053,19 +1009,20 @@ ENTRY(\sym)
.endif
.if \shift_ist != -1
- subq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+ subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
.endif
call \do_sym
.if \shift_ist != -1
- addq $EXCEPTION_STKSZ, INIT_TSS_IST(\shift_ist)
+ addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
.endif
+ /* these procedures expect "no swapgs" flag in ebx */
.if \paranoid
- jmp paranoid_exit /* %ebx: no swapgs flag */
+ jmp paranoid_exit
.else
- jmp error_exit /* %ebx: no swapgs flag */
+ jmp error_exit
.endif
.if \paranoid == 1
@@ -1269,7 +1226,9 @@ ENTRY(xen_failsafe_callback)
addq $0x30,%rsp
CFI_ADJUST_CFA_OFFSET -0x30
pushq_cfi $-1 /* orig_ax = -1 => not a system call */
- SAVE_ALL
+ ALLOC_PT_GPREGS_ON_STACK
+ SAVE_C_REGS
+ SAVE_EXTRA_REGS
jmp error_exit
CFI_ENDPROC
END(xen_failsafe_callback)
@@ -1301,59 +1260,66 @@ idtentry async_page_fault do_async_page_fault has_error_code=1
idtentry machine_check has_error_code=0 paranoid=1 do_sym=*machine_check_vector(%rip)
#endif
- /*
- * "Paranoid" exit path from exception stack. This is invoked
- * only on return from non-NMI IST interrupts that came
- * from kernel space.
- *
- * We may be returning to very strange contexts (e.g. very early
- * in syscall entry), so checking for preemption here would
- * be complicated. Fortunately, we there's no good reason
- * to try to handle preemption here.
- */
+/*
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Use slow, but surefire "are we in kernel?" check.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
+ */
+ENTRY(paranoid_entry)
+ XCPT_FRAME 1 15*8
+ cld
+ SAVE_C_REGS 8
+ SAVE_EXTRA_REGS 8
+ movl $1,%ebx
+ movl $MSR_GS_BASE,%ecx
+ rdmsr
+ testl %edx,%edx
+ js 1f /* negative -> in kernel */
+ SWAPGS
+ xorl %ebx,%ebx
+1: ret
+ CFI_ENDPROC
+END(paranoid_entry)
- /* ebx: no swapgs flag */
+/*
+ * "Paranoid" exit path from exception stack. This is invoked
+ * only on return from non-NMI IST interrupts that came
+ * from kernel space.
+ *
+ * We may be returning to very strange contexts (e.g. very early
+ * in syscall entry), so checking for preemption here would
+ * be complicated. Fortunately, we there's no good reason
+ * to try to handle preemption here.
+ */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
ENTRY(paranoid_exit)
DEFAULT_FRAME
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF_DEBUG
testl %ebx,%ebx /* swapgs needed? */
- jnz paranoid_restore
- TRACE_IRQS_IRETQ 0
+ jnz paranoid_exit_no_swapgs
+ TRACE_IRQS_IRETQ
SWAPGS_UNSAFE_STACK
- RESTORE_ALL 8
- INTERRUPT_RETURN
-paranoid_restore:
- TRACE_IRQS_IRETQ_DEBUG 0
- RESTORE_ALL 8
+ jmp paranoid_exit_restore
+paranoid_exit_no_swapgs:
+ TRACE_IRQS_IRETQ_DEBUG
+paranoid_exit_restore:
+ RESTORE_EXTRA_REGS
+ RESTORE_C_REGS
+ REMOVE_PT_GPREGS_FROM_STACK 8
INTERRUPT_RETURN
CFI_ENDPROC
END(paranoid_exit)
/*
- * Exception entry point. This expects an error code/orig_rax on the stack.
- * returns in "no swapgs flag" in %ebx.
+ * Save all registers in pt_regs, and switch gs if needed.
+ * Return: ebx=0: need swapgs on exit, ebx=1: otherwise
*/
ENTRY(error_entry)
- XCPT_FRAME
- CFI_ADJUST_CFA_OFFSET 15*8
- /* oldrax contains error code */
+ XCPT_FRAME 1 15*8
cld
- movq %rdi, RDI+8(%rsp)
- movq %rsi, RSI+8(%rsp)
- movq %rdx, RDX+8(%rsp)
- movq %rcx, RCX+8(%rsp)
- movq %rax, RAX+8(%rsp)
- movq %r8, R8+8(%rsp)
- movq %r9, R9+8(%rsp)
- movq %r10, R10+8(%rsp)
- movq %r11, R11+8(%rsp)
- movq_cfi rbx, RBX+8
- movq %rbp, RBP+8(%rsp)
- movq %r12, R12+8(%rsp)
- movq %r13, R13+8(%rsp)
- movq %r14, R14+8(%rsp)
- movq %r15, R15+8(%rsp)
+ SAVE_C_REGS 8
+ SAVE_EXTRA_REGS 8
xorl %ebx,%ebx
testl $3,CS+8(%rsp)
je error_kernelspace
@@ -1363,12 +1329,12 @@ error_sti:
TRACE_IRQS_OFF
ret
-/*
- * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here. B stepping K8s sometimes report a
- * truncated RIP for IRET exceptions returning to compat mode. Check
- * for these here too.
- */
+ /*
+ * There are two places in the kernel that can potentially fault with
+ * usergs. Handle them here. B stepping K8s sometimes report a
+ * truncated RIP for IRET exceptions returning to compat mode. Check
+ * for these here too.
+ */
error_kernelspace:
CFI_REL_OFFSET rcx, RCX+8
incl %ebx
@@ -1398,11 +1364,11 @@ error_bad_iret:
END(error_entry)
-/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */
+/* On entry, ebx is "no swapgs" flag (1: don't need swapgs, 0: need it) */
ENTRY(error_exit)
DEFAULT_FRAME
movl %ebx,%eax
- RESTORE_REST
+ RESTORE_EXTRA_REGS
DISABLE_INTERRUPTS(CLBR_NONE)
TRACE_IRQS_OFF
GET_THREAD_INFO(%rcx)
@@ -1417,19 +1383,7 @@ ENTRY(error_exit)
CFI_ENDPROC
END(error_exit)
-/*
- * Test if a given stack is an NMI stack or not.
- */
- .macro test_in_nmi reg stack nmi_ret normal_ret
- cmpq %\reg, \stack
- ja \normal_ret
- subq $EXCEPTION_STKSZ, %\reg
- cmpq %\reg, \stack
- jb \normal_ret
- jmp \nmi_ret
- .endm
-
- /* runs on exception stack */
+/* Runs on exception stack */
ENTRY(nmi)
INTR_FRAME
PARAVIRT_ADJUST_EXCEPTION_FRAME
@@ -1465,7 +1419,7 @@ ENTRY(nmi)
* NMI.
*/
- /* Use %rdx as out temp variable throughout */
+ /* Use %rdx as our temp variable throughout */
pushq_cfi %rdx
CFI_REL_OFFSET rdx, 0
@@ -1490,8 +1444,17 @@ ENTRY(nmi)
* We check the variable because the first NMI could be in a
* breakpoint routine using a breakpoint stack.
*/
- lea 6*8(%rsp), %rdx
- test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
+ lea 6*8(%rsp), %rdx
+ /* Compare the NMI stack (rdx) with the stack we came from (4*8(%rsp)) */
+ cmpq %rdx, 4*8(%rsp)
+ /* If the stack pointer is above the NMI stack, this is a normal NMI */
+ ja first_nmi
+ subq $EXCEPTION_STKSZ, %rdx
+ cmpq %rdx, 4*8(%rsp)
+ /* If it is below the NMI stack, it is a normal NMI */
+ jb first_nmi
+ /* Ah, it is within the NMI stack, treat it as nested */
+
CFI_REMEMBER_STATE
nested_nmi:
@@ -1584,7 +1547,7 @@ first_nmi:
.rept 5
pushq_cfi 11*8(%rsp)
.endr
- CFI_DEF_CFA_OFFSET SS+8-RIP
+ CFI_DEF_CFA_OFFSET 5*8
/* Everything up to here is safe from nested NMIs */
@@ -1612,7 +1575,7 @@ repeat_nmi:
pushq_cfi -6*8(%rsp)
.endr
subq $(5*8), %rsp
- CFI_DEF_CFA_OFFSET SS+8-RIP
+ CFI_DEF_CFA_OFFSET 5*8
end_repeat_nmi:
/*
@@ -1621,16 +1584,16 @@ end_repeat_nmi:
* so that we repeat another NMI.
*/
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
- subq $ORIG_RAX-R15, %rsp
- CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+ ALLOC_PT_GPREGS_ON_STACK
+
/*
- * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+ * Use paranoid_entry to handle SWAPGS, but no need to use paranoid_exit
* as we should not be calling schedule in NMI context.
* Even with normal interrupts enabled. An NMI should not be
* setting NEED_RESCHED or anything that normal interrupts and
* exceptions might do.
*/
- call save_paranoid
+ call paranoid_entry
DEFAULT_FRAME 0
/*
@@ -1661,8 +1624,10 @@ end_repeat_nmi:
nmi_swapgs:
SWAPGS_UNSAFE_STACK
nmi_restore:
+ RESTORE_EXTRA_REGS
+ RESTORE_C_REGS
/* Pop the extra iret frame at once */
- RESTORE_ALL 6*8
+ REMOVE_PT_GPREGS_FROM_STACK 6*8
/* Clear the NMI executing stack variable */
movq $0, 5*8(%rsp)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index c4f8d4659070..2b55ee6db053 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -177,9 +177,6 @@ asmlinkage __visible void __init x86_64_start_kernel(char * real_mode_data)
*/
load_ucode_bsp();
- if (console_loglevel >= CONSOLE_LOGLEVEL_DEBUG)
- early_printk("Kernel alive\n");
-
clear_page(init_level4_pgt);
/* set init_level4_pgt kernel high mapping*/
init_level4_pgt[511] = early_level4_pgt[511];
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f36bd42d6f0c..d031bad9e07e 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -22,6 +22,7 @@
#include <asm/cpufeature.h>
#include <asm/percpu.h>
#include <asm/nops.h>
+#include <asm/bootparam.h>
/* Physical address */
#define pa(X) ((X) - __PAGE_OFFSET)
@@ -90,7 +91,7 @@ ENTRY(startup_32)
/* test KEEP_SEGMENTS flag to see if the bootloader is asking
us to not reload segments */
- testb $(1<<6), BP_loadflags(%esi)
+ testb $KEEP_SEGMENTS, BP_loadflags(%esi)
jnz 2f
/*
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 6fd514d9f69a..ae6588b301c2 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -1,5 +1,5 @@
/*
- * linux/arch/x86_64/kernel/head.S -- start in 32bit and switch to 64bit
+ * linux/arch/x86/kernel/head_64.S -- start in 32bit and switch to 64bit
*
* Copyright (C) 2000 Andrea Arcangeli <andrea@suse.de> SuSE
* Copyright (C) 2000 Pavel Machek <pavel@suse.cz>
@@ -56,7 +56,7 @@ startup_64:
* %rsi holds a physical pointer to real_mode_data.
*
* We come here either directly from a 64bit bootloader, or from
- * arch/x86_64/boot/compressed/head.S.
+ * arch/x86/boot/compressed/head_64.S.
*
* We only come here initially at boot nothing else comes here.
*
@@ -146,7 +146,7 @@ startup_64:
leaq level2_kernel_pgt(%rip), %rdi
leaq 4096(%rdi), %r8
/* See if it is a valid page table entry */
-1: testq $1, 0(%rdi)
+1: testb $1, 0(%rdi)
jz 2f
addq %rbp, 0(%rdi)
/* Go to the next page */
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index d5651fce0b71..29c740deafec 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -68,7 +68,7 @@ static inline bool interrupted_kernel_fpu_idle(void)
static inline bool interrupted_user_mode(void)
{
struct pt_regs *regs = get_irq_regs();
- return regs && user_mode_vm(regs);
+ return regs && user_mode(regs);
}
/*
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index 4ddaf66ea35f..37dae792dbbe 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -54,7 +54,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on)
* because the ->io_bitmap_max value must match the bitmap
* contents:
*/
- tss = &per_cpu(init_tss, get_cpu());
+ tss = &per_cpu(cpu_tss, get_cpu());
if (turn_on)
bitmap_clear(t->io_bitmap_ptr, from, num);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 28d28f5eb8f4..f9fd86a7fcc7 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -165,7 +165,7 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
if (unlikely(!desc))
return false;
- if (user_mode_vm(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
+ if (user_mode(regs) || !execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow))
print_stack_overflow();
desc->handle_irq(irq, desc);
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index e4b503d5558c..394e643d7830 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -44,7 +44,7 @@ static inline void stack_overflow_check(struct pt_regs *regs)
u64 estack_top, estack_bottom;
u64 curbase = (u64)task_stack_page(current);
- if (user_mode_vm(regs))
+ if (user_mode(regs))
return;
if (regs->sp >= curbase + sizeof(struct thread_info) +
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 70e181ea1eac..cd10a6437264 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -178,7 +178,8 @@ void __init native_init_IRQ(void)
#endif
for_each_clear_bit_from(i, used_vectors, first_system_vector) {
/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
- set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+ set_intr_gate(i, irq_entries_start +
+ 8 * (i - FIRST_EXTERNAL_VECTOR));
}
#ifdef CONFIG_X86_LOCAL_APIC
for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 7ec1d5f8d283..d6178d9791db 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -72,7 +72,7 @@ struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] =
{ "bx", 8, offsetof(struct pt_regs, bx) },
{ "cx", 8, offsetof(struct pt_regs, cx) },
{ "dx", 8, offsetof(struct pt_regs, dx) },
- { "si", 8, offsetof(struct pt_regs, dx) },
+ { "si", 8, offsetof(struct pt_regs, si) },
{ "di", 8, offsetof(struct pt_regs, di) },
{ "bp", 8, offsetof(struct pt_regs, bp) },
{ "sp", 8, offsetof(struct pt_regs, sp) },
@@ -126,11 +126,11 @@ char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs)
#ifdef CONFIG_X86_32
switch (regno) {
case GDB_SS:
- if (!user_mode_vm(regs))
+ if (!user_mode(regs))
*(unsigned long *)mem = __KERNEL_DS;
break;
case GDB_SP:
- if (!user_mode_vm(regs))
+ if (!user_mode(regs))
*(unsigned long *)mem = kernel_stack_pointer(regs);
break;
case GDB_GS:
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 4e3d5a9621fe..24d079604fd5 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -602,7 +602,7 @@ int kprobe_int3_handler(struct pt_regs *regs)
struct kprobe *p;
struct kprobe_ctlblk *kcb;
- if (user_mode_vm(regs))
+ if (user_mode(regs))
return 0;
addr = (kprobe_opcode_t *)(regs->ip - sizeof(kprobe_opcode_t));
@@ -1007,7 +1007,7 @@ int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
struct die_args *args = data;
int ret = NOTIFY_DONE;
- if (args->regs && user_mode_vm(args->regs))
+ if (args->regs && user_mode(args->regs))
return ret;
if (val == DIE_GPF) {
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 9bbb9b35c144..005c03e93fc5 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -33,6 +33,7 @@
#include <asm/page.h>
#include <asm/pgtable.h>
+#include <asm/setup.h>
#if 0
#define DEBUGP(fmt, ...) \
@@ -53,7 +54,7 @@ static DEFINE_MUTEX(module_kaslr_mutex);
static unsigned long int get_module_load_offset(void)
{
- if (kaslr_enabled) {
+ if (kaslr_enabled()) {
mutex_lock(&module_kaslr_mutex);
/*
* Calculate the module_load_offset the first time this
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 781861cc5ee8..da8cb987b973 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -131,10 +131,11 @@ void perf_get_regs_user(struct perf_regs *regs_user,
}
/*
- * RIP, flags, and the argument registers are usually saved.
- * orig_ax is probably okay, too.
+ * These registers are always saved on 64-bit syscall entry.
+ * On 32-bit entry points, they are saved too except r8..r11.
*/
regs_user_copy->ip = user_regs->ip;
+ regs_user_copy->ax = user_regs->ax;
regs_user_copy->cx = user_regs->cx;
regs_user_copy->dx = user_regs->dx;
regs_user_copy->si = user_regs->si;
@@ -145,9 +146,12 @@ void perf_get_regs_user(struct perf_regs *regs_user,
regs_user_copy->r11 = user_regs->r11;
regs_user_copy->orig_ax = user_regs->orig_ax;
regs_user_copy->flags = user_regs->flags;
+ regs_user_copy->sp = user_regs->sp;
+ regs_user_copy->cs = user_regs->cs;
+ regs_user_copy->ss = user_regs->ss;
/*
- * Don't even try to report the "rest" regs.
+ * Most system calls don't save these registers, don't report them.
*/
regs_user_copy->bx = -1;
regs_user_copy->bp = -1;
@@ -158,37 +162,13 @@ void perf_get_regs_user(struct perf_regs *regs_user,
/*
* For this to be at all useful, we need a reasonable guess for
- * sp and the ABI. Be careful: we're in NMI context, and we're
+ * the ABI. Be careful: we're in NMI context, and we're
* considering current to be the current task, so we should
* be careful not to look at any other percpu variables that might
* change during context switches.
*/
- if (IS_ENABLED(CONFIG_IA32_EMULATION) &&
- task_thread_info(current)->status & TS_COMPAT) {
- /* Easy case: we're in a compat syscall. */
- regs_user->abi = PERF_SAMPLE_REGS_ABI_32;
- regs_user_copy->sp = user_regs->sp;
- regs_user_copy->cs = user_regs->cs;
- regs_user_copy->ss = user_regs->ss;
- } else if (user_regs->orig_ax != -1) {
- /*
- * We're probably in a 64-bit syscall.
- * Warning: this code is severely racy. At least it's better
- * than just blindly copying user_regs.
- */
- regs_user->abi = PERF_SAMPLE_REGS_ABI_64;
- regs_user_copy->sp = this_cpu_read(old_rsp);
- regs_user_copy->cs = __USER_CS;
- regs_user_copy->ss = __USER_DS;
- regs_user_copy->cx = -1; /* usually contains garbage */
- } else {
- /* We're probably in an interrupt or exception. */
- regs_user->abi = user_64bit_mode(user_regs) ?
- PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
- regs_user_copy->sp = user_regs->sp;
- regs_user_copy->cs = user_regs->cs;
- regs_user_copy->ss = user_regs->ss;
- }
+ regs_user->abi = user_64bit_mode(user_regs) ?
+ PERF_SAMPLE_REGS_ABI_64 : PERF_SAMPLE_REGS_ABI_32;
regs_user->regs = regs_user_copy;
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 046e2d620bbe..0c8992dbead5 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -9,7 +9,7 @@
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/pm.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
#include <linux/random.h>
#include <linux/user-return-notifier.h>
#include <linux/dmi.h>
@@ -24,6 +24,7 @@
#include <asm/syscalls.h>
#include <asm/idle.h>
#include <asm/uaccess.h>
+#include <asm/mwait.h>
#include <asm/i387.h>
#include <asm/fpu-internal.h>
#include <asm/debugreg.h>
@@ -37,7 +38,26 @@
* section. Since TSS's are completely CPU-local, we want them
* on exact cacheline boundaries, to eliminate cacheline ping-pong.
*/
-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
+__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = {
+ .x86_tss = {
+ .sp0 = TOP_OF_INIT_STACK,
+#ifdef CONFIG_X86_32
+ .ss0 = __KERNEL_DS,
+ .ss1 = __KERNEL_CS,
+ .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
+#endif
+ },
+#ifdef CONFIG_X86_32
+ /*
+ * Note that the .io_bitmap member must be extra-big. This is because
+ * the CPU will access an additional byte beyond the end of the IO
+ * permission bitmap. The extra byte must be all 1 bits, and must
+ * be within the limit.
+ */
+ .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
+#endif
+};
+EXPORT_PER_CPU_SYMBOL_GPL(cpu_tss);
#ifdef CONFIG_X86_64
static DEFINE_PER_CPU(unsigned char, is_idle);
@@ -109,7 +129,7 @@ void exit_thread(void)
unsigned long *bp = t->io_bitmap_ptr;
if (bp) {
- struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
+ struct tss_struct *tss = &per_cpu(cpu_tss, get_cpu());
t->io_bitmap_ptr = NULL;
clear_thread_flag(TIF_IO_BITMAP);
@@ -377,14 +397,11 @@ static void amd_e400_idle(void)
if (!cpumask_test_cpu(cpu, amd_e400_c1e_mask)) {
cpumask_set_cpu(cpu, amd_e400_c1e_mask);
- /*
- * Force broadcast so ACPI can not interfere.
- */
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
- &cpu);
+ /* Force broadcast so ACPI can not interfere. */
+ tick_broadcast_force();
pr_info("Switch to broadcast mode on CPU%d\n", cpu);
}
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
+ tick_broadcast_enter();
default_idle();
@@ -393,12 +410,59 @@ static void amd_e400_idle(void)
* called with interrupts disabled.
*/
local_irq_disable();
- clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
+ tick_broadcast_exit();
local_irq_enable();
} else
default_idle();
}
+/*
+ * Intel Core2 and older machines prefer MWAIT over HALT for C1.
+ * We can't rely on cpuidle installing MWAIT, because it will not load
+ * on systems that support only C1 -- so the boot default must be MWAIT.
+ *
+ * Some AMD machines are the opposite, they depend on using HALT.
+ *
+ * So for default C1, which is used during boot until cpuidle loads,
+ * use MWAIT-C1 on Intel HW that has it, else use HALT.
+ */
+static int prefer_mwait_c1_over_halt(const struct cpuinfo_x86 *c)
+{
+ if (c->x86_vendor != X86_VENDOR_INTEL)
+ return 0;
+
+ if (!cpu_has(c, X86_FEATURE_MWAIT))
+ return 0;
+
+ return 1;
+}
+
+/*
+ * MONITOR/MWAIT with no hints, used for default default C1 state.
+ * This invokes MWAIT with interrutps enabled and no flags,
+ * which is backwards compatible with the original MWAIT implementation.
+ */
+
+static void mwait_idle(void)
+{
+ if (!current_set_polling_and_test()) {
+ if (this_cpu_has(X86_BUG_CLFLUSH_MONITOR)) {
+ smp_mb(); /* quirk */
+ clflush((void *)&current_thread_info()->flags);
+ smp_mb(); /* quirk */
+ }
+
+ __monitor((void *)&current_thread_info()->flags, 0, 0);
+ if (!need_resched())
+ __sti_mwait(0, 0);
+ else
+ local_irq_enable();
+ } else {
+ local_irq_enable();
+ }
+ __current_clr_polling();
+}
+
void select_idle_routine(const struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
@@ -412,6 +476,9 @@ void select_idle_routine(const struct cpuinfo_x86 *c)
/* E400: APIC timer interrupt does not wake up CPU from C1e */
pr_info("using AMD E400 aware idle routine\n");
x86_idle = amd_e400_idle;
+ } else if (prefer_mwait_c1_over_halt(c)) {
+ pr_info("using mwait in idle threads\n");
+ x86_idle = mwait_idle;
} else
x86_idle = default_idle;
}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 603c4f99cb5a..8ed2106b06da 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -73,7 +73,7 @@ void __show_regs(struct pt_regs *regs, int all)
unsigned long sp;
unsigned short ss, gs;
- if (user_mode_vm(regs)) {
+ if (user_mode(regs)) {
sp = regs->sp;
ss = regs->ss & 0xffff;
gs = get_user_gs(regs);
@@ -206,11 +206,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
regs->ip = new_ip;
regs->sp = new_sp;
regs->flags = X86_EFLAGS_IF;
- /*
- * force it to the iret return path by making it look as if there was
- * some work pending.
- */
- set_thread_flag(TIF_NOTIFY_RESUME);
+ force_iret();
}
EXPORT_SYMBOL_GPL(start_thread);
@@ -248,7 +244,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
fpu_switch_t fpu;
/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
@@ -256,11 +252,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
fpu = switch_fpu_prepare(prev_p, next_p, cpu);
/*
- * Reload esp0.
- */
- load_sp0(tss, next);
-
- /*
* Save away %gs. No need to save %fs, as it was saved on the
* stack on entry. No need to save %es and %ds, as those are
* always kernel segments while inside the kernel. Doing this
@@ -310,9 +301,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*/
arch_end_context_switch(next_p);
+ /*
+ * Reload esp0, kernel_stack, and current_top_of_stack. This changes
+ * current_thread_info().
+ */
+ load_sp0(tss, next);
this_cpu_write(kernel_stack,
- (unsigned long)task_stack_page(next_p) +
- THREAD_SIZE - KERNEL_STACK_OFFSET);
+ (unsigned long)task_stack_page(next_p) +
+ THREAD_SIZE);
+ this_cpu_write(cpu_current_top_of_stack,
+ (unsigned long)task_stack_page(next_p) +
+ THREAD_SIZE);
/*
* Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 67fcc43577d2..4baaa972f52a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -52,7 +52,7 @@
asmlinkage extern void ret_from_fork(void);
-__visible DEFINE_PER_CPU(unsigned long, old_rsp);
+__visible DEFINE_PER_CPU(unsigned long, rsp_scratch);
/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs *regs, int all)
@@ -161,7 +161,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
childregs = task_pt_regs(p);
p->thread.sp = (unsigned long) childregs;
- p->thread.usersp = me->thread.usersp;
set_tsk_thread_flag(p, TIF_FORK);
p->thread.io_bitmap_ptr = NULL;
@@ -207,7 +206,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
*/
if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
- if (test_thread_flag(TIF_IA32))
+ if (is_ia32_task())
err = do_set_thread_area(p, -1,
(struct user_desc __user *)childregs->si, 0);
else
@@ -235,13 +234,12 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
loadsegment(es, _ds);
loadsegment(ds, _ds);
load_gs_index(0);
- current->thread.usersp = new_sp;
regs->ip = new_ip;
regs->sp = new_sp;
- this_cpu_write(old_rsp, new_sp);
regs->cs = _cs;
regs->ss = _ss;
regs->flags = X86_EFLAGS_IF;
+ force_iret();
}
void
@@ -277,15 +275,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct thread_struct *prev = &prev_p->thread;
struct thread_struct *next = &next_p->thread;
int cpu = smp_processor_id();
- struct tss_struct *tss = &per_cpu(init_tss, cpu);
+ struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
unsigned fsindex, gsindex;
fpu_switch_t fpu;
fpu = switch_fpu_prepare(prev_p, next_p, cpu);
- /* Reload esp0 and ss1. */
- load_sp0(tss, next);
-
/* We must save %fs and %gs before load_TLS() because
* %fs and %gs may be cleared by load_TLS().
*
@@ -401,8 +396,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
/*
* Switch the PDA and FPU contexts.
*/
- prev->usersp = this_cpu_read(old_rsp);
- this_cpu_write(old_rsp, next->usersp);
this_cpu_write(current_task, next_p);
/*
@@ -413,9 +406,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
task_thread_info(prev_p)->saved_preempt_count = this_cpu_read(__preempt_count);
this_cpu_write(__preempt_count, task_thread_info(next_p)->saved_preempt_count);
+ /* Reload esp0 and ss1. This changes current_thread_info(). */
+ load_sp0(tss, next);
+
this_cpu_write(kernel_stack,
- (unsigned long)task_stack_page(next_p) +
- THREAD_SIZE - KERNEL_STACK_OFFSET);
+ (unsigned long)task_stack_page(next_p) + THREAD_SIZE);
/*
* Now maybe reload the debug registers and handle I/O bitmaps
@@ -602,6 +597,5 @@ long sys_arch_prctl(int code, unsigned long addr)
unsigned long KSTK_ESP(struct task_struct *task)
{
- return (test_tsk_thread_flag(task, TIF_IA32)) ?
- (task_pt_regs(task)->sp) : ((task)->thread.usersp);
+ return task_pt_regs(task)->sp;
}
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index e510618b2e91..a7bc79480719 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -364,18 +364,12 @@ static int set_segment_reg(struct task_struct *task,
case offsetof(struct user_regs_struct,cs):
if (unlikely(value == 0))
return -EIO;
-#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(task, TIF_IA32))
- task_pt_regs(task)->cs = value;
-#endif
+ task_pt_regs(task)->cs = value;
break;
case offsetof(struct user_regs_struct,ss):
if (unlikely(value == 0))
return -EIO;
-#ifdef CONFIG_IA32_EMULATION
- if (test_tsk_thread_flag(task, TIF_IA32))
- task_pt_regs(task)->ss = value;
-#endif
+ task_pt_regs(task)->ss = value;
break;
}
@@ -1421,7 +1415,7 @@ static void fill_sigtrap_info(struct task_struct *tsk,
memset(info, 0, sizeof(*info));
info->si_signo = SIGTRAP;
info->si_code = si_code;
- info->si_addr = user_mode_vm(regs) ? (void __user *)regs->ip : NULL;
+ info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL;
}
void user_single_step_siginfo(struct task_struct *tsk,
diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index 2f355d229a58..e5ecd20e72dd 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -141,7 +141,46 @@ void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}
+static struct pvclock_vsyscall_time_info *pvclock_vdso_info;
+
+static struct pvclock_vsyscall_time_info *
+pvclock_get_vsyscall_user_time_info(int cpu)
+{
+ if (!pvclock_vdso_info) {
+ BUG();
+ return NULL;
+ }
+
+ return &pvclock_vdso_info[cpu];
+}
+
+struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
+{
+ return &pvclock_get_vsyscall_user_time_info(cpu)->pvti;
+}
+
#ifdef CONFIG_X86_64
+static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
+ void *v)
+{
+ struct task_migration_notifier *mn = v;
+ struct pvclock_vsyscall_time_info *pvti;
+
+ pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
+
+ /* this is NULL when pvclock vsyscall is not initialized */
+ if (unlikely(pvti == NULL))
+ return NOTIFY_DONE;
+
+ pvti->migrate_count++;
+
+ return NOTIFY_DONE;
+}
+
+static struct notifier_block pvclock_migrate = {
+ .notifier_call = pvclock_task_migrate,
+};
+
/*
* Initialize the generic pvclock vsyscall state. This will allocate
* a/some page(s) for the per-vcpu pvclock information, set up a
@@ -155,12 +194,17 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
WARN_ON (size != PVCLOCK_VSYSCALL_NR_PAGES*PAGE_SIZE);
+ pvclock_vdso_info = i;
+
for (idx = 0; idx <= (PVCLOCK_FIXMAP_END-PVCLOCK_FIXMAP_BEGIN); idx++) {
__set_fixmap(PVCLOCK_FIXMAP_BEGIN + idx,
__pa(i) + (idx*PAGE_SIZE),
PAGE_KERNEL_VVAR);
}
+
+ register_task_migration_notifier(&pvclock_migrate);
+
return 0;
}
#endif
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index bae6c609888e..86db4bcd7ce5 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -183,6 +183,16 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
},
},
+ /* ASRock */
+ { /* Handle problems with rebooting on ASRock Q1900DC-ITX */
+ .callback = set_pci_reboot,
+ .ident = "ASRock Q1900DC-ITX",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "ASRock"),
+ DMI_MATCH(DMI_BOARD_NAME, "Q1900DC-ITX"),
+ },
+ },
+
/* ASUS */
{ /* Handle problems with rebooting on ASUS P4S800 */
.callback = set_bios_reboot,
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index e13f8e7c22a6..77630d57e7bf 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -226,23 +226,23 @@ swap_pages:
movl (%ebx), %ecx
addl $4, %ebx
1:
- testl $0x1, %ecx /* is it a destination page */
+ testb $0x1, %cl /* is it a destination page */
jz 2f
movl %ecx, %edi
andl $0xfffff000, %edi
jmp 0b
2:
- testl $0x2, %ecx /* is it an indirection page */
+ testb $0x2, %cl /* is it an indirection page */
jz 2f
movl %ecx, %ebx
andl $0xfffff000, %ebx
jmp 0b
2:
- testl $0x4, %ecx /* is it the done indicator */
+ testb $0x4, %cl /* is it the done indicator */
jz 2f
jmp 3f
2:
- testl $0x8, %ecx /* is it the source indicator */
+ testb $0x8, %cl /* is it the source indicator */
jz 0b /* Ignore it otherwise */
movl %ecx, %esi /* For every source page do a copy */
andl $0xfffff000, %esi
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 3fd2c693e475..98111b38ebfd 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -123,7 +123,7 @@ identity_mapped:
* Set cr4 to a known state:
* - physical address extension enabled
*/
- movq $X86_CR4_PAE, %rax
+ movl $X86_CR4_PAE, %eax
movq %rax, %cr4
jmp 1f
@@ -221,23 +221,23 @@ swap_pages:
movq (%rbx), %rcx
addq $8, %rbx
1:
- testq $0x1, %rcx /* is it a destination page? */
+ testb $0x1, %cl /* is it a destination page? */
jz 2f
movq %rcx, %rdi
andq $0xfffffffffffff000, %rdi
jmp 0b
2:
- testq $0x2, %rcx /* is it an indirection page? */
+ testb $0x2, %cl /* is it an indirection page? */
jz 2f
movq %rcx, %rbx
andq $0xfffffffffffff000, %rbx
jmp 0b
2:
- testq $0x4, %rcx /* is it the done indicator? */
+ testb $0x4, %cl /* is it the done indicator? */
jz 2f
jmp 3f
2:
- testq $0x8, %rcx /* is it the source indicator? */
+ testb $0x8, %cl /* is it the source indicator? */
jz 0b /* Ignore it otherwise */
movq %rcx, %rsi /* For ever source page do a copy */
andq $0xfffffffffffff000, %rsi
@@ -246,17 +246,17 @@ swap_pages:
movq %rsi, %rax
movq %r10, %rdi
- movq $512, %rcx
+ movl $512, %ecx
rep ; movsq
movq %rax, %rdi
movq %rdx, %rsi
- movq $512, %rcx
+ movl $512, %ecx
rep ; movsq
movq %rdx, %rdi
movq %r10, %rsi
- movq $512, %rcx
+ movl $512, %ecx
rep ; movsq
lea PAGE_SIZE(%rax), %rsi
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 98dc9317286e..014466b152b5 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -122,8 +122,6 @@
unsigned long max_low_pfn_mapped;
unsigned long max_pfn_mapped;
-bool __read_mostly kaslr_enabled = false;
-
#ifdef CONFIG_DMI
RESERVE_BRK(dmi_alloc, 65536);
#endif
@@ -427,11 +425,6 @@ static void __init reserve_initrd(void)
}
#endif /* CONFIG_BLK_DEV_INITRD */
-static void __init parse_kaslr_setup(u64 pa_data, u32 data_len)
-{
- kaslr_enabled = (bool)(pa_data + sizeof(struct setup_data));
-}
-
static void __init parse_setup_data(void)
{
struct setup_data *data;
@@ -457,9 +450,6 @@ static void __init parse_setup_data(void)
case SETUP_EFI:
parse_efi_setup(pa_data, data_len);
break;
- case SETUP_KASLR:
- parse_kaslr_setup(pa_data, data_len);
- break;
default:
break;
}
@@ -842,14 +832,15 @@ static void __init trim_low_memory_range(void)
static int
dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
{
- if (kaslr_enabled)
+ if (kaslr_enabled()) {
pr_emerg("Kernel Offset: 0x%lx from 0x%lx (relocation range: 0x%lx-0x%lx)\n",
(unsigned long)&_text - __START_KERNEL,
__START_KERNEL,
__START_KERNEL_map,
MODULES_VADDR-1);
- else
+ } else {
pr_emerg("Kernel Offset: disabled\n");
+ }
return 0;
}
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index e5042463c1bc..53cc4085c3d7 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -61,8 +61,7 @@
regs->seg = GET_SEG(seg) | 3; \
} while (0)
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
- unsigned long *pax)
+int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
{
void __user *buf;
unsigned int tmpflags;
@@ -81,7 +80,7 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
#endif /* CONFIG_X86_32 */
COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
- COPY(dx); COPY(cx); COPY(ip);
+ COPY(dx); COPY(cx); COPY(ip); COPY(ax);
#ifdef CONFIG_X86_64
COPY(r8);
@@ -94,27 +93,20 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
COPY(r15);
#endif /* CONFIG_X86_64 */
-#ifdef CONFIG_X86_32
COPY_SEG_CPL3(cs);
COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
- /* Kernel saves and restores only the CS segment register on signals,
- * which is the bare minimum needed to allow mixed 32/64-bit code.
- * App's signal handler can save/restore other segments if needed. */
- COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
get_user_ex(tmpflags, &sc->flags);
regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
regs->orig_ax = -1; /* disable syscall checks */
get_user_ex(buf, &sc->fpstate);
-
- get_user_ex(*pax, &sc->ax);
} get_user_catch(err);
err |= restore_xstate_sig(buf, config_enabled(CONFIG_X86_32));
+ force_iret();
+
return err;
}
@@ -162,8 +154,9 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
#else /* !CONFIG_X86_32 */
put_user_ex(regs->flags, &sc->flags);
put_user_ex(regs->cs, &sc->cs);
- put_user_ex(0, &sc->gs);
- put_user_ex(0, &sc->fs);
+ put_user_ex(0, &sc->__pad2);
+ put_user_ex(0, &sc->__pad1);
+ put_user_ex(regs->ss, &sc->ss);
#endif /* CONFIG_X86_32 */
put_user_ex(fpstate, &sc->fpstate);
@@ -457,9 +450,19 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
regs->sp = (unsigned long)frame;
- /* Set up the CS register to run signal handlers in 64-bit mode,
- even if the handler happens to be interrupting 32-bit code. */
+ /*
+ * Set up the CS and SS registers to run signal handlers in
+ * 64-bit mode, even if the handler happens to be interrupting
+ * 32-bit or 16-bit code.
+ *
+ * SS is subtle. In 64-bit mode, we don't need any particular
+ * SS descriptor, but we do need SS to be valid. It's possible
+ * that the old SS is entirely bogus -- this can happen if the
+ * signal we're trying to deliver is #GP or #SS caused by a bad
+ * SS value.
+ */
regs->cs = __USER_CS;
+ regs->ss = __USER_DS;
return 0;
}
@@ -539,7 +542,6 @@ asmlinkage unsigned long sys_sigreturn(void)
{
struct pt_regs *regs = current_pt_regs();
struct sigframe __user *frame;
- unsigned long ax;
sigset_t set;
frame = (struct sigframe __user *)(regs->sp - 8);
@@ -553,9 +555,9 @@ asmlinkage unsigned long sys_sigreturn(void)
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->sc, &ax))
+ if (restore_sigcontext(regs, &frame->sc))
goto badframe;
- return ax;
+ return regs->ax;
badframe:
signal_fault(regs, frame, "sigreturn");
@@ -568,7 +570,6 @@ asmlinkage long sys_rt_sigreturn(void)
{
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe __user *frame;
- unsigned long ax;
sigset_t set;
frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
@@ -579,13 +580,13 @@ asmlinkage long sys_rt_sigreturn(void)
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
if (restore_altstack(&frame->uc.uc_stack))
goto badframe;
- return ax;
+ return regs->ax;
badframe:
signal_fault(regs, frame, "rt_sigreturn");
@@ -780,7 +781,6 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
struct pt_regs *regs = current_pt_regs();
struct rt_sigframe_x32 __user *frame;
sigset_t set;
- unsigned long ax;
frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
@@ -791,13 +791,13 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
set_current_blocked(&set);
- if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
+ if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
goto badframe;
if (compat_restore_altstack(&frame->uc.uc_stack))
goto badframe;
- return ax;
+ return regs->ax;
badframe:
signal_fault(regs, frame, "x32 rt_sigreturn");
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index febc6aabc72e..7035f6b21c3f 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -779,6 +779,26 @@ out:
return boot_error;
}
+void common_cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+ /* Just in case we booted with a single CPU. */
+ alternatives_enable_smp();
+
+ per_cpu(current_task, cpu) = idle;
+
+#ifdef CONFIG_X86_32
+ /* Stack for startup_32 can be just as for start_secondary onwards */
+ irq_ctx_init(cpu);
+ per_cpu(cpu_current_top_of_stack, cpu) =
+ (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+#else
+ clear_tsk_thread_flag(idle, TIF_FORK);
+ initial_gs = per_cpu_offset(cpu);
+#endif
+ per_cpu(kernel_stack, cpu) =
+ (unsigned long)task_stack_page(idle) + THREAD_SIZE;
+}
+
/*
* NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
* (ie clustered apic addressing mode), this is a LOGICAL apic ID.
@@ -796,23 +816,9 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
int cpu0_nmi_registered = 0;
unsigned long timeout;
- /* Just in case we booted with a single CPU. */
- alternatives_enable_smp();
-
idle->thread.sp = (unsigned long) (((struct pt_regs *)
(THREAD_SIZE + task_stack_page(idle))) - 1);
- per_cpu(current_task, cpu) = idle;
-#ifdef CONFIG_X86_32
- /* Stack for startup_32 can be just as for start_secondary onwards */
- irq_ctx_init(cpu);
-#else
- clear_tsk_thread_flag(idle, TIF_FORK);
- initial_gs = per_cpu_offset(cpu);
-#endif
- per_cpu(kernel_stack, cpu) =
- (unsigned long)task_stack_page(idle) -
- KERNEL_STACK_OFFSET + THREAD_SIZE;
early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
initial_code = (unsigned long)start_secondary;
stack_start = idle->thread.sp;
@@ -953,6 +959,8 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
/* the FPU context is blank, nobody can own it */
__cpu_disable_lazy_restore(cpu);
+ common_cpu_up(cpu, tidle);
+
err = do_boot_cpu(apicid, cpu, tidle);
if (err) {
pr_err("do_boot_cpu failed(%d) to wakeup CPU#%u\n", err, cpu);
@@ -1086,8 +1094,6 @@ static int __init smp_sanity_check(unsigned max_cpus)
return SMP_NO_APIC;
}
- verify_local_APIC();
-
/*
* If SMP should be disabled, then really disable it!
*/
diff --git a/arch/x86/kernel/syscall_32.c b/arch/x86/kernel/syscall_32.c
index e9bcd57d8a9e..3777189c4a19 100644
--- a/arch/x86/kernel/syscall_32.c
+++ b/arch/x86/kernel/syscall_32.c
@@ -5,21 +5,29 @@
#include <linux/cache.h>
#include <asm/asm-offsets.h>
-#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void sym(void) ;
+#ifdef CONFIG_IA32_EMULATION
+#define SYM(sym, compat) compat
+#else
+#define SYM(sym, compat) sym
+#define ia32_sys_call_table sys_call_table
+#define __NR_ia32_syscall_max __NR_syscall_max
+#endif
+
+#define __SYSCALL_I386(nr, sym, compat) extern asmlinkage void SYM(sym, compat)(void) ;
#include <asm/syscalls_32.h>
#undef __SYSCALL_I386
-#define __SYSCALL_I386(nr, sym, compat) [nr] = sym,
+#define __SYSCALL_I386(nr, sym, compat) [nr] = SYM(sym, compat),
typedef asmlinkage void (*sys_call_ptr_t)(void);
extern asmlinkage void sys_ni_syscall(void);
-__visible const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = {
+__visible const sys_call_ptr_t ia32_sys_call_table[__NR_ia32_syscall_max+1] = {
/*
* Smells like a compiler bug -- it doesn't work
* when the & below is removed.
*/
- [0 ... __NR_syscall_max] = &sys_ni_syscall,
+ [0 ... __NR_ia32_syscall_max] = &sys_ni_syscall,
#include <asm/syscalls_32.h>
};
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 25adc0e16eaa..d39c09119db6 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
@@ -30,7 +30,7 @@ unsigned long profile_pc(struct pt_regs *regs)
{
unsigned long pc = instruction_pointer(regs);
- if (!user_mode_vm(regs) && in_lock_functions(pc)) {
+ if (!user_mode(regs) && in_lock_functions(pc)) {
#ifdef CONFIG_FRAME_POINTER
return *(unsigned long *)(regs->bp + sizeof(long));
#else
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 9d2073e2ecc9..6751c5c58eec 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -112,7 +112,7 @@ enum ctx_state ist_enter(struct pt_regs *regs)
{
enum ctx_state prev_state;
- if (user_mode_vm(regs)) {
+ if (user_mode(regs)) {
/* Other than that, we're just an exception. */
prev_state = exception_enter();
} else {
@@ -146,7 +146,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
/* Must be before exception_exit. */
preempt_count_sub(HARDIRQ_OFFSET);
- if (user_mode_vm(regs))
+ if (user_mode(regs))
return exception_exit(prev_state);
else
rcu_nmi_exit();
@@ -158,7 +158,7 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
*
* IST exception handlers normally cannot schedule. As a special
* exception, if the exception interrupted userspace code (i.e.
- * user_mode_vm(regs) would return true) and the exception was not
+ * user_mode(regs) would return true) and the exception was not
* a double fault, it can be safe to schedule. ist_begin_non_atomic()
* begins a non-atomic section within an ist_enter()/ist_exit() region.
* Callers are responsible for enabling interrupts themselves inside
@@ -167,15 +167,15 @@ void ist_exit(struct pt_regs *regs, enum ctx_state prev_state)
*/
void ist_begin_non_atomic(struct pt_regs *regs)
{
- BUG_ON(!user_mode_vm(regs));
+ BUG_ON(!user_mode(regs));
/*
* Sanity check: we need to be on the normal thread stack. This
* will catch asm bugs and any attempt to use ist_preempt_enable
* from double_fault.
*/
- BUG_ON(((current_stack_pointer() ^ this_cpu_read_stable(kernel_stack))
- & ~(THREAD_SIZE - 1)) != 0);
+ BUG_ON((unsigned long)(current_top_of_stack() -
+ current_stack_pointer()) >= THREAD_SIZE);
preempt_count_sub(HARDIRQ_OFFSET);
}
@@ -194,8 +194,7 @@ static nokprobe_inline int
do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
struct pt_regs *regs, long error_code)
{
-#ifdef CONFIG_X86_32
- if (regs->flags & X86_VM_MASK) {
+ if (v8086_mode(regs)) {
/*
* Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
* On nmi (interrupt 2), do_trap should not be called.
@@ -207,7 +206,7 @@ do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
}
return -1;
}
-#endif
+
if (!user_mode(regs)) {
if (!fixup_exception(regs)) {
tsk->thread.error_code = error_code;
@@ -462,13 +461,11 @@ do_general_protection(struct pt_regs *regs, long error_code)
prev_state = exception_enter();
conditional_sti(regs);
-#ifdef CONFIG_X86_32
- if (regs->flags & X86_VM_MASK) {
+ if (v8086_mode(regs)) {
local_irq_enable();
handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
goto exit;
}
-#endif
tsk = current;
if (!user_mode(regs)) {
@@ -587,7 +584,7 @@ struct bad_iret_stack *fixup_bad_iret(struct bad_iret_stack *s)
/* Copy the remainder of the stack from the current stack. */
memmove(new_stack, s, offsetof(struct bad_iret_stack, regs.ip));
- BUG_ON(!user_mode_vm(&new_stack->regs));
+ BUG_ON(!user_mode(&new_stack->regs));
return new_stack;
}
NOKPROBE_SYMBOL(fixup_bad_iret);
@@ -673,7 +670,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code)
/* It's safe to allow irq's after DR6 has been saved */
preempt_conditional_sti(regs);
- if (regs->flags & X86_VM_MASK) {
+ if (v8086_mode(regs)) {
handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code,
X86_TRAP_DB);
preempt_conditional_cli(regs);
@@ -721,7 +718,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr)
return;
conditional_sti(regs);
- if (!user_mode_vm(regs))
+ if (!user_mode(regs))
{
if (!fixup_exception(regs)) {
task->thread.error_code = error_code;
@@ -925,9 +922,21 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
/* Set of traps needed for early debugging. */
void __init early_trap_init(void)
{
- set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+ /*
+ * Don't use IST to set DEBUG_STACK as it doesn't work until TSS
+ * is ready in cpu_init() <-- trap_init(). Before trap_init(),
+ * CPU runs at ring 0 so it is impossible to hit an invalid
+ * stack. Using the original stack works well enough at this
+ * early stage. DEBUG_STACK will be equipped after cpu_init() in
+ * trap_init().
+ *
+ * We don't need to set trace_idt_table like set_intr_gate(),
+ * since we don't have trace_debug and it will be reset to
+ * 'debug' in trap_init() by set_intr_gate_ist().
+ */
+ set_intr_gate_notrace(X86_TRAP_DB, debug);
/* int3 can be called from all */
- set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+ set_system_intr_gate(X86_TRAP_BP, &int3);
#ifdef CONFIG_X86_32
set_intr_gate(X86_TRAP_PF, page_fault);
#endif
@@ -1005,6 +1014,15 @@ void __init trap_init(void)
*/
cpu_init();
+ /*
+ * X86_TRAP_DB and X86_TRAP_BP have been set
+ * in early_trap_init(). However, ITS works only after
+ * cpu_init() loads TSS. See comments in early_trap_init().
+ */
+ set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+ /* int3 can be called from all */
+ set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+
x86_init.irqs.trap_init();
#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 81f8adb0679e..0b81ad67da07 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -912,7 +912,7 @@ int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val,
int ret = NOTIFY_DONE;
/* We are only interested in userspace traps */
- if (regs && !user_mode_vm(regs))
+ if (regs && !user_mode(regs))
return NOTIFY_DONE;
switch (val) {
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index e8edcf52e069..fc9db6ef2a95 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -150,7 +150,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
do_exit(SIGSEGV);
}
- tss = &per_cpu(init_tss, get_cpu());
+ tss = &per_cpu(cpu_tss, get_cpu());
current->thread.sp0 = current->thread.saved_sp0;
current->thread.sysenter_cs = __KERNEL_CS;
load_sp0(tss, &current->thread);
@@ -318,7 +318,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
tsk->thread.saved_fs = info->regs32->fs;
tsk->thread.saved_gs = get_user_gs(info->regs32);
- tss = &per_cpu(init_tss, get_cpu());
+ tss = &per_cpu(cpu_tss, get_cpu());
tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
if (cpu_has_sep)
tsk->thread.sysenter_cs = 0;
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c
index c7d791f32b98..51e330416995 100644
--- a/arch/x86/kernel/vsyscall_gtod.c
+++ b/arch/x86/kernel/vsyscall_gtod.c
@@ -31,30 +31,30 @@ void update_vsyscall(struct timekeeper *tk)
gtod_write_begin(vdata);
/* copy vsyscall data */
- vdata->vclock_mode = tk->tkr.clock->archdata.vclock_mode;
- vdata->cycle_last = tk->tkr.cycle_last;
- vdata->mask = tk->tkr.mask;
- vdata->mult = tk->tkr.mult;
- vdata->shift = tk->tkr.shift;
+ vdata->vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+ vdata->cycle_last = tk->tkr_mono.cycle_last;
+ vdata->mask = tk->tkr_mono.mask;
+ vdata->mult = tk->tkr_mono.mult;
+ vdata->shift = tk->tkr_mono.shift;
vdata->wall_time_sec = tk->xtime_sec;
- vdata->wall_time_snsec = tk->tkr.xtime_nsec;
+ vdata->wall_time_snsec = tk->tkr_mono.xtime_nsec;
vdata->monotonic_time_sec = tk->xtime_sec
+ tk->wall_to_monotonic.tv_sec;
- vdata->monotonic_time_snsec = tk->tkr.xtime_nsec
+ vdata->monotonic_time_snsec = tk->tkr_mono.xtime_nsec
+ ((u64)tk->wall_to_monotonic.tv_nsec
- << tk->tkr.shift);
+ << tk->tkr_mono.shift);
while (vdata->monotonic_time_snsec >=
- (((u64)NSEC_PER_SEC) << tk->tkr.shift)) {
+ (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
vdata->monotonic_time_snsec -=
- ((u64)NSEC_PER_SEC) << tk->tkr.shift;
+ ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift;
vdata->monotonic_time_sec++;
}
vdata->wall_time_coarse_sec = tk->xtime_sec;
- vdata->wall_time_coarse_nsec = (long)(tk->tkr.xtime_nsec >>
- tk->tkr.shift);
+ vdata->wall_time_coarse_nsec = (long)(tk->tkr_mono.xtime_nsec >>
+ tk->tkr_mono.shift);
vdata->monotonic_time_coarse_sec =
vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec;
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 34f66e58a896..cdc6cf903078 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -379,7 +379,7 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
* thread's fpu state, reconstruct fxstate from the fsave
* header. Sanitize the copied state etc.
*/
- struct xsave_struct *xsave = &tsk->thread.fpu.state->xsave;
+ struct fpu *fpu = &tsk->thread.fpu;
struct user_i387_ia32_struct env;
int err = 0;
@@ -393,14 +393,15 @@ int __restore_xstate_sig(void __user *buf, void __user *buf_fx, int size)
*/
drop_fpu(tsk);
- if (__copy_from_user(xsave, buf_fx, state_size) ||
+ if (__copy_from_user(&fpu->state->xsave, buf_fx, state_size) ||
__copy_from_user(&env, buf, sizeof(env))) {
+ fpu_finit(fpu);
err = -1;
} else {
sanitize_restored_xstate(tsk, &env, xstate_bv, fx_only);
- set_used_math();
}
+ set_used_math();
if (use_eager_fpu()) {
preempt_disable();
math_state_restore();
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 08f790dfadc9..16e8f962eaad 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -1,5 +1,5 @@
-ccflags-y += -Ivirt/kvm -Iarch/x86/kvm
+ccflags-y += -Iarch/x86/kvm
CFLAGS_x86.o := -I.
CFLAGS_svm.o := -I.
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8a80737ee6e6..59b69f6a2844 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -104,6 +104,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
((best->eax & 0xff00) >> 8) != 0)
return -EINVAL;
+ /* Update physical-address width */
+ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
kvm_pmu_cpuid_update(vcpu);
return 0;
}
@@ -135,6 +138,21 @@ static void cpuid_fix_nx_cap(struct kvm_vcpu *vcpu)
}
}
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
+ if (!best || best->eax < 0x80000008)
+ goto not_found;
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ if (best)
+ return best->eax & 0xff;
+not_found:
+ return 36;
+}
+EXPORT_SYMBOL_GPL(cpuid_query_maxphyaddr);
+
/* when an old userspace process fills a new kernel module */
int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu,
struct kvm_cpuid *cpuid,
@@ -757,21 +775,6 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
-int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
-{
- struct kvm_cpuid_entry2 *best;
-
- best = kvm_find_cpuid_entry(vcpu, 0x80000000, 0);
- if (!best || best->eax < 0x80000008)
- goto not_found;
- best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
- if (best)
- return best->eax & 0xff;
-not_found:
- return 36;
-}
-EXPORT_SYMBOL_GPL(cpuid_maxphyaddr);
-
/*
* If no match is found, check whether we exceed the vCPU's limit
* and return the content of the highest valid _standard_ leaf instead.
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 4452eedfaedd..c3b1ad9fca81 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -20,13 +20,19 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
struct kvm_cpuid_entry2 __user *entries);
void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
+int cpuid_query_maxphyaddr(struct kvm_vcpu *vcpu);
+
+static inline int cpuid_maxphyaddr(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.maxphyaddr;
+}
static inline bool guest_cpuid_has_xsave(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
if (!static_cpu_has(X86_FEATURE_XSAVE))
- return 0;
+ return false;
best = kvm_find_cpuid_entry(vcpu, 1, 0);
return best && (best->ecx & bit(X86_FEATURE_XSAVE));
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e0b794a84c35..630bcb0d7a04 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -248,27 +248,7 @@ struct mode_dual {
struct opcode mode64;
};
-/* EFLAGS bit definitions. */
-#define EFLG_ID (1<<21)
-#define EFLG_VIP (1<<20)
-#define EFLG_VIF (1<<19)
-#define EFLG_AC (1<<18)
-#define EFLG_VM (1<<17)
-#define EFLG_RF (1<<16)
-#define EFLG_IOPL (3<<12)
-#define EFLG_NT (1<<14)
-#define EFLG_OF (1<<11)
-#define EFLG_DF (1<<10)
-#define EFLG_IF (1<<9)
-#define EFLG_TF (1<<8)
-#define EFLG_SF (1<<7)
-#define EFLG_ZF (1<<6)
-#define EFLG_AF (1<<4)
-#define EFLG_PF (1<<2)
-#define EFLG_CF (1<<0)
-
#define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
-#define EFLG_RESERVED_ONE_MASK 2
enum x86_transfer_type {
X86_TRANSFER_NONE,
@@ -317,7 +297,8 @@ static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
* These EFLAGS bits are restored from saved value during emulation, and
* any changes are written back to the saved value after emulation.
*/
-#define EFLAGS_MASK (EFLG_OF|EFLG_SF|EFLG_ZF|EFLG_AF|EFLG_PF|EFLG_CF)
+#define EFLAGS_MASK (X86_EFLAGS_OF|X86_EFLAGS_SF|X86_EFLAGS_ZF|X86_EFLAGS_AF|\
+ X86_EFLAGS_PF|X86_EFLAGS_CF)
#ifdef CONFIG_X86_64
#define ON64(x) x
@@ -478,6 +459,25 @@ static void assign_masked(ulong *dest, ulong src, ulong mask)
*dest = (*dest & ~mask) | (src & mask);
}
+static void assign_register(unsigned long *reg, u64 val, int bytes)
+{
+ /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
+ switch (bytes) {
+ case 1:
+ *(u8 *)reg = (u8)val;
+ break;
+ case 2:
+ *(u16 *)reg = (u16)val;
+ break;
+ case 4:
+ *reg = (u32)val;
+ break; /* 64b: zero-extend */
+ case 8:
+ *reg = val;
+ break;
+ }
+}
+
static inline unsigned long ad_mask(struct x86_emulate_ctxt *ctxt)
{
return (1UL << (ctxt->ad_bytes << 3)) - 1;
@@ -943,6 +943,22 @@ FASTOP2(xadd);
FASTOP2R(cmp, cmp_r);
+static int em_bsf_c(struct x86_emulate_ctxt *ctxt)
+{
+ /* If src is zero, do not writeback, but update flags */
+ if (ctxt->src.val == 0)
+ ctxt->dst.type = OP_NONE;
+ return fastop(ctxt, em_bsf);
+}
+
+static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
+{
+ /* If src is zero, do not writeback, but update flags */
+ if (ctxt->src.val == 0)
+ ctxt->dst.type = OP_NONE;
+ return fastop(ctxt, em_bsr);
+}
+
static u8 test_cc(unsigned int condition, unsigned long flags)
{
u8 rc;
@@ -1399,7 +1415,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
unsigned int in_page, n;
unsigned int count = ctxt->rep_prefix ?
address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
- in_page = (ctxt->eflags & EFLG_DF) ?
+ in_page = (ctxt->eflags & X86_EFLAGS_DF) ?
offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
n = min3(in_page, (unsigned int)sizeof(rc->data) / size, count);
@@ -1412,7 +1428,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
}
if (ctxt->rep_prefix && (ctxt->d & String) &&
- !(ctxt->eflags & EFLG_DF)) {
+ !(ctxt->eflags & X86_EFLAGS_DF)) {
ctxt->dst.data = rc->data + rc->pos;
ctxt->dst.type = OP_MEM_STR;
ctxt->dst.count = (rc->end - rc->pos) / size;
@@ -1691,21 +1707,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
static void write_register_operand(struct operand *op)
{
- /* The 4-byte case *is* correct: in 64-bit mode we zero-extend. */
- switch (op->bytes) {
- case 1:
- *(u8 *)op->addr.reg = (u8)op->val;
- break;
- case 2:
- *(u16 *)op->addr.reg = (u16)op->val;
- break;
- case 4:
- *op->addr.reg = (u32)op->val;
- break; /* 64b: zero-extend */
- case 8:
- *op->addr.reg = op->val;
- break;
- }
+ return assign_register(op->addr.reg, op->val, op->bytes);
}
static int writeback(struct x86_emulate_ctxt *ctxt, struct operand *op)
@@ -1792,32 +1794,34 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt,
{
int rc;
unsigned long val, change_mask;
- int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+ int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
int cpl = ctxt->ops->cpl(ctxt);
rc = emulate_pop(ctxt, &val, len);
if (rc != X86EMUL_CONTINUE)
return rc;
- change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
- | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_AC | EFLG_ID;
+ change_mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+ X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF |
+ X86_EFLAGS_TF | X86_EFLAGS_DF | X86_EFLAGS_NT |
+ X86_EFLAGS_AC | X86_EFLAGS_ID;
switch(ctxt->mode) {
case X86EMUL_MODE_PROT64:
case X86EMUL_MODE_PROT32:
case X86EMUL_MODE_PROT16:
if (cpl == 0)
- change_mask |= EFLG_IOPL;
+ change_mask |= X86_EFLAGS_IOPL;
if (cpl <= iopl)
- change_mask |= EFLG_IF;
+ change_mask |= X86_EFLAGS_IF;
break;
case X86EMUL_MODE_VM86:
if (iopl < 3)
return emulate_gp(ctxt, 0);
- change_mask |= EFLG_IF;
+ change_mask |= X86_EFLAGS_IF;
break;
default: /* real mode */
- change_mask |= (EFLG_IOPL | EFLG_IF);
+ change_mask |= (X86_EFLAGS_IOPL | X86_EFLAGS_IF);
break;
}
@@ -1918,7 +1922,7 @@ static int em_pusha(struct x86_emulate_ctxt *ctxt)
static int em_pushf(struct x86_emulate_ctxt *ctxt)
{
- ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
+ ctxt->src.val = (unsigned long)ctxt->eflags & ~X86_EFLAGS_VM;
return em_push(ctxt);
}
@@ -1926,6 +1930,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
{
int rc = X86EMUL_CONTINUE;
int reg = VCPU_REGS_RDI;
+ u32 val;
while (reg >= VCPU_REGS_RAX) {
if (reg == VCPU_REGS_RSP) {
@@ -1933,9 +1938,10 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
--reg;
}
- rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
+ rc = emulate_pop(ctxt, &val, ctxt->op_bytes);
if (rc != X86EMUL_CONTINUE)
break;
+ assign_register(reg_rmw(ctxt, reg), val, ctxt->op_bytes);
--reg;
}
return rc;
@@ -1956,7 +1962,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
if (rc != X86EMUL_CONTINUE)
return rc;
- ctxt->eflags &= ~(EFLG_IF | EFLG_TF | EFLG_AC);
+ ctxt->eflags &= ~(X86_EFLAGS_IF | X86_EFLAGS_TF | X86_EFLAGS_AC);
ctxt->src.val = get_segment_selector(ctxt, VCPU_SREG_CS);
rc = em_push(ctxt);
@@ -2022,10 +2028,14 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
unsigned long temp_eip = 0;
unsigned long temp_eflags = 0;
unsigned long cs = 0;
- unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_TF |
- EFLG_IF | EFLG_DF | EFLG_OF | EFLG_IOPL | EFLG_NT | EFLG_RF |
- EFLG_AC | EFLG_ID | (1 << 1); /* Last one is the reserved bit */
- unsigned long vm86_mask = EFLG_VM | EFLG_VIF | EFLG_VIP;
+ unsigned long mask = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
+ X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_TF |
+ X86_EFLAGS_IF | X86_EFLAGS_DF | X86_EFLAGS_OF |
+ X86_EFLAGS_IOPL | X86_EFLAGS_NT | X86_EFLAGS_RF |
+ X86_EFLAGS_AC | X86_EFLAGS_ID |
+ X86_EFLAGS_FIXED;
+ unsigned long vm86_mask = X86_EFLAGS_VM | X86_EFLAGS_VIF |
+ X86_EFLAGS_VIP;
/* TODO: Add stack limit check */
@@ -2054,7 +2064,6 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
ctxt->_eip = temp_eip;
-
if (ctxt->op_bytes == 4)
ctxt->eflags = ((temp_eflags & mask) | (ctxt->eflags & vm86_mask));
else if (ctxt->op_bytes == 2) {
@@ -2063,7 +2072,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
}
ctxt->eflags &= ~EFLG_RESERVED_ZEROS_MASK; /* Clear reserved zeros */
- ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+ ctxt->eflags |= X86_EFLAGS_FIXED;
ctxt->ops->set_nmi_mask(ctxt, false);
return rc;
@@ -2145,12 +2154,12 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
- ctxt->eflags &= ~EFLG_ZF;
+ ctxt->eflags &= ~X86_EFLAGS_ZF;
} else {
ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
(u32) reg_read(ctxt, VCPU_REGS_RBX);
- ctxt->eflags |= EFLG_ZF;
+ ctxt->eflags |= X86_EFLAGS_ZF;
}
return X86EMUL_CONTINUE;
}
@@ -2222,7 +2231,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
ctxt->src.val = ctxt->dst.orig_val;
fastop(ctxt, em_cmp);
- if (ctxt->eflags & EFLG_ZF) {
+ if (ctxt->eflags & X86_EFLAGS_ZF) {
/* Success: write back to memory; no update of EAX */
ctxt->src.type = OP_NONE;
ctxt->dst.val = ctxt->src.orig_val;
@@ -2381,14 +2390,14 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
ctxt->eflags &= ~msr_data;
- ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
+ ctxt->eflags |= X86_EFLAGS_FIXED;
#endif
} else {
/* legacy mode */
ops->get_msr(ctxt, MSR_STAR, &msr_data);
ctxt->_eip = (u32)msr_data;
- ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
+ ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
}
return X86EMUL_CONTINUE;
@@ -2425,8 +2434,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
if ((msr_data & 0xfffc) == 0x0)
return emulate_gp(ctxt, 0);
- ctxt->eflags &= ~(EFLG_VM | EFLG_IF);
- cs_sel = (u16)msr_data & ~SELECTOR_RPL_MASK;
+ ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
+ cs_sel = (u16)msr_data & ~SEGMENT_RPL_MASK;
ss_sel = cs_sel + 8;
if (efer & EFER_LMA) {
cs.d = 0;
@@ -2493,8 +2502,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
return emulate_gp(ctxt, 0);
break;
}
- cs_sel |= SELECTOR_RPL_MASK;
- ss_sel |= SELECTOR_RPL_MASK;
+ cs_sel |= SEGMENT_RPL_MASK;
+ ss_sel |= SEGMENT_RPL_MASK;
ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
@@ -2512,7 +2521,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
return false;
if (ctxt->mode == X86EMUL_MODE_VM86)
return true;
- iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+ iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> X86_EFLAGS_IOPL_BIT;
return ctxt->ops->cpl(ctxt) > iopl;
}
@@ -2782,10 +2791,8 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
return ret;
ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
X86_TRANSFER_TASK_SWITCH, NULL);
- if (ret != X86EMUL_CONTINUE)
- return ret;
- return X86EMUL_CONTINUE;
+ return ret;
}
static int task_switch_32(struct x86_emulate_ctxt *ctxt,
@@ -2954,7 +2961,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
struct operand *op)
{
- int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
+ int df = (ctxt->eflags & X86_EFLAGS_DF) ? -op->count : op->count;
register_address_increment(ctxt, reg, df * op->bytes);
op->addr.mem.ea = register_address(ctxt, reg);
@@ -3323,7 +3330,7 @@ static int em_clts(struct x86_emulate_ctxt *ctxt)
return X86EMUL_CONTINUE;
}
-static int em_vmcall(struct x86_emulate_ctxt *ctxt)
+static int em_hypercall(struct x86_emulate_ctxt *ctxt)
{
int rc = ctxt->ops->fix_hypercall(ctxt);
@@ -3395,17 +3402,6 @@ static int em_lgdt(struct x86_emulate_ctxt *ctxt)
return em_lgdt_lidt(ctxt, true);
}
-static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
-{
- int rc;
-
- rc = ctxt->ops->fix_hypercall(ctxt);
-
- /* Disable writeback. */
- ctxt->dst.type = OP_NONE;
- return rc;
-}
-
static int em_lidt(struct x86_emulate_ctxt *ctxt)
{
return em_lgdt_lidt(ctxt, false);
@@ -3504,7 +3500,8 @@ static int em_sahf(struct x86_emulate_ctxt *ctxt)
{
u32 flags;
- flags = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF;
+ flags = X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
+ X86_EFLAGS_SF;
flags &= *reg_rmw(ctxt, VCPU_REGS_RAX) >> 8;
ctxt->eflags &= ~0xffUL;
@@ -3769,7 +3766,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
static const struct opcode group7_rm0[] = {
N,
- I(SrcNone | Priv | EmulateOnUD, em_vmcall),
+ I(SrcNone | Priv | EmulateOnUD, em_hypercall),
N, N, N, N, N, N,
};
@@ -3781,7 +3778,7 @@ static const struct opcode group7_rm1[] = {
static const struct opcode group7_rm3[] = {
DIP(SrcNone | Prot | Priv, vmrun, check_svme_pa),
- II(SrcNone | Prot | EmulateOnUD, em_vmmcall, vmmcall),
+ II(SrcNone | Prot | EmulateOnUD, em_hypercall, vmmcall),
DIP(SrcNone | Prot | Priv, vmload, check_svme_pa),
DIP(SrcNone | Prot | Priv, vmsave, check_svme_pa),
DIP(SrcNone | Prot | Priv, stgi, check_svme),
@@ -4192,7 +4189,8 @@ static const struct opcode twobyte_table[256] = {
N, N,
G(BitOp, group8),
F(DstMem | SrcReg | ModRM | BitOp | Lock | PageTable, em_btc),
- F(DstReg | SrcMem | ModRM, em_bsf), F(DstReg | SrcMem | ModRM, em_bsr),
+ I(DstReg | SrcMem | ModRM, em_bsf_c),
+ I(DstReg | SrcMem | ModRM, em_bsr_c),
D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
/* 0xC0 - 0xC7 */
F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
@@ -4759,9 +4757,9 @@ static bool string_insn_completed(struct x86_emulate_ctxt *ctxt)
if (((ctxt->b == 0xa6) || (ctxt->b == 0xa7) ||
(ctxt->b == 0xae) || (ctxt->b == 0xaf))
&& (((ctxt->rep_prefix == REPE_PREFIX) &&
- ((ctxt->eflags & EFLG_ZF) == 0))
+ ((ctxt->eflags & X86_EFLAGS_ZF) == 0))
|| ((ctxt->rep_prefix == REPNE_PREFIX) &&
- ((ctxt->eflags & EFLG_ZF) == EFLG_ZF))))
+ ((ctxt->eflags & X86_EFLAGS_ZF) == X86_EFLAGS_ZF))))
return true;
return false;
@@ -4913,7 +4911,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
/* All REP prefixes have the same first termination condition */
if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
ctxt->eip = ctxt->_eip;
- ctxt->eflags &= ~EFLG_RF;
+ ctxt->eflags &= ~X86_EFLAGS_RF;
goto done;
}
}
@@ -4950,7 +4948,8 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
}
- ctxt->dst.orig_val = ctxt->dst.val;
+ /* Copy full 64-bit value for CMPXCHG8B. */
+ ctxt->dst.orig_val64 = ctxt->dst.val64;
special_insn:
@@ -4962,9 +4961,9 @@ special_insn:
}
if (ctxt->rep_prefix && (ctxt->d & String))
- ctxt->eflags |= EFLG_RF;
+ ctxt->eflags |= X86_EFLAGS_RF;
else
- ctxt->eflags &= ~EFLG_RF;
+ ctxt->eflags &= ~X86_EFLAGS_RF;
if (ctxt->execute) {
if (ctxt->d & Fastop) {
@@ -5013,7 +5012,7 @@ special_insn:
rc = emulate_int(ctxt, ctxt->src.val);
break;
case 0xce: /* into */
- if (ctxt->eflags & EFLG_OF)
+ if (ctxt->eflags & X86_EFLAGS_OF)
rc = emulate_int(ctxt, 4);
break;
case 0xe9: /* jmp rel */
@@ -5026,19 +5025,19 @@ special_insn:
break;
case 0xf5: /* cmc */
/* complement carry flag from eflags reg */
- ctxt->eflags ^= EFLG_CF;
+ ctxt->eflags ^= X86_EFLAGS_CF;
break;
case 0xf8: /* clc */
- ctxt->eflags &= ~EFLG_CF;
+ ctxt->eflags &= ~X86_EFLAGS_CF;
break;
case 0xf9: /* stc */
- ctxt->eflags |= EFLG_CF;
+ ctxt->eflags |= X86_EFLAGS_CF;
break;
case 0xfc: /* cld */
- ctxt->eflags &= ~EFLG_DF;
+ ctxt->eflags &= ~X86_EFLAGS_DF;
break;
case 0xfd: /* std */
- ctxt->eflags |= EFLG_DF;
+ ctxt->eflags |= X86_EFLAGS_DF;
break;
default:
goto cannot_emulate;
@@ -5099,7 +5098,7 @@ writeback:
}
goto done; /* skip rip writeback */
}
- ctxt->eflags &= ~EFLG_RF;
+ ctxt->eflags &= ~X86_EFLAGS_RF;
}
ctxt->eip = ctxt->_eip;
@@ -5136,8 +5135,7 @@ twobyte_insn:
case 0x40 ... 0x4f: /* cmov */
if (test_cc(ctxt->b, ctxt->eflags))
ctxt->dst.val = ctxt->src.val;
- else if (ctxt->mode != X86EMUL_MODE_PROT64 ||
- ctxt->op_bytes != 4)
+ else if (ctxt->op_bytes != 4)
ctxt->dst.type = OP_NONE; /* no writeback */
break;
case 0x80 ... 0x8f: /* jnz rel, etc*/
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 298781d4cfb4..4dce6f8b6129 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -443,7 +443,8 @@ static inline int pit_in_range(gpa_t addr)
(addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH));
}
-static int pit_ioport_write(struct kvm_io_device *this,
+static int pit_ioport_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
gpa_t addr, int len, const void *data)
{
struct kvm_pit *pit = dev_to_pit(this);
@@ -519,7 +520,8 @@ static int pit_ioport_write(struct kvm_io_device *this,
return 0;
}
-static int pit_ioport_read(struct kvm_io_device *this,
+static int pit_ioport_read(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
gpa_t addr, int len, void *data)
{
struct kvm_pit *pit = dev_to_pit(this);
@@ -589,7 +591,8 @@ static int pit_ioport_read(struct kvm_io_device *this,
return 0;
}
-static int speaker_ioport_write(struct kvm_io_device *this,
+static int speaker_ioport_write(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
gpa_t addr, int len, const void *data)
{
struct kvm_pit *pit = speaker_to_pit(this);
@@ -606,8 +609,9 @@ static int speaker_ioport_write(struct kvm_io_device *this,
return 0;
}
-static int speaker_ioport_read(struct kvm_io_device *this,
- gpa_t addr, int len, void *data)
+static int speaker_ioport_read(struct kvm_vcpu *vcpu,
+ struct kvm_io_device *this,
+ gpa_t addr, int len, void *data)
{
struct kvm_pit *pit = speaker_to_pit(this);
struct kvm_kpit_state *pit_state = &pit->pit_state;
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index dd1b16b611b0..c84990b42b5b 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -3,7 +3,7 @@
#include <linux/kthread.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
struct kvm_kpit_channel_state {
u32 count; /* can be 65536 */
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index cc31f7c06d3d..fef922ff2635 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -507,6 +507,7 @@ static int picdev_read(struct kvm_pic *s,
return -EOPNOTSUPP;
if (len != 1) {
+ memset(val, 0, len);
pr_pic_unimpl("non byte read\n");
return 0;
}
@@ -528,42 +529,42 @@ static int picdev_read(struct kvm_pic *s,
return 0;
}
-static int picdev_master_write(struct kvm_io_device *dev,
+static int picdev_master_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, const void *val)
{
return picdev_write(container_of(dev, struct kvm_pic, dev_master),
addr, len, val);
}
-static int picdev_master_read(struct kvm_io_device *dev,
+static int picdev_master_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
return picdev_read(container_of(dev, struct kvm_pic, dev_master),
addr, len, val);
}
-static int picdev_slave_write(struct kvm_io_device *dev,
+static int picdev_slave_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, const void *val)
{
return picdev_write(container_of(dev, struct kvm_pic, dev_slave),
addr, len, val);
}
-static int picdev_slave_read(struct kvm_io_device *dev,
+static int picdev_slave_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
return picdev_read(container_of(dev, struct kvm_pic, dev_slave),
addr, len, val);
}
-static int picdev_eclr_write(struct kvm_io_device *dev,
+static int picdev_eclr_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, const void *val)
{
return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
addr, len, val);
}
-static int picdev_eclr_read(struct kvm_io_device *dev,
+static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
gpa_t addr, int len, void *val)
{
return picdev_read(container_of(dev, struct kvm_pic, dev_eclr),
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index b1947e0f3e10..28146f03c514 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -206,6 +206,8 @@ static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
old_irr = ioapic->irr;
ioapic->irr |= mask;
+ if (edge)
+ ioapic->irr_delivered &= ~mask;
if ((edge && old_irr == ioapic->irr) ||
(!edge && entry.fields.remote_irr)) {
ret = 0;
@@ -349,7 +351,7 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
irqe.shorthand = 0;
if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
- ioapic->irr &= ~(1 << irq);
+ ioapic->irr_delivered |= 1 << irq;
if (irq == RTC_GSI && line_status) {
/*
@@ -422,6 +424,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
struct kvm_ioapic *ioapic, int vector, int trigger_mode)
{
int i;
+ struct kvm_lapic *apic = vcpu->arch.apic;
for (i = 0; i < IOAPIC_NUM_PINS; i++) {
union kvm_ioapic_redirect_entry *ent = &ioapic->redirtbl[i];
@@ -443,7 +446,8 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
kvm_notify_acked_irq(ioapic->kvm, KVM_IRQCHIP_IOAPIC, i);
spin_lock(&ioapic->lock);
- if (trigger_mode != IOAPIC_LEVEL_TRIG)
+ if (trigger_mode != IOAPIC_LEVEL_TRIG ||
+ kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI)
continue;
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
@@ -471,13 +475,6 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
}
}
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
-{
- struct kvm_ioapic *ioapic = kvm->arch.vioapic;
- smp_rmb();
- return test_bit(vector, ioapic->handled_vectors);
-}
-
void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector, int trigger_mode)
{
struct kvm_ioapic *ioapic = vcpu->kvm->arch.vioapic;
@@ -498,8 +495,8 @@ static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
(addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
}
-static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
- void *val)
+static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+ gpa_t addr, int len, void *val)
{
struct kvm_ioapic *ioapic = to_ioapic(this);
u32 result;
@@ -541,8 +538,8 @@ static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
return 0;
}
-static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
- const void *val)
+static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
+ gpa_t addr, int len, const void *val)
{
struct kvm_ioapic *ioapic = to_ioapic(this);
u32 data;
@@ -597,6 +594,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
ioapic->base_address = IOAPIC_DEFAULT_BASE_ADDRESS;
ioapic->ioregsel = 0;
ioapic->irr = 0;
+ ioapic->irr_delivered = 0;
ioapic->id = 0;
memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
rtc_irq_eoi_tracking_reset(ioapic);
@@ -654,6 +652,7 @@ int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
spin_lock(&ioapic->lock);
memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
+ state->irr &= ~ioapic->irr_delivered;
spin_unlock(&ioapic->lock);
return 0;
}
@@ -667,6 +666,7 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
spin_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
ioapic->irr = 0;
+ ioapic->irr_delivered = 0;
update_handled_vectors(ioapic);
kvm_vcpu_request_scan_ioapic(kvm);
kvm_ioapic_inject_all(ioapic, state->irr);
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index c2e36d934af4..ca0b0b4e6256 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -3,7 +3,7 @@
#include <linux/kvm_host.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
struct kvm;
struct kvm_vcpu;
@@ -77,6 +77,7 @@ struct kvm_ioapic {
struct rtc_status rtc_status;
struct delayed_work eoi_inject;
u32 irq_eoi[IOAPIC_NUM_PINS];
+ u32 irr_delivered;
};
#ifdef DEBUG
@@ -97,13 +98,19 @@ static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
return kvm->arch.vioapic;
}
+static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
+{
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+ smp_rmb();
+ return test_bit(vector, ioapic->handled_vectors);
+}
+
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, unsigned int dest, int dest_mode);
int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
int trigger_mode);
-bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector);
int kvm_ioapic_init(struct kvm *kvm);
void kvm_ioapic_destroy(struct kvm *kvm);
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2d03568e9498..ad68c73008c5 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -27,7 +27,7 @@
#include <linux/kvm_host.h>
#include <linux/spinlock.h>
-#include "iodev.h"
+#include <kvm/iodev.h>
#include "ioapic.h"
#include "lapic.h"
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index e55b5fc344eb..d67206a7b99a 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -133,6 +133,28 @@ static inline int kvm_apic_id(struct kvm_lapic *apic)
return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
}
+/* The logical map is definitely wrong if we have multiple
+ * modes at the same time. (Physical map is always right.)
+ */
+static inline bool kvm_apic_logical_map_valid(struct kvm_apic_map *map)
+{
+ return !(map->mode & (map->mode - 1));
+}
+
+static inline void
+apic_logical_id(struct kvm_apic_map *map, u32 dest_id, u16 *cid, u16 *lid)
+{
+ unsigned lid_bits;
+
+ BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_CLUSTER != 4);
+ BUILD_BUG_ON(KVM_APIC_MODE_XAPIC_FLAT != 8);
+ BUILD_BUG_ON(KVM_APIC_MODE_X2APIC != 16);
+ lid_bits = map->mode;
+
+ *cid = dest_id >> lid_bits;
+ *lid = dest_id & ((1 << lid_bits) - 1);
+}
+
static void recalculate_apic_map(struct kvm *kvm)
{
struct kvm_apic_map *new, *old = NULL;
@@ -146,48 +168,6 @@ static void recalculate_apic_map(struct kvm *kvm)
if (!new)
goto out;
- new->ldr_bits = 8;
- /* flat mode is default */
- new->cid_shift = 8;
- new->cid_mask = 0;
- new->lid_mask = 0xff;
- new->broadcast = APIC_BROADCAST;
-
- kvm_for_each_vcpu(i, vcpu, kvm) {
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- if (!kvm_apic_present(vcpu))
- continue;
-
- if (apic_x2apic_mode(apic)) {
- new->ldr_bits = 32;
- new->cid_shift = 16;
- new->cid_mask = new->lid_mask = 0xffff;
- new->broadcast = X2APIC_BROADCAST;
- } else if (kvm_apic_get_reg(apic, APIC_LDR)) {
- if (kvm_apic_get_reg(apic, APIC_DFR) ==
- APIC_DFR_CLUSTER) {
- new->cid_shift = 4;
- new->cid_mask = 0xf;
- new->lid_mask = 0xf;
- } else {
- new->cid_shift = 8;
- new->cid_mask = 0;
- new->lid_mask = 0xff;
- }
- }
-
- /*
- * All APICs have to be configured in the same mode by an OS.
- * We take advatage of this while building logical id loockup
- * table. After reset APICs are in software disabled mode, so if
- * we find apic with different setting we assume this is the mode
- * OS wants all apics to be in; build lookup table accordingly.
- */
- if (kvm_apic_sw_enabled(apic))
- break;
- }
-
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvm_lapic *apic = vcpu->arch.apic;
u16 cid, lid;
@@ -198,11 +178,25 @@ static void recalculate_apic_map(struct kvm *kvm)
aid = kvm_apic_id(apic);
ldr = kvm_apic_get_reg(apic, APIC_LDR);
- cid = apic_cluster_id(new, ldr);
- lid = apic_logical_id(new, ldr);
if (aid < ARRAY_SIZE(new->phys_map))
new->phys_map[aid] = apic;
+
+ if (apic_x2apic_mode(apic)) {
+ new->mode |= KVM_APIC_MODE_X2APIC;
+ } else if (ldr) {
+ ldr = GET_APIC_LOGICAL_ID(ldr);
+ if (kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
+ new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
+ else
+ new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
+ }
+
+ if (!kvm_apic_logical_map_valid(new))
+ continue;
+
+ apic_logical_id(new, ldr, &cid, &lid);
+
if (lid && cid < ARRAY_SIZE(new->logical_map))
new->logical_map[cid][ffs(lid) - 1] = apic;
}
@@ -588,15 +582,23 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
apic_update_ppr(apic);
}
-static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
{
- return dest == (apic_x2apic_mode(apic) ?
- X2APIC_BROADCAST : APIC_BROADCAST);
+ if (apic_x2apic_mode(apic))
+ return mda == X2APIC_BROADCAST;
+
+ return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST;
}
-static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
+static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
{
- return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
+ if (kvm_apic_broadcast(apic, mda))
+ return true;
+
+ if (apic_x2apic_mode(apic))
+ return mda == kvm_apic_id(apic);
+
+ return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic));
}
static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
@@ -613,6 +615,7 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
&& (logical_id & mda & 0xffff) != 0;
logical_id = GET_APIC_LOGICAL_ID(logical_id);
+ mda = GET_APIC_DEST_FIELD(mda);
switch (kvm_apic_get_reg(apic, APIC_DFR)) {
case APIC_DFR_FLAT:
@@ -627,10 +630,27 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
}
}
+/* KVM APIC implementation has two quirks
+ * - dest always begins at 0 while xAPIC MDA has offset 24,
+ * - IOxAPIC messages have to be delivered (directly) to x2APIC.
+ */
+static u32 kvm_apic_mda(unsigned int dest_id, struct kvm_lapic *source,
+ struct kvm_lapic *target)
+{
+ bool ipi = source != NULL;
+ bool x2apic_mda = apic_x2apic_mode(ipi ? source : target);
+
+ if (!ipi && dest_id == APIC_BROADCAST && x2apic_mda)
+ return X2APIC_BROADCAST;
+
+ return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id);
+}
+
bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
int short_hand, unsigned int dest, int dest_mode)
{
struct kvm_lapic *target = vcpu->arch.apic;
+ u32 mda = kvm_apic_mda(dest, source, target);
apic_debug("target %p, source %p, dest 0x%x, "
"dest_mode 0x%x, short_hand 0x%x\n",
@@ -640,9 +660,9 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
switch (short_hand) {
case APIC_DEST_NOSHORT:
if (dest_mode == APIC_DEST_PHYSICAL)
- return kvm_apic_match_physical_addr(target, dest);
+ return kvm_apic_match_physical_addr(target, mda);
else
- return kvm_apic_match_logical_addr(target, dest);
+ return kvm_apic_match_logical_addr(target, mda);
case APIC_DEST_SELF:
return target == source;
case APIC_DEST_ALLINC:
@@ -664,6 +684,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic **dst;
int i;
bool ret = false;
+ bool x2apic_ipi = src && apic_x2apic_mode(src);
*r = -1;
@@ -675,15 +696,15 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
if (irq->shorthand)
return false;
+ if (irq->dest_id == (x2apic_ipi ? X2APIC_BROADCAST : APIC_BROADCAST))
+ return false;
+
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
if (!map)
goto out;
- if (irq->dest_id == map->broadcast)
- goto out;
-
ret = true;
if (irq->dest_mode == APIC_DEST_PHYSICAL) {
@@ -692,16 +713,20 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
dst = &map->phys_map[irq->dest_id];
} else {
- u32 mda = irq->dest_id << (32 - map->ldr_bits);
- u16 cid = apic_cluster_id(map, mda);
+ u16 cid;
+
+ if (!kvm_apic_logical_map_valid(map)) {
+ ret = false;
+ goto out;
+ }
+
+ apic_logical_id(map, irq->dest_id, &cid, (u16 *)&bitmap);
if (cid >= ARRAY_SIZE(map->logical_map))
goto out;
dst = map->logical_map[cid];
- bitmap = apic_logical_id(map, mda);
-
if (irq->delivery_mode == APIC_DM_LOWEST) {
int l = -1;
for_each_set_bit(i, &bitmap, 16) {
@@ -833,8 +858,7 @@ int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
{
- if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
- kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
+ if (kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
int trigger_mode;
if (apic_test_vector(vector, apic->regs + APIC_TMR))
trigger_mode = IOAPIC_LEVEL_TRIG;
@@ -1038,7 +1062,7 @@ static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
addr < apic->base_address + LAPIC_MMIO_LENGTH;
}
-static int apic_mmio_read(struct kvm_io_device *this,
+static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
gpa_t address, int len, void *data)
{
struct kvm_lapic *apic = to_lapic(this);
@@ -1358,7 +1382,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
return ret;
}
-static int apic_mmio_write(struct kvm_io_device *this,
+static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
gpa_t address, int len, const void *data)
{
struct kvm_lapic *apic = to_lapic(this);
@@ -1498,8 +1522,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
return;
}
- if (!kvm_vcpu_is_bsp(apic->vcpu))
- value &= ~MSR_IA32_APICBASE_BSP;
vcpu->arch.apic_base = value;
/* update jump label if enable bit changes */
@@ -1572,7 +1594,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
apic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
}
apic->irr_pending = kvm_apic_vid_enabled(vcpu->kvm);
- apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm);
+ apic->isr_count = kvm_x86_ops->hwapic_isr_update ? 1 : 0;
apic->highest_isr_cache = -1;
update_divide_count(apic);
atomic_set(&apic->lapic_timer.pending, 0);
@@ -1782,7 +1804,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
update_divide_count(apic);
start_apic_timer(apic);
apic->irr_pending = true;
- apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
+ apic->isr_count = kvm_x86_ops->hwapic_isr_update ?
1 : count_vectors(apic->regs + APIC_ISR);
apic->highest_isr_cache = -1;
if (kvm_x86_ops->hwapic_irr_update)
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 0bc6c656625b..9d28383fc1e7 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -1,7 +1,7 @@
#ifndef __KVM_X86_LAPIC_H
#define __KVM_X86_LAPIC_H
-#include "iodev.h"
+#include <kvm/iodev.h>
#include <linux/kvm_host.h>
@@ -148,21 +148,6 @@ static inline bool kvm_apic_vid_enabled(struct kvm *kvm)
return kvm_x86_ops->vm_has_apicv(kvm);
}
-static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
-{
- u16 cid;
- ldr >>= 32 - map->ldr_bits;
- cid = (ldr >> map->cid_shift) & map->cid_mask;
-
- return cid;
-}
-
-static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
-{
- ldr >>= (32 - map->ldr_bits);
- return ldr & map->lid_mask;
-}
-
static inline bool kvm_apic_has_events(struct kvm_vcpu *vcpu)
{
return vcpu->arch.apic->pending_events;
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index cee759299a35..146f295ee322 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4465,6 +4465,79 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
kvm_flush_remote_tlbs(kvm);
}
+static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
+ unsigned long *rmapp)
+{
+ u64 *sptep;
+ struct rmap_iterator iter;
+ int need_tlb_flush = 0;
+ pfn_t pfn;
+ struct kvm_mmu_page *sp;
+
+ for (sptep = rmap_get_first(*rmapp, &iter); sptep;) {
+ BUG_ON(!(*sptep & PT_PRESENT_MASK));
+
+ sp = page_header(__pa(sptep));
+ pfn = spte_to_pfn(*sptep);
+
+ /*
+ * Only EPT supported for now; otherwise, one would need to
+ * find out efficiently whether the guest page tables are
+ * also using huge pages.
+ */
+ if (sp->role.direct &&
+ !kvm_is_reserved_pfn(pfn) &&
+ PageTransCompound(pfn_to_page(pfn))) {
+ drop_spte(kvm, sptep);
+ sptep = rmap_get_first(*rmapp, &iter);
+ need_tlb_flush = 1;
+ } else
+ sptep = rmap_get_next(&iter);
+ }
+
+ return need_tlb_flush;
+}
+
+void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
+ struct kvm_memory_slot *memslot)
+{
+ bool flush = false;
+ unsigned long *rmapp;
+ unsigned long last_index, index;
+ gfn_t gfn_start, gfn_end;
+
+ spin_lock(&kvm->mmu_lock);
+
+ gfn_start = memslot->base_gfn;
+ gfn_end = memslot->base_gfn + memslot->npages - 1;
+
+ if (gfn_start >= gfn_end)
+ goto out;
+
+ rmapp = memslot->arch.rmap[0];
+ last_index = gfn_to_index(gfn_end, memslot->base_gfn,
+ PT_PAGE_TABLE_LEVEL);
+
+ for (index = 0; index <= last_index; ++index, ++rmapp) {
+ if (*rmapp)
+ flush |= kvm_mmu_zap_collapsible_spte(kvm, rmapp);
+
+ if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
+ if (flush) {
+ kvm_flush_remote_tlbs(kvm);
+ flush = false;
+ }
+ cond_resched_lock(&kvm->mmu_lock);
+ }
+ }
+
+ if (flush)
+ kvm_flush_remote_tlbs(kvm);
+
+out:
+ spin_unlock(&kvm->mmu_lock);
+}
+
void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
struct kvm_memory_slot *memslot)
{
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 8e6b7d869d2f..29fbf9dfdc54 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -38,7 +38,7 @@ static struct kvm_arch_event_perf_mapping {
};
/* mapping between fixed pmc index and arch_events array */
-int fixed_pmc_events[] = {1, 0, 7};
+static int fixed_pmc_events[] = {1, 0, 7};
static bool pmc_is_gp(struct kvm_pmc *pmc)
{
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d319e0c24758..ce741b8650f6 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1261,7 +1261,7 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
MSR_IA32_APICBASE_ENABLE;
- if (kvm_vcpu_is_bsp(&svm->vcpu))
+ if (kvm_vcpu_is_reset_bsp(&svm->vcpu))
svm->vcpu.arch.apic_base |= MSR_IA32_APICBASE_BSP;
svm_init_osvw(&svm->vcpu);
@@ -1929,14 +1929,12 @@ static int nop_on_interception(struct vcpu_svm *svm)
static int halt_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 1;
- skip_emulated_instruction(&svm->vcpu);
return kvm_emulate_halt(&svm->vcpu);
}
static int vmmcall_interception(struct vcpu_svm *svm)
{
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
- skip_emulated_instruction(&svm->vcpu);
kvm_emulate_hypercall(&svm->vcpu);
return 1;
}
@@ -2757,11 +2755,11 @@ static int invlpga_interception(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
- trace_kvm_invlpga(svm->vmcb->save.rip, vcpu->arch.regs[VCPU_REGS_RCX],
- vcpu->arch.regs[VCPU_REGS_RAX]);
+ trace_kvm_invlpga(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RCX),
+ kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
/* Let's treat INVLPGA the same as INVLPG (can be optimized!) */
- kvm_mmu_invlpg(vcpu, vcpu->arch.regs[VCPU_REGS_RAX]);
+ kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
skip_emulated_instruction(&svm->vcpu);
@@ -2770,12 +2768,18 @@ static int invlpga_interception(struct vcpu_svm *svm)
static int skinit_interception(struct vcpu_svm *svm)
{
- trace_kvm_skinit(svm->vmcb->save.rip, svm->vcpu.arch.regs[VCPU_REGS_RAX]);
+ trace_kvm_skinit(svm->vmcb->save.rip, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
kvm_queue_exception(&svm->vcpu, UD_VECTOR);
return 1;
}
+static int wbinvd_interception(struct vcpu_svm *svm)
+{
+ kvm_emulate_wbinvd(&svm->vcpu);
+ return 1;
+}
+
static int xsetbv_interception(struct vcpu_svm *svm)
{
u64 new_bv = kvm_read_edx_eax(&svm->vcpu);
@@ -2902,7 +2906,8 @@ static int rdpmc_interception(struct vcpu_svm *svm)
return 1;
}
-bool check_selective_cr0_intercepted(struct vcpu_svm *svm, unsigned long val)
+static bool check_selective_cr0_intercepted(struct vcpu_svm *svm,
+ unsigned long val)
{
unsigned long cr0 = svm->vcpu.arch.cr0;
bool ret = false;
@@ -2940,7 +2945,10 @@ static int cr_interception(struct vcpu_svm *svm)
return emulate_on_interception(svm);
reg = svm->vmcb->control.exit_info_1 & SVM_EXITINFO_REG_MASK;
- cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
+ if (svm->vmcb->control.exit_code == SVM_EXIT_CR0_SEL_WRITE)
+ cr = SVM_EXIT_WRITE_CR0 - SVM_EXIT_READ_CR0;
+ else
+ cr = svm->vmcb->control.exit_code - SVM_EXIT_READ_CR0;
err = 0;
if (cr >= 16) { /* mov to cr */
@@ -3133,7 +3141,7 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 *data)
static int rdmsr_interception(struct vcpu_svm *svm)
{
- u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
+ u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
u64 data;
if (svm_get_msr(&svm->vcpu, ecx, &data)) {
@@ -3142,8 +3150,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
} else {
trace_kvm_msr_read(ecx, data);
- svm->vcpu.arch.regs[VCPU_REGS_RAX] = data & 0xffffffff;
- svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32;
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RAX, data & 0xffffffff);
+ kvm_register_write(&svm->vcpu, VCPU_REGS_RDX, data >> 32);
svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
skip_emulated_instruction(&svm->vcpu);
}
@@ -3246,9 +3254,8 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
static int wrmsr_interception(struct vcpu_svm *svm)
{
struct msr_data msr;
- u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
- u64 data = (svm->vcpu.arch.regs[VCPU_REGS_RAX] & -1u)
- | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32);
+ u32 ecx = kvm_register_read(&svm->vcpu, VCPU_REGS_RCX);
+ u64 data = kvm_read_edx_eax(&svm->vcpu);
msr.data = data;
msr.index = ecx;
@@ -3325,7 +3332,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_READ_CR3] = cr_interception,
[SVM_EXIT_READ_CR4] = cr_interception,
[SVM_EXIT_READ_CR8] = cr_interception,
- [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception,
+ [SVM_EXIT_CR0_SEL_WRITE] = cr_interception,
[SVM_EXIT_WRITE_CR0] = cr_interception,
[SVM_EXIT_WRITE_CR3] = cr_interception,
[SVM_EXIT_WRITE_CR4] = cr_interception,
@@ -3376,7 +3383,7 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
[SVM_EXIT_STGI] = stgi_interception,
[SVM_EXIT_CLGI] = clgi_interception,
[SVM_EXIT_SKINIT] = skinit_interception,
- [SVM_EXIT_WBINVD] = emulate_on_interception,
+ [SVM_EXIT_WBINVD] = wbinvd_interception,
[SVM_EXIT_MONITOR] = monitor_interception,
[SVM_EXIT_MWAIT] = mwait_interception,
[SVM_EXIT_XSETBV] = xsetbv_interception,
@@ -3555,7 +3562,7 @@ static int handle_exit(struct kvm_vcpu *vcpu)
if (exit_code >= ARRAY_SIZE(svm_exit_handlers)
|| !svm_exit_handlers[exit_code]) {
- WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_code);
+ WARN_ONCE(1, "svm: unexpected exit reason 0x%x\n", exit_code);
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -3649,11 +3656,6 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap)
return;
}
-static void svm_hwapic_isr_update(struct kvm *kvm, int isr)
-{
- return;
-}
-
static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
return;
@@ -4403,7 +4405,6 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_virtual_x2apic_mode = svm_set_virtual_x2apic_mode,
.vm_has_apicv = svm_vm_has_apicv,
.load_eoi_exitmap = svm_load_eoi_exitmap,
- .hwapic_isr_update = svm_hwapic_isr_update,
.sync_pir_to_irr = svm_sync_pir_to_irr,
.set_tss_addr = svm_set_tss_addr,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 14c1a18d206a..f5e8dce8046c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2168,7 +2168,10 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
{
unsigned long *msr_bitmap;
- if (irqchip_in_kernel(vcpu->kvm) && apic_x2apic_mode(vcpu->arch.apic)) {
+ if (is_guest_mode(vcpu))
+ msr_bitmap = vmx_msr_bitmap_nested;
+ else if (irqchip_in_kernel(vcpu->kvm) &&
+ apic_x2apic_mode(vcpu->arch.apic)) {
if (is_long_mode(vcpu))
msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
@@ -2467,6 +2470,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_secondary_ctls_low = 0;
vmx->nested.nested_vmx_secondary_ctls_high &=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
@@ -2476,8 +2480,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
if (enable_ept) {
/* nested EPT: emulate EPT also to L1 */
vmx->nested.nested_vmx_secondary_ctls_high |=
- SECONDARY_EXEC_ENABLE_EPT |
- SECONDARY_EXEC_UNRESTRICTED_GUEST;
+ SECONDARY_EXEC_ENABLE_EPT;
vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
VMX_EPT_INVEPT_BIT;
@@ -2491,6 +2494,10 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
} else
vmx->nested.nested_vmx_ept_caps = 0;
+ if (enable_unrestricted_guest)
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_UNRESTRICTED_GUEST;
+
/* miscellaneous data */
rdmsr(MSR_IA32_VMX_MISC,
vmx->nested.nested_vmx_misc_low,
@@ -3262,8 +3269,8 @@ static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg,
* default value.
*/
if (seg == VCPU_SREG_CS || seg == VCPU_SREG_SS)
- save->selector &= ~SELECTOR_RPL_MASK;
- save->dpl = save->selector & SELECTOR_RPL_MASK;
+ save->selector &= ~SEGMENT_RPL_MASK;
+ save->dpl = save->selector & SEGMENT_RPL_MASK;
save->s = 1;
}
vmx_set_segment(vcpu, save, seg);
@@ -3836,7 +3843,7 @@ static bool code_segment_valid(struct kvm_vcpu *vcpu)
unsigned int cs_rpl;
vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
- cs_rpl = cs.selector & SELECTOR_RPL_MASK;
+ cs_rpl = cs.selector & SEGMENT_RPL_MASK;
if (cs.unusable)
return false;
@@ -3864,7 +3871,7 @@ static bool stack_segment_valid(struct kvm_vcpu *vcpu)
unsigned int ss_rpl;
vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
- ss_rpl = ss.selector & SELECTOR_RPL_MASK;
+ ss_rpl = ss.selector & SEGMENT_RPL_MASK;
if (ss.unusable)
return true;
@@ -3886,7 +3893,7 @@ static bool data_segment_valid(struct kvm_vcpu *vcpu, int seg)
unsigned int rpl;
vmx_get_segment(vcpu, &var, seg);
- rpl = var.selector & SELECTOR_RPL_MASK;
+ rpl = var.selector & SEGMENT_RPL_MASK;
if (var.unusable)
return true;
@@ -3913,7 +3920,7 @@ static bool tr_valid(struct kvm_vcpu *vcpu)
if (tr.unusable)
return false;
- if (tr.selector & SELECTOR_TI_MASK) /* TI = 1 */
+ if (tr.selector & SEGMENT_TI_MASK) /* TI = 1 */
return false;
if (tr.type != 3 && tr.type != 11) /* TODO: Check if guest is in IA32e mode */
return false;
@@ -3931,7 +3938,7 @@ static bool ldtr_valid(struct kvm_vcpu *vcpu)
if (ldtr.unusable)
return true;
- if (ldtr.selector & SELECTOR_TI_MASK) /* TI = 1 */
+ if (ldtr.selector & SEGMENT_TI_MASK) /* TI = 1 */
return false;
if (ldtr.type != 2)
return false;
@@ -3948,8 +3955,8 @@ static bool cs_ss_rpl_check(struct kvm_vcpu *vcpu)
vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
vmx_get_segment(vcpu, &ss, VCPU_SREG_SS);
- return ((cs.selector & SELECTOR_RPL_MASK) ==
- (ss.selector & SELECTOR_RPL_MASK));
+ return ((cs.selector & SEGMENT_RPL_MASK) ==
+ (ss.selector & SEGMENT_RPL_MASK));
}
/*
@@ -4367,6 +4374,18 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
return 0;
}
+static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_SMP
+ if (vcpu->mode == IN_GUEST_MODE) {
+ apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
+ POSTED_INTR_VECTOR);
+ return true;
+ }
+#endif
+ return false;
+}
+
static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
int vector)
{
@@ -4375,9 +4394,7 @@ static int vmx_deliver_nested_posted_interrupt(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu) &&
vector == vmx->nested.posted_intr_nv) {
/* the PIR and ON have been set by L1. */
- if (vcpu->mode == IN_GUEST_MODE)
- apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
- POSTED_INTR_VECTOR);
+ kvm_vcpu_trigger_posted_interrupt(vcpu);
/*
* If a posted intr is not recognized by hardware,
* we will accomplish it in the next vmentry.
@@ -4409,12 +4426,7 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
r = pi_test_and_set_on(&vmx->pi_desc);
kvm_make_request(KVM_REQ_EVENT, vcpu);
-#ifdef CONFIG_SMP
- if (!r && (vcpu->mode == IN_GUEST_MODE))
- apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
- POSTED_INTR_VECTOR);
- else
-#endif
+ if (r || !kvm_vcpu_trigger_posted_interrupt(vcpu))
kvm_vcpu_kick(vcpu);
}
@@ -4700,7 +4712,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu)
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(&vmx->vcpu, 0);
apic_base_msr.data = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
- if (kvm_vcpu_is_bsp(&vmx->vcpu))
+ if (kvm_vcpu_is_reset_bsp(&vmx->vcpu))
apic_base_msr.data |= MSR_IA32_APICBASE_BSP;
apic_base_msr.host_initiated = true;
kvm_set_apic_base(&vmx->vcpu, &apic_base_msr);
@@ -4995,7 +5007,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
if (emulate_instruction(vcpu, 0) == EMULATE_DONE) {
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
- return kvm_emulate_halt(vcpu);
+ return kvm_vcpu_halt(vcpu);
}
return 1;
}
@@ -5060,6 +5072,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
}
if (is_invalid_opcode(intr_info)) {
+ if (is_guest_mode(vcpu)) {
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+ }
er = emulate_instruction(vcpu, EMULTYPE_TRAP_UD);
if (er != EMULATE_DONE)
kvm_queue_exception(vcpu, UD_VECTOR);
@@ -5079,9 +5095,10 @@ static int handle_exception(struct kvm_vcpu *vcpu)
!(is_page_fault(intr_info) && !(error_code & PFERR_RSVD_MASK))) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
- vcpu->run->internal.ndata = 2;
+ vcpu->run->internal.ndata = 3;
vcpu->run->internal.data[0] = vect_info;
vcpu->run->internal.data[1] = intr_info;
+ vcpu->run->internal.data[2] = error_code;
return 0;
}
@@ -5522,13 +5539,11 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu)
static int handle_halt(struct kvm_vcpu *vcpu)
{
- skip_emulated_instruction(vcpu);
return kvm_emulate_halt(vcpu);
}
static int handle_vmcall(struct kvm_vcpu *vcpu)
{
- skip_emulated_instruction(vcpu);
kvm_emulate_hypercall(vcpu);
return 1;
}
@@ -5559,7 +5574,6 @@ static int handle_rdpmc(struct kvm_vcpu *vcpu)
static int handle_wbinvd(struct kvm_vcpu *vcpu)
{
- skip_emulated_instruction(vcpu);
kvm_emulate_wbinvd(vcpu);
return 1;
}
@@ -5817,7 +5831,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
gpa_t gpa;
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
- if (!kvm_io_bus_write(vcpu->kvm, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+ if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
skip_emulated_instruction(vcpu);
return 1;
}
@@ -5898,7 +5912,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
- ret = kvm_emulate_halt(vcpu);
+ ret = kvm_vcpu_halt(vcpu);
goto out;
}
@@ -7307,21 +7321,21 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
else if (port < 0x10000)
bitmap = vmcs12->io_bitmap_b;
else
- return 1;
+ return true;
bitmap += (port & 0x7fff) / 8;
if (last_bitmap != bitmap)
if (kvm_read_guest(vcpu->kvm, bitmap, &b, 1))
- return 1;
+ return true;
if (b & (1 << (port & 7)))
- return 1;
+ return true;
port++;
size--;
last_bitmap = bitmap;
}
- return 0;
+ return false;
}
/*
@@ -7337,7 +7351,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
gpa_t bitmap;
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
- return 1;
+ return true;
/*
* The MSR_BITMAP page is divided into four 1024-byte bitmaps,
@@ -7356,10 +7370,10 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
if (msr_index < 1024*8) {
unsigned char b;
if (kvm_read_guest(vcpu->kvm, bitmap + msr_index/8, &b, 1))
- return 1;
+ return true;
return 1 & (b >> (msr_index & 7));
} else
- return 1; /* let L1 handle the wrong parameter */
+ return true; /* let L1 handle the wrong parameter */
}
/*
@@ -7381,7 +7395,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
case 0:
if (vmcs12->cr0_guest_host_mask &
(val ^ vmcs12->cr0_read_shadow))
- return 1;
+ return true;
break;
case 3:
if ((vmcs12->cr3_target_count >= 1 &&
@@ -7392,37 +7406,37 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
vmcs12->cr3_target_value2 == val) ||
(vmcs12->cr3_target_count >= 4 &&
vmcs12->cr3_target_value3 == val))
- return 0;
+ return false;
if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
- return 1;
+ return true;
break;
case 4:
if (vmcs12->cr4_guest_host_mask &
(vmcs12->cr4_read_shadow ^ val))
- return 1;
+ return true;
break;
case 8:
if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
- return 1;
+ return true;
break;
}
break;
case 2: /* clts */
if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
(vmcs12->cr0_read_shadow & X86_CR0_TS))
- return 1;
+ return true;
break;
case 1: /* mov from cr */
switch (cr) {
case 3:
if (vmcs12->cpu_based_vm_exec_control &
CPU_BASED_CR3_STORE_EXITING)
- return 1;
+ return true;
break;
case 8:
if (vmcs12->cpu_based_vm_exec_control &
CPU_BASED_CR8_STORE_EXITING)
- return 1;
+ return true;
break;
}
break;
@@ -7433,14 +7447,14 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
*/
if (vmcs12->cr0_guest_host_mask & 0xe &
(val ^ vmcs12->cr0_read_shadow))
- return 1;
+ return true;
if ((vmcs12->cr0_guest_host_mask & 0x1) &&
!(vmcs12->cr0_read_shadow & 0x1) &&
(val & 0x1))
- return 1;
+ return true;
break;
}
- return 0;
+ return false;
}
/*
@@ -7463,48 +7477,48 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
KVM_ISA_VMX);
if (vmx->nested.nested_run_pending)
- return 0;
+ return false;
if (unlikely(vmx->fail)) {
pr_info_ratelimited("%s failed vm entry %x\n", __func__,
vmcs_read32(VM_INSTRUCTION_ERROR));
- return 1;
+ return true;
}
switch (exit_reason) {
case EXIT_REASON_EXCEPTION_NMI:
if (!is_exception(intr_info))
- return 0;
+ return false;
else if (is_page_fault(intr_info))
return enable_ept;
else if (is_no_device(intr_info) &&
!(vmcs12->guest_cr0 & X86_CR0_TS))
- return 0;
+ return false;
return vmcs12->exception_bitmap &
(1u << (intr_info & INTR_INFO_VECTOR_MASK));
case EXIT_REASON_EXTERNAL_INTERRUPT:
- return 0;
+ return false;
case EXIT_REASON_TRIPLE_FAULT:
- return 1;
+ return true;
case EXIT_REASON_PENDING_INTERRUPT:
return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_INTR_PENDING);
case EXIT_REASON_NMI_WINDOW:
return nested_cpu_has(vmcs12, CPU_BASED_VIRTUAL_NMI_PENDING);
case EXIT_REASON_TASK_SWITCH:
- return 1;
+ return true;
case EXIT_REASON_CPUID:
if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa)
- return 0;
- return 1;
+ return false;
+ return true;
case EXIT_REASON_HLT:
return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
case EXIT_REASON_INVD:
- return 1;
+ return true;
case EXIT_REASON_INVLPG:
return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
case EXIT_REASON_RDPMC:
return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
- case EXIT_REASON_RDTSC:
+ case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
@@ -7516,7 +7530,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* VMX instructions trap unconditionally. This allows L1 to
* emulate them for its L2 guest, i.e., allows 3-level nesting!
*/
- return 1;
+ return true;
case EXIT_REASON_CR_ACCESS:
return nested_vmx_exit_handled_cr(vcpu, vmcs12);
case EXIT_REASON_DR_ACCESS:
@@ -7527,7 +7541,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_MSR_WRITE:
return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
case EXIT_REASON_INVALID_STATE:
- return 1;
+ return true;
case EXIT_REASON_MWAIT_INSTRUCTION:
return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
case EXIT_REASON_MONITOR_INSTRUCTION:
@@ -7537,7 +7551,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
nested_cpu_has2(vmcs12,
SECONDARY_EXEC_PAUSE_LOOP_EXITING);
case EXIT_REASON_MCE_DURING_VMENTRY:
- return 0;
+ return false;
case EXIT_REASON_TPR_BELOW_THRESHOLD:
return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
case EXIT_REASON_APIC_ACCESS:
@@ -7546,7 +7560,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_APIC_WRITE:
case EXIT_REASON_EOI_INDUCED:
/* apic_write and eoi_induced should exit unconditionally. */
- return 1;
+ return true;
case EXIT_REASON_EPT_VIOLATION:
/*
* L0 always deals with the EPT violation. If nested EPT is
@@ -7554,7 +7568,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* missing in the guest EPT table (EPT12), the EPT violation
* will be injected with nested_ept_inject_page_fault()
*/
- return 0;
+ return false;
case EXIT_REASON_EPT_MISCONFIG:
/*
* L2 never uses directly L1's EPT, but rather L0's own EPT
@@ -7562,11 +7576,11 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
* (EPT on EPT). So any problems with the structure of the
* table is L0's fault.
*/
- return 0;
+ return false;
case EXIT_REASON_WBINVD:
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
case EXIT_REASON_XSETBV:
- return 1;
+ return true;
case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
/*
* This should never happen, since it is not possible to
@@ -7576,7 +7590,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
*/
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
default:
- return 1;
+ return true;
}
}
@@ -8511,6 +8525,9 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
exec_control);
}
}
+ if (nested && !vmx->rdtscp_enabled)
+ vmx->nested.nested_vmx_secondary_ctls_high &=
+ ~SECONDARY_EXEC_RDTSCP;
}
/* Exposing INVPCID only when PCID is exposed */
@@ -8611,10 +8628,11 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
+ int maxphyaddr = cpuid_maxphyaddr(vcpu);
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
- /* TODO: Also verify bits beyond physical address width are 0 */
- if (!PAGE_ALIGNED(vmcs12->apic_access_addr))
+ if (!PAGE_ALIGNED(vmcs12->apic_access_addr) ||
+ vmcs12->apic_access_addr >> maxphyaddr)
return false;
/*
@@ -8630,8 +8648,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
}
if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
- /* TODO: Also verify bits beyond physical address width are 0 */
- if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr))
+ if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) ||
+ vmcs12->virtual_apic_page_addr >> maxphyaddr)
return false;
if (vmx->nested.virtual_apic_page) /* shouldn't happen */
@@ -8654,7 +8672,8 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu,
}
if (nested_cpu_has_posted_intr(vmcs12)) {
- if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64))
+ if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) ||
+ vmcs12->posted_intr_desc_addr >> maxphyaddr)
return false;
if (vmx->nested.pi_desc_page) { /* shouldn't happen */
@@ -8853,9 +8872,9 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
unsigned long count_field,
- unsigned long addr_field,
- int maxphyaddr)
+ unsigned long addr_field)
{
+ int maxphyaddr;
u64 count, addr;
if (vmcs12_read_any(vcpu, count_field, &count) ||
@@ -8865,6 +8884,7 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
}
if (count == 0)
return 0;
+ maxphyaddr = cpuid_maxphyaddr(vcpu);
if (!IS_ALIGNED(addr, 16) || addr >> maxphyaddr ||
(addr + count * sizeof(struct vmx_msr_entry) - 1) >> maxphyaddr) {
pr_warn_ratelimited(
@@ -8878,19 +8898,16 @@ static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
static int nested_vmx_check_msr_switch_controls(struct kvm_vcpu *vcpu,
struct vmcs12 *vmcs12)
{
- int maxphyaddr;
-
if (vmcs12->vm_exit_msr_load_count == 0 &&
vmcs12->vm_exit_msr_store_count == 0 &&
vmcs12->vm_entry_msr_load_count == 0)
return 0; /* Fast path */
- maxphyaddr = cpuid_maxphyaddr(vcpu);
if (nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_LOAD_COUNT,
- VM_EXIT_MSR_LOAD_ADDR, maxphyaddr) ||
+ VM_EXIT_MSR_LOAD_ADDR) ||
nested_vmx_check_msr_switch(vcpu, VM_EXIT_MSR_STORE_COUNT,
- VM_EXIT_MSR_STORE_ADDR, maxphyaddr) ||
+ VM_EXIT_MSR_STORE_ADDR) ||
nested_vmx_check_msr_switch(vcpu, VM_ENTRY_MSR_LOAD_COUNT,
- VM_ENTRY_MSR_LOAD_ADDR, maxphyaddr))
+ VM_ENTRY_MSR_LOAD_ADDR))
return -EINVAL;
return 0;
}
@@ -9140,8 +9157,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
exec_control &= ~SECONDARY_EXEC_RDTSCP;
/* Take the following fields only from vmcs12 */
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
- SECONDARY_EXEC_APIC_REGISTER_VIRT);
+ SECONDARY_EXEC_APIC_REGISTER_VIRT);
if (nested_cpu_has(vmcs12,
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
exec_control |= vmcs12->secondary_vm_exec_control;
@@ -9213,9 +9231,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
}
if (cpu_has_vmx_msr_bitmap() &&
- exec_control & CPU_BASED_USE_MSR_BITMAPS &&
- nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) {
- vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_nested));
+ exec_control & CPU_BASED_USE_MSR_BITMAPS) {
+ nested_vmx_merge_msr_bitmap(vcpu, vmcs12);
+ /* MSR_BITMAP will be set by following vmx_set_efer. */
} else
exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
@@ -9374,7 +9392,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
}
if (!nested_get_vmcs12_pages(vcpu, vmcs12)) {
- /*TODO: Also verify bits beyond physical address width are 0*/
nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
return 1;
}
@@ -9513,7 +9530,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
vmcs12->launch_state = 1;
if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
- return kvm_emulate_halt(vcpu);
+ return kvm_vcpu_halt(vcpu);
vmx->nested.nested_run_pending = 1;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bd7a70be41b3..e1a81267f3f6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -801,6 +801,17 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_get_cr8);
+static void kvm_update_dr0123(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
+ for (i = 0; i < KVM_NR_DB_REGS; i++)
+ vcpu->arch.eff_db[i] = vcpu->arch.db[i];
+ vcpu->arch.switch_db_regs |= KVM_DEBUGREG_RELOAD;
+ }
+}
+
static void kvm_update_dr6(struct kvm_vcpu *vcpu)
{
if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
@@ -1070,19 +1081,19 @@ static void update_pvclock_gtod(struct timekeeper *tk)
struct pvclock_gtod_data *vdata = &pvclock_gtod_data;
u64 boot_ns;
- boot_ns = ktime_to_ns(ktime_add(tk->tkr.base_mono, tk->offs_boot));
+ boot_ns = ktime_to_ns(ktime_add(tk->tkr_mono.base, tk->offs_boot));
write_seqcount_begin(&vdata->seq);
/* copy pvclock gtod data */
- vdata->clock.vclock_mode = tk->tkr.clock->archdata.vclock_mode;
- vdata->clock.cycle_last = tk->tkr.cycle_last;
- vdata->clock.mask = tk->tkr.mask;
- vdata->clock.mult = tk->tkr.mult;
- vdata->clock.shift = tk->tkr.shift;
+ vdata->clock.vclock_mode = tk->tkr_mono.clock->archdata.vclock_mode;
+ vdata->clock.cycle_last = tk->tkr_mono.cycle_last;
+ vdata->clock.mask = tk->tkr_mono.mask;
+ vdata->clock.mult = tk->tkr_mono.mult;
+ vdata->clock.shift = tk->tkr_mono.shift;
vdata->boot_ns = boot_ns;
- vdata->nsec_base = tk->tkr.xtime_nsec;
+ vdata->nsec_base = tk->tkr_mono.xtime_nsec;
write_seqcount_end(&vdata->seq);
}
@@ -2744,7 +2755,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_USER_NMI:
case KVM_CAP_REINJECT_CONTROL:
case KVM_CAP_IRQ_INJECT_STATUS:
- case KVM_CAP_IRQFD:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_IOEVENTFD_NO_LENGTH:
case KVM_CAP_PIT2:
@@ -3150,6 +3160,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
return -EINVAL;
memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
+ kvm_update_dr0123(vcpu);
vcpu->arch.dr6 = dbgregs->dr6;
kvm_update_dr6(vcpu);
vcpu->arch.dr7 = dbgregs->dr7;
@@ -4115,8 +4126,8 @@ static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len,
do {
n = min(len, 8);
if (!(vcpu->arch.apic &&
- !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, n, v))
- && kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+ !kvm_iodevice_write(vcpu, &vcpu->arch.apic->dev, addr, n, v))
+ && kvm_io_bus_write(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
handled += n;
addr += n;
@@ -4135,8 +4146,9 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
do {
n = min(len, 8);
if (!(vcpu->arch.apic &&
- !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, n, v))
- && kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, addr, n, v))
+ !kvm_iodevice_read(vcpu, &vcpu->arch.apic->dev,
+ addr, n, v))
+ && kvm_io_bus_read(vcpu, KVM_MMIO_BUS, addr, n, v))
break;
trace_kvm_mmio(KVM_TRACE_MMIO_READ, n, addr, *(u64 *)v);
handled += n;
@@ -4476,7 +4488,8 @@ mmio:
return X86EMUL_CONTINUE;
}
-int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
+static int emulator_read_write(struct x86_emulate_ctxt *ctxt,
+ unsigned long addr,
void *val, unsigned int bytes,
struct x86_exception *exception,
const struct read_write_emulator_ops *ops)
@@ -4539,7 +4552,7 @@ static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
exception, &read_emultor);
}
-int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
+static int emulator_write_emulated(struct x86_emulate_ctxt *ctxt,
unsigned long addr,
const void *val,
unsigned int bytes,
@@ -4630,10 +4643,10 @@ static int kernel_pio(struct kvm_vcpu *vcpu, void *pd)
int r;
if (vcpu->arch.pio.in)
- r = kvm_io_bus_read(vcpu->kvm, KVM_PIO_BUS, vcpu->arch.pio.port,
+ r = kvm_io_bus_read(vcpu, KVM_PIO_BUS, vcpu->arch.pio.port,
vcpu->arch.pio.size, pd);
else
- r = kvm_io_bus_write(vcpu->kvm, KVM_PIO_BUS,
+ r = kvm_io_bus_write(vcpu, KVM_PIO_BUS,
vcpu->arch.pio.port, vcpu->arch.pio.size,
pd);
return r;
@@ -4706,7 +4719,7 @@ static void emulator_invlpg(struct x86_emulate_ctxt *ctxt, ulong address)
kvm_mmu_invlpg(emul_to_vcpu(ctxt), address);
}
-int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
{
if (!need_emulate_wbinvd(vcpu))
return X86EMUL_CONTINUE;
@@ -4723,19 +4736,29 @@ int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
wbinvd();
return X86EMUL_CONTINUE;
}
+
+int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu)
+{
+ kvm_x86_ops->skip_emulated_instruction(vcpu);
+ return kvm_emulate_wbinvd_noskip(vcpu);
+}
EXPORT_SYMBOL_GPL(kvm_emulate_wbinvd);
+
+
static void emulator_wbinvd(struct x86_emulate_ctxt *ctxt)
{
- kvm_emulate_wbinvd(emul_to_vcpu(ctxt));
+ kvm_emulate_wbinvd_noskip(emul_to_vcpu(ctxt));
}
-int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
+static int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
+ unsigned long *dest)
{
return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
}
-int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
+static int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
+ unsigned long value)
{
return __kvm_set_dr(emul_to_vcpu(ctxt), dr, value);
@@ -5817,7 +5840,7 @@ void kvm_arch_exit(void)
free_percpu(shared_msrs);
}
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
{
++vcpu->stat.halt_exits;
if (irqchip_in_kernel(vcpu->kvm)) {
@@ -5828,6 +5851,13 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
return 0;
}
}
+EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
+
+int kvm_emulate_halt(struct kvm_vcpu *vcpu)
+{
+ kvm_x86_ops->skip_emulated_instruction(vcpu);
+ return kvm_vcpu_halt(vcpu);
+}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
@@ -5904,7 +5934,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
lapic_irq.dest_id = apicid;
lapic_irq.delivery_mode = APIC_DM_REMRD;
- kvm_irq_delivery_to_apic(kvm, 0, &lapic_irq, NULL);
+ kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
}
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@ -5912,6 +5942,8 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
unsigned long nr, a0, a1, a2, a3, ret;
int op_64_bit, r = 1;
+ kvm_x86_ops->skip_emulated_instruction(vcpu);
+
if (kvm_hv_hypercall_enabled(vcpu->kvm))
return kvm_hv_hypercall(vcpu);
@@ -6165,7 +6197,7 @@ void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm,
}
/*
- * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * Returns 1 to let vcpu_run() continue the guest execution loop without
* exiting to the userspace. Otherwise, the value will be returned to the
* userspace.
*/
@@ -6302,6 +6334,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
set_debugreg(vcpu->arch.eff_db[2], 2);
set_debugreg(vcpu->arch.eff_db[3], 3);
set_debugreg(vcpu->arch.dr6, 6);
+ vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD;
}
trace_kvm_entry(vcpu->vcpu_id);
@@ -6383,42 +6416,47 @@ out:
return r;
}
+static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
+{
+ if (!kvm_arch_vcpu_runnable(vcpu)) {
+ srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+ kvm_vcpu_block(vcpu);
+ vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
+ if (!kvm_check_request(KVM_REQ_UNHALT, vcpu))
+ return 1;
+ }
+
+ kvm_apic_accept_events(vcpu);
+ switch(vcpu->arch.mp_state) {
+ case KVM_MP_STATE_HALTED:
+ vcpu->arch.pv.pv_unhalted = false;
+ vcpu->arch.mp_state =
+ KVM_MP_STATE_RUNNABLE;
+ case KVM_MP_STATE_RUNNABLE:
+ vcpu->arch.apf.halted = false;
+ break;
+ case KVM_MP_STATE_INIT_RECEIVED:
+ break;
+ default:
+ return -EINTR;
+ break;
+ }
+ return 1;
+}
-static int __vcpu_run(struct kvm_vcpu *vcpu)
+static int vcpu_run(struct kvm_vcpu *vcpu)
{
int r;
struct kvm *kvm = vcpu->kvm;
vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- r = 1;
- while (r > 0) {
+ for (;;) {
if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
!vcpu->arch.apf.halted)
r = vcpu_enter_guest(vcpu);
- else {
- srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
- kvm_vcpu_block(vcpu);
- vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
- if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
- kvm_apic_accept_events(vcpu);
- switch(vcpu->arch.mp_state) {
- case KVM_MP_STATE_HALTED:
- vcpu->arch.pv.pv_unhalted = false;
- vcpu->arch.mp_state =
- KVM_MP_STATE_RUNNABLE;
- case KVM_MP_STATE_RUNNABLE:
- vcpu->arch.apf.halted = false;
- break;
- case KVM_MP_STATE_INIT_RECEIVED:
- break;
- default:
- r = -EINTR;
- break;
- }
- }
- }
-
+ else
+ r = vcpu_block(kvm, vcpu);
if (r <= 0)
break;
@@ -6430,6 +6468,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.request_irq_exits;
+ break;
}
kvm_check_async_pf_completion(vcpu);
@@ -6438,6 +6477,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
r = -EINTR;
vcpu->run->exit_reason = KVM_EXIT_INTR;
++vcpu->stat.signal_exits;
+ break;
}
if (need_resched()) {
srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -6569,7 +6609,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
} else
WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
- r = __vcpu_run(vcpu);
+ r = vcpu_run(vcpu);
out:
post_kvm_run_save(vcpu);
@@ -7076,11 +7116,14 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
kvm_clear_exception_queue(vcpu);
memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
+ kvm_update_dr0123(vcpu);
vcpu->arch.dr6 = DR6_INIT;
kvm_update_dr6(vcpu);
vcpu->arch.dr7 = DR7_FIXED_1;
kvm_update_dr7(vcpu);
+ vcpu->arch.cr2 = 0;
+
kvm_make_request(KVM_REQ_EVENT, vcpu);
vcpu->arch.apf.msr_val = 0;
vcpu->arch.st.msr_val = 0;
@@ -7241,7 +7284,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.emulate_ctxt.ops = &emulate_ops;
- if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu))
+ if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_reset_bsp(vcpu))
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
else
vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
@@ -7289,6 +7332,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
vcpu->arch.guest_supported_xcr0 = 0;
vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
+ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+
kvm_async_pf_hash_reset(vcpu);
kvm_pmu_init(vcpu);
@@ -7429,7 +7474,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
- kvm_kvfree(free->arch.rmap[i]);
+ kvfree(free->arch.rmap[i]);
free->arch.rmap[i] = NULL;
}
if (i == 0)
@@ -7437,7 +7482,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
if (!dont || free->arch.lpage_info[i - 1] !=
dont->arch.lpage_info[i - 1]) {
- kvm_kvfree(free->arch.lpage_info[i - 1]);
+ kvfree(free->arch.lpage_info[i - 1]);
free->arch.lpage_info[i - 1] = NULL;
}
}
@@ -7491,12 +7536,12 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
out_free:
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
- kvm_kvfree(slot->arch.rmap[i]);
+ kvfree(slot->arch.rmap[i]);
slot->arch.rmap[i] = NULL;
if (i == 0)
continue;
- kvm_kvfree(slot->arch.lpage_info[i - 1]);
+ kvfree(slot->arch.lpage_info[i - 1]);
slot->arch.lpage_info[i - 1] = NULL;
}
return -ENOMEM;
@@ -7619,6 +7664,23 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
new = id_to_memslot(kvm->memslots, mem->slot);
/*
+ * Dirty logging tracks sptes in 4k granularity, meaning that large
+ * sptes have to be split. If live migration is successful, the guest
+ * in the source machine will be destroyed and large sptes will be
+ * created in the destination. However, if the guest continues to run
+ * in the source machine (for example if live migration fails), small
+ * sptes will remain around and cause bad performance.
+ *
+ * Scan sptes if dirty logging has been stopped, dropping those
+ * which can be collapsed into a single large-page spte. Later
+ * page faults will create the large-page sptes.
+ */
+ if ((change != KVM_MR_DELETE) &&
+ (old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
+ !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+ kvm_mmu_zap_collapsible_sptes(kvm, new);
+
+ /*
* Set up write protection and/or dirty logging for the new slot.
*
* For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of old slot have
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index ac4453d8520e..717908b16037 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -868,7 +868,8 @@ static void __init lguest_init_IRQ(void)
/* Some systems map "vectors" to interrupts weirdly. Not us! */
__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
if (i != SYSCALL_VECTOR)
- set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
+ set_intr_gate(i, irq_entries_start +
+ 8 * (i - FIRST_EXTERNAL_VECTOR));
}
/*
@@ -1076,6 +1077,7 @@ static void lguest_load_sp0(struct tss_struct *tss,
{
lazy_hcall3(LHCALL_SET_STACK, __KERNEL_DS | 0x1, thread->sp0,
THREAD_SIZE / PAGE_SIZE);
+ tss->x86_tss.sp0 = thread->sp0;
}
/* Let's just say, I wouldn't do debugging under a Guest. */
diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S
index f5cc9eb1d51b..082a85167a5b 100644
--- a/arch/x86/lib/atomic64_cx8_32.S
+++ b/arch/x86/lib/atomic64_cx8_32.S
@@ -13,16 +13,6 @@
#include <asm/alternative-asm.h>
#include <asm/dwarf2.h>
-.macro SAVE reg
- pushl_cfi %\reg
- CFI_REL_OFFSET \reg, 0
-.endm
-
-.macro RESTORE reg
- popl_cfi %\reg
- CFI_RESTORE \reg
-.endm
-
.macro read64 reg
movl %ebx, %eax
movl %ecx, %edx
@@ -67,10 +57,10 @@ ENDPROC(atomic64_xchg_cx8)
.macro addsub_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
CFI_STARTPROC
- SAVE ebp
- SAVE ebx
- SAVE esi
- SAVE edi
+ pushl_cfi_reg ebp
+ pushl_cfi_reg ebx
+ pushl_cfi_reg esi
+ pushl_cfi_reg edi
movl %eax, %esi
movl %edx, %edi
@@ -89,10 +79,10 @@ ENTRY(atomic64_\func\()_return_cx8)
10:
movl %ebx, %eax
movl %ecx, %edx
- RESTORE edi
- RESTORE esi
- RESTORE ebx
- RESTORE ebp
+ popl_cfi_reg edi
+ popl_cfi_reg esi
+ popl_cfi_reg ebx
+ popl_cfi_reg ebp
ret
CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
@@ -104,7 +94,7 @@ addsub_return sub sub sbb
.macro incdec_return func ins insc
ENTRY(atomic64_\func\()_return_cx8)
CFI_STARTPROC
- SAVE ebx
+ pushl_cfi_reg ebx
read64 %esi
1:
@@ -119,7 +109,7 @@ ENTRY(atomic64_\func\()_return_cx8)
10:
movl %ebx, %eax
movl %ecx, %edx
- RESTORE ebx
+ popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_\func\()_return_cx8)
@@ -130,7 +120,7 @@ incdec_return dec sub sbb
ENTRY(atomic64_dec_if_positive_cx8)
CFI_STARTPROC
- SAVE ebx
+ pushl_cfi_reg ebx
read64 %esi
1:
@@ -146,18 +136,18 @@ ENTRY(atomic64_dec_if_positive_cx8)
2:
movl %ebx, %eax
movl %ecx, %edx
- RESTORE ebx
+ popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_dec_if_positive_cx8)
ENTRY(atomic64_add_unless_cx8)
CFI_STARTPROC
- SAVE ebp
- SAVE ebx
+ pushl_cfi_reg ebp
+ pushl_cfi_reg ebx
/* these just push these two parameters on the stack */
- SAVE edi
- SAVE ecx
+ pushl_cfi_reg edi
+ pushl_cfi_reg ecx
movl %eax, %ebp
movl %edx, %edi
@@ -179,8 +169,8 @@ ENTRY(atomic64_add_unless_cx8)
3:
addl $8, %esp
CFI_ADJUST_CFA_OFFSET -8
- RESTORE ebx
- RESTORE ebp
+ popl_cfi_reg ebx
+ popl_cfi_reg ebp
ret
4:
cmpl %edx, 4(%esp)
@@ -192,7 +182,7 @@ ENDPROC(atomic64_add_unless_cx8)
ENTRY(atomic64_inc_not_zero_cx8)
CFI_STARTPROC
- SAVE ebx
+ pushl_cfi_reg ebx
read64 %esi
1:
@@ -209,7 +199,7 @@ ENTRY(atomic64_inc_not_zero_cx8)
movl $1, %eax
3:
- RESTORE ebx
+ popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(atomic64_inc_not_zero_cx8)
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S
index e78b8eee6615..9bc944a91274 100644
--- a/arch/x86/lib/checksum_32.S
+++ b/arch/x86/lib/checksum_32.S
@@ -51,10 +51,8 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum)
*/
ENTRY(csum_partial)
CFI_STARTPROC
- pushl_cfi %esi
- CFI_REL_OFFSET esi, 0
- pushl_cfi %ebx
- CFI_REL_OFFSET ebx, 0
+ pushl_cfi_reg esi
+ pushl_cfi_reg ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: unsigned char *buff
@@ -127,14 +125,12 @@ ENTRY(csum_partial)
6: addl %ecx,%eax
adcl $0, %eax
7:
- testl $1, 12(%esp)
+ testb $1, 12(%esp)
jz 8f
roll $8, %eax
8:
- popl_cfi %ebx
- CFI_RESTORE ebx
- popl_cfi %esi
- CFI_RESTORE esi
+ popl_cfi_reg ebx
+ popl_cfi_reg esi
ret
CFI_ENDPROC
ENDPROC(csum_partial)
@@ -145,10 +141,8 @@ ENDPROC(csum_partial)
ENTRY(csum_partial)
CFI_STARTPROC
- pushl_cfi %esi
- CFI_REL_OFFSET esi, 0
- pushl_cfi %ebx
- CFI_REL_OFFSET ebx, 0
+ pushl_cfi_reg esi
+ pushl_cfi_reg ebx
movl 20(%esp),%eax # Function arg: unsigned int sum
movl 16(%esp),%ecx # Function arg: int len
movl 12(%esp),%esi # Function arg: const unsigned char *buf
@@ -251,14 +245,12 @@ ENTRY(csum_partial)
addl %ebx,%eax
adcl $0,%eax
80:
- testl $1, 12(%esp)
+ testb $1, 12(%esp)
jz 90f
roll $8, %eax
90:
- popl_cfi %ebx
- CFI_RESTORE ebx
- popl_cfi %esi
- CFI_RESTORE esi
+ popl_cfi_reg ebx
+ popl_cfi_reg esi
ret
CFI_ENDPROC
ENDPROC(csum_partial)
@@ -298,12 +290,9 @@ ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
subl $4,%esp
CFI_ADJUST_CFA_OFFSET 4
- pushl_cfi %edi
- CFI_REL_OFFSET edi, 0
- pushl_cfi %esi
- CFI_REL_OFFSET esi, 0
- pushl_cfi %ebx
- CFI_REL_OFFSET ebx, 0
+ pushl_cfi_reg edi
+ pushl_cfi_reg esi
+ pushl_cfi_reg ebx
movl ARGBASE+16(%esp),%eax # sum
movl ARGBASE+12(%esp),%ecx # len
movl ARGBASE+4(%esp),%esi # src
@@ -412,12 +401,9 @@ DST( movb %cl, (%edi) )
.previous
- popl_cfi %ebx
- CFI_RESTORE ebx
- popl_cfi %esi
- CFI_RESTORE esi
- popl_cfi %edi
- CFI_RESTORE edi
+ popl_cfi_reg ebx
+ popl_cfi_reg esi
+ popl_cfi_reg edi
popl_cfi %ecx # equivalent to addl $4,%esp
ret
CFI_ENDPROC
@@ -441,12 +427,9 @@ ENDPROC(csum_partial_copy_generic)
ENTRY(csum_partial_copy_generic)
CFI_STARTPROC
- pushl_cfi %ebx
- CFI_REL_OFFSET ebx, 0
- pushl_cfi %edi
- CFI_REL_OFFSET edi, 0
- pushl_cfi %esi
- CFI_REL_OFFSET esi, 0
+ pushl_cfi_reg ebx
+ pushl_cfi_reg edi
+ pushl_cfi_reg esi
movl ARGBASE+4(%esp),%esi #src
movl ARGBASE+8(%esp),%edi #dst
movl ARGBASE+12(%esp),%ecx #len
@@ -506,12 +489,9 @@ DST( movb %dl, (%edi) )
jmp 7b
.previous
- popl_cfi %esi
- CFI_RESTORE esi
- popl_cfi %edi
- CFI_RESTORE edi
- popl_cfi %ebx
- CFI_RESTORE ebx
+ popl_cfi_reg esi
+ popl_cfi_reg edi
+ popl_cfi_reg ebx
ret
CFI_ENDPROC
ENDPROC(csum_partial_copy_generic)
diff --git a/arch/x86/lib/clear_page_64.S b/arch/x86/lib/clear_page_64.S
index f2145cfa12a6..e67e579c93bd 100644
--- a/arch/x86/lib/clear_page_64.S
+++ b/arch/x86/lib/clear_page_64.S
@@ -1,31 +1,35 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
/*
- * Zero a page.
- * rdi page
- */
-ENTRY(clear_page_c)
+ * Most CPUs support enhanced REP MOVSB/STOSB instructions. It is
+ * recommended to use this when possible and we do use them by default.
+ * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
+ * Otherwise, use original.
+ */
+
+/*
+ * Zero a page.
+ * %rdi - page
+ */
+ENTRY(clear_page)
CFI_STARTPROC
+
+ ALTERNATIVE_2 "jmp clear_page_orig", "", X86_FEATURE_REP_GOOD, \
+ "jmp clear_page_c_e", X86_FEATURE_ERMS
+
movl $4096/8,%ecx
xorl %eax,%eax
rep stosq
ret
CFI_ENDPROC
-ENDPROC(clear_page_c)
+ENDPROC(clear_page)
-ENTRY(clear_page_c_e)
+ENTRY(clear_page_orig)
CFI_STARTPROC
- movl $4096,%ecx
- xorl %eax,%eax
- rep stosb
- ret
- CFI_ENDPROC
-ENDPROC(clear_page_c_e)
-ENTRY(clear_page)
- CFI_STARTPROC
xorl %eax,%eax
movl $4096/64,%ecx
.p2align 4
@@ -45,29 +49,13 @@ ENTRY(clear_page)
nop
ret
CFI_ENDPROC
-.Lclear_page_end:
-ENDPROC(clear_page)
-
- /*
- * Some CPUs support enhanced REP MOVSB/STOSB instructions.
- * It is recommended to use this when possible.
- * If enhanced REP MOVSB/STOSB is not available, try to use fast string.
- * Otherwise, use original function.
- *
- */
+ENDPROC(clear_page_orig)
-#include <asm/cpufeature.h>
-
- .section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
- .byte (clear_page_c - clear_page) - (2f - 1b) /* offset */
-2: .byte 0xeb /* jmp <disp8> */
- .byte (clear_page_c_e - clear_page) - (3f - 2b) /* offset */
-3:
- .previous
- .section .altinstructions,"a"
- altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
- .Lclear_page_end-clear_page, 2b-1b
- altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
- .Lclear_page_end-clear_page,3b-2b
- .previous
+ENTRY(clear_page_c_e)
+ CFI_STARTPROC
+ movl $4096,%ecx
+ xorl %eax,%eax
+ rep stosb
+ ret
+ CFI_ENDPROC
+ENDPROC(clear_page_c_e)
diff --git a/arch/x86/lib/copy_page_64.S b/arch/x86/lib/copy_page_64.S
index 176cca67212b..8239dbcbf984 100644
--- a/arch/x86/lib/copy_page_64.S
+++ b/arch/x86/lib/copy_page_64.S
@@ -2,23 +2,26 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
+#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
+/*
+ * Some CPUs run faster using the string copy instructions (sane microcode).
+ * It is also a lot simpler. Use this when possible. But, don't use streaming
+ * copy unless the CPU indicates X86_FEATURE_REP_GOOD. Could vary the
+ * prefetch distance based on SMP/UP.
+ */
ALIGN
-copy_page_rep:
+ENTRY(copy_page)
CFI_STARTPROC
+ ALTERNATIVE "jmp copy_page_regs", "", X86_FEATURE_REP_GOOD
movl $4096/8, %ecx
rep movsq
ret
CFI_ENDPROC
-ENDPROC(copy_page_rep)
-
-/*
- * Don't use streaming copy unless the CPU indicates X86_FEATURE_REP_GOOD.
- * Could vary the prefetch distance based on SMP/UP.
-*/
+ENDPROC(copy_page)
-ENTRY(copy_page)
+ENTRY(copy_page_regs)
CFI_STARTPROC
subq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET 2*8
@@ -90,21 +93,5 @@ ENTRY(copy_page)
addq $2*8, %rsp
CFI_ADJUST_CFA_OFFSET -2*8
ret
-.Lcopy_page_end:
CFI_ENDPROC
-ENDPROC(copy_page)
-
- /* Some CPUs run faster using the string copy instructions.
- It is also a lot simpler. Use this when possible */
-
-#include <asm/cpufeature.h>
-
- .section .altinstr_replacement,"ax"
-1: .byte 0xeb /* jmp <disp8> */
- .byte (copy_page_rep - copy_page) - (2f - 1b) /* offset */
-2:
- .previous
- .section .altinstructions,"a"
- altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
- .Lcopy_page_end-copy_page, 2b-1b
- .previous
+ENDPROC(copy_page_regs)
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index dee945d55594..fa997dfaef24 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -8,9 +8,6 @@
#include <linux/linkage.h>
#include <asm/dwarf2.h>
-
-#define FIX_ALIGNMENT 1
-
#include <asm/current.h>
#include <asm/asm-offsets.h>
#include <asm/thread_info.h>
@@ -19,33 +16,7 @@
#include <asm/asm.h>
#include <asm/smap.h>
-/*
- * By placing feature2 after feature1 in altinstructions section, we logically
- * implement:
- * If CPU has feature2, jmp to alt2 is used
- * else if CPU has feature1, jmp to alt1 is used
- * else jmp to orig is used.
- */
- .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
-0:
- .byte 0xe9 /* 32bit jump */
- .long \orig-1f /* by default jump to orig */
-1:
- .section .altinstr_replacement,"ax"
-2: .byte 0xe9 /* near jump with 32bit immediate */
- .long \alt1-1b /* offset */ /* or alternatively to alt1 */
-3: .byte 0xe9 /* near jump with 32bit immediate */
- .long \alt2-1b /* offset */ /* or alternatively to alt2 */
- .previous
-
- .section .altinstructions,"a"
- altinstruction_entry 0b,2b,\feature1,5,5
- altinstruction_entry 0b,3b,\feature2,5,5
- .previous
- .endm
-
.macro ALIGN_DESTINATION
-#ifdef FIX_ALIGNMENT
/* check for bad alignment of destination */
movl %edi,%ecx
andl $7,%ecx
@@ -67,7 +38,6 @@
_ASM_EXTABLE(100b,103b)
_ASM_EXTABLE(101b,103b)
-#endif
.endm
/* Standard copy_to_user with segment limit checking */
@@ -79,9 +49,11 @@ ENTRY(_copy_to_user)
jc bad_to_user
cmpq TI_addr_limit(%rax),%rcx
ja bad_to_user
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
- copy_user_generic_unrolled,copy_user_generic_string, \
- copy_user_enhanced_fast_string
+ ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
+ "jmp copy_user_generic_string", \
+ X86_FEATURE_REP_GOOD, \
+ "jmp copy_user_enhanced_fast_string", \
+ X86_FEATURE_ERMS
CFI_ENDPROC
ENDPROC(_copy_to_user)
@@ -94,9 +66,11 @@ ENTRY(_copy_from_user)
jc bad_from_user
cmpq TI_addr_limit(%rax),%rcx
ja bad_from_user
- ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,X86_FEATURE_ERMS, \
- copy_user_generic_unrolled,copy_user_generic_string, \
- copy_user_enhanced_fast_string
+ ALTERNATIVE_2 "jmp copy_user_generic_unrolled", \
+ "jmp copy_user_generic_string", \
+ X86_FEATURE_REP_GOOD, \
+ "jmp copy_user_enhanced_fast_string", \
+ X86_FEATURE_ERMS
CFI_ENDPROC
ENDPROC(_copy_from_user)
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S
index 2419d5fefae3..9734182966f3 100644
--- a/arch/x86/lib/csum-copy_64.S
+++ b/arch/x86/lib/csum-copy_64.S
@@ -196,7 +196,7 @@ ENTRY(csum_partial_copy_generic)
/* handle last odd byte */
.Lhandle_1:
- testl $1, %r10d
+ testb $1, %r10b
jz .Lende
xorl %ebx, %ebx
source
diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index 1313ae6b478b..8f72b334aea0 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -52,6 +52,13 @@
*/
void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
{
+ /*
+ * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
+ * even if the input buffer is long enough to hold them.
+ */
+ if (buf_len > MAX_INSN_SIZE)
+ buf_len = MAX_INSN_SIZE;
+
memset(insn, 0, sizeof(*insn));
insn->kaddr = kaddr;
insn->end_kaddr = kaddr + buf_len;
@@ -164,6 +171,12 @@ found:
/* VEX.W overrides opnd_size */
insn->opnd_bytes = 8;
} else {
+ /*
+ * For VEX2, fake VEX3-like byte#2.
+ * Makes it easier to decode vex.W, vex.vvvv,
+ * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+ */
+ insn->vex_prefix.bytes[2] = b2 & 0x7f;
insn->vex_prefix.nbytes = 2;
insn->next_byte += 2;
}
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index 89b53c9968e7..b046664f5a1c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -1,12 +1,20 @@
/* Copyright 2002 Andi Kleen */
#include <linux/linkage.h>
-
#include <asm/cpufeature.h>
#include <asm/dwarf2.h>
#include <asm/alternative-asm.h>
/*
+ * We build a jump to memcpy_orig by default which gets NOPped out on
+ * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
+ * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
+ * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
+ */
+
+.weak memcpy
+
+/*
* memcpy - Copy a memory block.
*
* Input:
@@ -17,15 +25,11 @@
* Output:
* rax original destination
*/
+ENTRY(__memcpy)
+ENTRY(memcpy)
+ ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
+ "jmp memcpy_erms", X86_FEATURE_ERMS
-/*
- * memcpy_c() - fast string ops (REP MOVSQ) based variant.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
- */
- .section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c:
movq %rdi, %rax
movq %rdx, %rcx
shrq $3, %rcx
@@ -34,29 +38,21 @@
movl %edx, %ecx
rep movsb
ret
-.Lmemcpy_e:
- .previous
+ENDPROC(memcpy)
+ENDPROC(__memcpy)
/*
- * memcpy_c_e() - enhanced fast string memcpy. This is faster and simpler than
- * memcpy_c. Use memcpy_c_e when possible.
- *
- * This gets patched over the unrolled variant (below) via the
- * alternative instructions framework:
+ * memcpy_erms() - enhanced fast string memcpy. This is faster and
+ * simpler than memcpy. Use memcpy_erms when possible.
*/
- .section .altinstr_replacement, "ax", @progbits
-.Lmemcpy_c_e:
+ENTRY(memcpy_erms)
movq %rdi, %rax
movq %rdx, %rcx
rep movsb
ret
-.Lmemcpy_e_e:
- .previous
-
-.weak memcpy
+ENDPROC(memcpy_erms)
-ENTRY(__memcpy)
-ENTRY(memcpy)
+ENTRY(memcpy_orig)
CFI_STARTPROC
movq %rdi, %rax
@@ -183,26 +179,4 @@ ENTRY(memcpy)
.Lend:
retq
CFI_ENDPROC
-ENDPROC(memcpy)
-ENDPROC(__memcpy)
-
- /*
- * Some CPUs are adding enhanced REP MOVSB/STOSB feature
- * If the feature is supported, memcpy_c_e() is the first choice.
- * If enhanced rep movsb copy is not available, use fast string copy
- * memcpy_c() when possible. This is faster and code is simpler than
- * original memcpy().
- * Otherwise, original memcpy() is used.
- * In .altinstructions section, ERMS feature is placed after REG_GOOD
- * feature to implement the right patch order.
- *
- * Replace only beginning, memcpy is used to apply alternatives,
- * so it is silly to overwrite itself with nops - reboot is the
- * only outcome...
- */
- .section .altinstructions, "a"
- altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
- .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
- altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
- .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
- .previous
+ENDPROC(memcpy_orig)
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 9c4b530575da..0f8a0d0331b9 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -5,7 +5,6 @@
* This assembly file is re-written from memmove_64.c file.
* - Copyright 2011 Fenghua Yu <fenghua.yu@intel.com>
*/
-#define _STRING_C
#include <linux/linkage.h>
#include <asm/dwarf2.h>
#include <asm/cpufeature.h>
@@ -44,6 +43,8 @@ ENTRY(__memmove)
jg 2f
.Lmemmove_begin_forward:
+ ALTERNATIVE "", "movq %rdx, %rcx; rep movsb; retq", X86_FEATURE_ERMS
+
/*
* movsq instruction have many startup latency
* so we handle small size by general register.
@@ -207,21 +208,5 @@ ENTRY(__memmove)
13:
retq
CFI_ENDPROC
-
- .section .altinstr_replacement,"ax"
-.Lmemmove_begin_forward_efs:
- /* Forward moving data. */
- movq %rdx, %rcx
- rep movsb
- retq
-.Lmemmove_end_forward_efs:
- .previous
-
- .section .altinstructions,"a"
- altinstruction_entry .Lmemmove_begin_forward, \
- .Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
- .Lmemmove_end_forward-.Lmemmove_begin_forward, \
- .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
- .previous
ENDPROC(__memmove)
ENDPROC(memmove)
diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S
index 6f44935c6a60..93118fb23976 100644
--- a/arch/x86/lib/memset_64.S
+++ b/arch/x86/lib/memset_64.S
@@ -5,19 +5,30 @@
#include <asm/cpufeature.h>
#include <asm/alternative-asm.h>
+.weak memset
+
/*
* ISO C memset - set a memory block to a byte value. This function uses fast
* string to get better performance than the original function. The code is
* simpler and shorter than the orignal function as well.
- *
+ *
* rdi destination
- * rsi value (char)
- * rdx count (bytes)
- *
+ * rsi value (char)
+ * rdx count (bytes)
+ *
* rax original destination
- */
- .section .altinstr_replacement, "ax", @progbits
-.Lmemset_c:
+ */
+ENTRY(memset)
+ENTRY(__memset)
+ /*
+ * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
+ * to use it when possible. If not available, use fast string instructions.
+ *
+ * Otherwise, use original memset function.
+ */
+ ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
+ "jmp memset_erms", X86_FEATURE_ERMS
+
movq %rdi,%r9
movq %rdx,%rcx
andl $7,%edx
@@ -31,8 +42,8 @@
rep stosb
movq %r9,%rax
ret
-.Lmemset_e:
- .previous
+ENDPROC(memset)
+ENDPROC(__memset)
/*
* ISO C memset - set a memory block to a byte value. This function uses
@@ -45,21 +56,16 @@
*
* rax original destination
*/
- .section .altinstr_replacement, "ax", @progbits
-.Lmemset_c_e:
+ENTRY(memset_erms)
movq %rdi,%r9
movb %sil,%al
movq %rdx,%rcx
rep stosb
movq %r9,%rax
ret
-.Lmemset_e_e:
- .previous
-
-.weak memset
+ENDPROC(memset_erms)
-ENTRY(memset)
-ENTRY(__memset)
+ENTRY(memset_orig)
CFI_STARTPROC
movq %rdi,%r10
@@ -134,23 +140,4 @@ ENTRY(__memset)
jmp .Lafter_bad_alignment
.Lfinal:
CFI_ENDPROC
-ENDPROC(memset)
-ENDPROC(__memset)
-
- /* Some CPUs support enhanced REP MOVSB/STOSB feature.
- * It is recommended to use this when possible.
- *
- * If enhanced REP MOVSB/STOSB feature is not available, use fast string
- * instructions.
- *
- * Otherwise, use original memset function.
- *
- * In .altinstructions section, ERMS feature is placed after REG_GOOD
- * feature to implement the right patch order.
- */
- .section .altinstructions,"a"
- altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
- .Lfinal-__memset,.Lmemset_e-.Lmemset_c
- altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
- .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e
- .previous
+ENDPROC(memset_orig)
diff --git a/arch/x86/lib/msr-reg.S b/arch/x86/lib/msr-reg.S
index f6d13eefad10..3ca5218fbece 100644
--- a/arch/x86/lib/msr-reg.S
+++ b/arch/x86/lib/msr-reg.S
@@ -14,8 +14,8 @@
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
CFI_STARTPROC
- pushq_cfi %rbx
- pushq_cfi %rbp
+ pushq_cfi_reg rbx
+ pushq_cfi_reg rbp
movq %rdi, %r10 /* Save pointer */
xorl %r11d, %r11d /* Return value */
movl (%rdi), %eax
@@ -35,8 +35,8 @@ ENTRY(\op\()_safe_regs)
movl %ebp, 20(%r10)
movl %esi, 24(%r10)
movl %edi, 28(%r10)
- popq_cfi %rbp
- popq_cfi %rbx
+ popq_cfi_reg rbp
+ popq_cfi_reg rbx
ret
3:
CFI_RESTORE_STATE
@@ -53,10 +53,10 @@ ENDPROC(\op\()_safe_regs)
.macro op_safe_regs op
ENTRY(\op\()_safe_regs)
CFI_STARTPROC
- pushl_cfi %ebx
- pushl_cfi %ebp
- pushl_cfi %esi
- pushl_cfi %edi
+ pushl_cfi_reg ebx
+ pushl_cfi_reg ebp
+ pushl_cfi_reg esi
+ pushl_cfi_reg edi
pushl_cfi $0 /* Return value */
pushl_cfi %eax
movl 4(%eax), %ecx
@@ -80,10 +80,10 @@ ENTRY(\op\()_safe_regs)
movl %esi, 24(%eax)
movl %edi, 28(%eax)
popl_cfi %eax
- popl_cfi %edi
- popl_cfi %esi
- popl_cfi %ebp
- popl_cfi %ebx
+ popl_cfi_reg edi
+ popl_cfi_reg esi
+ popl_cfi_reg ebp
+ popl_cfi_reg ebx
ret
3:
CFI_RESTORE_STATE
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
index 5dff5f042468..2322abe4da3b 100644
--- a/arch/x86/lib/rwsem.S
+++ b/arch/x86/lib/rwsem.S
@@ -34,10 +34,10 @@
*/
#define save_common_regs \
- pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0
+ pushl_cfi_reg ecx
#define restore_common_regs \
- popl_cfi %ecx; CFI_RESTORE ecx
+ popl_cfi_reg ecx
/* Avoid uglifying the argument copying x86-64 needs to do. */
.macro movq src, dst
@@ -64,22 +64,22 @@
*/
#define save_common_regs \
- pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \
- pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \
- pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \
- pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \
- pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \
- pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \
- pushq_cfi %r11; CFI_REL_OFFSET r11, 0
+ pushq_cfi_reg rdi; \
+ pushq_cfi_reg rsi; \
+ pushq_cfi_reg rcx; \
+ pushq_cfi_reg r8; \
+ pushq_cfi_reg r9; \
+ pushq_cfi_reg r10; \
+ pushq_cfi_reg r11
#define restore_common_regs \
- popq_cfi %r11; CFI_RESTORE r11; \
- popq_cfi %r10; CFI_RESTORE r10; \
- popq_cfi %r9; CFI_RESTORE r9; \
- popq_cfi %r8; CFI_RESTORE r8; \
- popq_cfi %rcx; CFI_RESTORE rcx; \
- popq_cfi %rsi; CFI_RESTORE rsi; \
- popq_cfi %rdi; CFI_RESTORE rdi
+ popq_cfi_reg r11; \
+ popq_cfi_reg r10; \
+ popq_cfi_reg r9; \
+ popq_cfi_reg r8; \
+ popq_cfi_reg rcx; \
+ popq_cfi_reg rsi; \
+ popq_cfi_reg rdi
#endif
@@ -87,12 +87,10 @@
ENTRY(call_rwsem_down_read_failed)
CFI_STARTPROC
save_common_regs
- __ASM_SIZE(push,_cfi) %__ASM_REG(dx)
- CFI_REL_OFFSET __ASM_REG(dx), 0
+ __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
movq %rax,%rdi
call rwsem_down_read_failed
- __ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
- CFI_RESTORE __ASM_REG(dx)
+ __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
restore_common_regs
ret
CFI_ENDPROC
@@ -124,12 +122,10 @@ ENDPROC(call_rwsem_wake)
ENTRY(call_rwsem_downgrade_wake)
CFI_STARTPROC
save_common_regs
- __ASM_SIZE(push,_cfi) %__ASM_REG(dx)
- CFI_REL_OFFSET __ASM_REG(dx), 0
+ __ASM_SIZE(push,_cfi_reg) __ASM_REG(dx)
movq %rax,%rdi
call rwsem_downgrade_wake
- __ASM_SIZE(pop,_cfi) %__ASM_REG(dx)
- CFI_RESTORE __ASM_REG(dx)
+ __ASM_SIZE(pop,_cfi_reg) __ASM_REG(dx)
restore_common_regs
ret
CFI_ENDPROC
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S
index e28cdaf5ac2c..5eb715087b80 100644
--- a/arch/x86/lib/thunk_32.S
+++ b/arch/x86/lib/thunk_32.S
@@ -13,12 +13,9 @@
.globl \name
\name:
CFI_STARTPROC
- pushl_cfi %eax
- CFI_REL_OFFSET eax, 0
- pushl_cfi %ecx
- CFI_REL_OFFSET ecx, 0
- pushl_cfi %edx
- CFI_REL_OFFSET edx, 0
+ pushl_cfi_reg eax
+ pushl_cfi_reg ecx
+ pushl_cfi_reg edx
.if \put_ret_addr_in_eax
/* Place EIP in the arg1 */
@@ -26,12 +23,9 @@
.endif
call \func
- popl_cfi %edx
- CFI_RESTORE edx
- popl_cfi %ecx
- CFI_RESTORE ecx
- popl_cfi %eax
- CFI_RESTORE eax
+ popl_cfi_reg edx
+ popl_cfi_reg ecx
+ popl_cfi_reg eax
ret
CFI_ENDPROC
_ASM_NOKPROBE(\name)
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S
index b30b5ebd614a..f89ba4e93025 100644
--- a/arch/x86/lib/thunk_64.S
+++ b/arch/x86/lib/thunk_64.S
@@ -17,9 +17,18 @@
CFI_STARTPROC
/* this one pushes 9 elems, the next one would be %rIP */
- SAVE_ARGS
+ pushq_cfi_reg rdi
+ pushq_cfi_reg rsi
+ pushq_cfi_reg rdx
+ pushq_cfi_reg rcx
+ pushq_cfi_reg rax
+ pushq_cfi_reg r8
+ pushq_cfi_reg r9
+ pushq_cfi_reg r10
+ pushq_cfi_reg r11
.if \put_ret_addr_in_rdi
+ /* 9*8(%rsp) is return addr on stack */
movq_cfi_restore 9*8, rdi
.endif
@@ -45,11 +54,22 @@
#endif
#endif
- /* SAVE_ARGS below is used only for the .cfi directives it contains. */
+#if defined(CONFIG_TRACE_IRQFLAGS) \
+ || defined(CONFIG_DEBUG_LOCK_ALLOC) \
+ || defined(CONFIG_PREEMPT)
CFI_STARTPROC
- SAVE_ARGS
+ CFI_ADJUST_CFA_OFFSET 9*8
restore:
- RESTORE_ARGS
+ popq_cfi_reg r11
+ popq_cfi_reg r10
+ popq_cfi_reg r9
+ popq_cfi_reg r8
+ popq_cfi_reg rax
+ popq_cfi_reg rcx
+ popq_cfi_reg rdx
+ popq_cfi_reg rsi
+ popq_cfi_reg rdi
ret
CFI_ENDPROC
_ASM_NOKPROBE(restore)
+#endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index c905e89e19fe..1f33b3d1fd68 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -69,21 +69,20 @@ EXPORT_SYMBOL(copy_in_user);
* it is not necessary to optimize tail handling.
*/
__visible unsigned long
-copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest)
+copy_user_handle_tail(char *to, char *from, unsigned len)
{
- char c;
- unsigned zero_len;
-
for (; len; --len, to++) {
+ char c;
+
if (__get_user_nocheck(c, from++, sizeof(char)))
break;
if (__put_user_nocheck(c, to, sizeof(char)))
break;
}
-
- for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
- if (__put_user_nocheck(c, to++, sizeof(char)))
- break;
clac();
+
+ /* If the destination is a kernel buffer, we always clear the end */
+ if ((unsigned long)to >= TASK_SIZE_MAX)
+ memset(to, 0, len);
return len;
}
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt
index 1a2be7c6895d..816488c0b97e 100644
--- a/arch/x86/lib/x86-opcode-map.txt
+++ b/arch/x86/lib/x86-opcode-map.txt
@@ -273,6 +273,9 @@ dd: ESC
de: ESC
df: ESC
# 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
e0: LOOPNE/LOOPNZ Jb (f64)
e1: LOOPE/LOOPZ Jb (f64)
e2: LOOP Jb (f64)
@@ -281,6 +284,10 @@ e4: IN AL,Ib
e5: IN eAX,Ib
e6: OUT Ib,AL
e7: OUT Ib,eAX
+# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
+# in "near" jumps and calls is 16-bit. For CALL,
+# push of return address is 16-bit wide, RSP is decremented by 2
+# but is not truncated to 16 bits, unlike RIP.
e8: CALL Jz (f64)
e9: JMP-near Jz (f64)
ea: JMP-far Ap (i64)
@@ -456,6 +463,7 @@ AVXcode: 1
7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
# 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
80: JO Jz (f64)
81: JNO Jz (f64)
82: JB/JC/JNAE Jz (f64)
@@ -842,6 +850,7 @@ EndTable
GrpTable: Grp5
0: INC Ev
1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
2: CALLN Ev (f64)
3: CALLF Ep
4: JMPN Ev (f64)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index ede025fb46f1..181c53bac3a7 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -59,7 +59,7 @@ static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
int ret = 0;
/* kprobe_running() needs smp_processor_id() */
- if (kprobes_built_in() && !user_mode_vm(regs)) {
+ if (kprobes_built_in() && !user_mode(regs)) {
preempt_disable();
if (kprobe_running() && kprobe_fault_handler(regs, 14))
ret = 1;
@@ -148,7 +148,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
instr = (void *)convert_ip_to_linear(current, regs);
max_instr = instr + 15;
- if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE)
+ if (user_mode(regs) && instr >= (unsigned char *)TASK_SIZE_MAX)
return 0;
while (instr < max_instr) {
@@ -1035,7 +1035,7 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs)
if (error_code & PF_USER)
return false;
- if (!user_mode_vm(regs) && (regs->flags & X86_EFLAGS_AC))
+ if (!user_mode(regs) && (regs->flags & X86_EFLAGS_AC))
return false;
return true;
@@ -1140,7 +1140,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
* User-mode registers count as a user access even for any
* potential system fault or CPU buglet:
*/
- if (user_mode_vm(regs)) {
+ if (user_mode(regs)) {
local_irq_enable();
error_code |= PF_USER;
flags |= FAULT_FLAG_USER;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index a110efca6d06..52417e771af9 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -179,7 +179,8 @@ static void __init probe_page_size_mask(void)
if (cpu_has_pge) {
cr4_set_bits_and_update_boot(X86_CR4_PGE);
__supported_pte_mask |= _PAGE_GLOBAL;
- }
+ } else
+ __supported_pte_mask &= ~_PAGE_GLOBAL;
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/oprofile/backtrace.c b/arch/x86/oprofile/backtrace.c
index 5d04be5efb64..4e664bdb535a 100644
--- a/arch/x86/oprofile/backtrace.c
+++ b/arch/x86/oprofile/backtrace.c
@@ -111,7 +111,7 @@ x86_backtrace(struct pt_regs * const regs, unsigned int depth)
{
struct stack_frame *head = (struct stack_frame *)frame_pointer(regs);
- if (!user_mode_vm(regs)) {
+ if (!user_mode(regs)) {
unsigned long stack = kernel_stack_pointer(regs);
if (depth)
dump_trace(NULL, regs, (unsigned long *)stack, 0,
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 6ac273832f28..e4695985f9de 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -331,7 +331,7 @@ static void probe_pci_root_info(struct pci_root_info *info,
struct list_head *list)
{
int ret;
- struct resource_entry *entry;
+ struct resource_entry *entry, *tmp;
sprintf(info->name, "PCI Bus %04x:%02x", domain, busnum);
info->bridge = device;
@@ -345,8 +345,13 @@ static void probe_pci_root_info(struct pci_root_info *info,
dev_dbg(&device->dev,
"no IO and memory resources present in _CRS\n");
else
- resource_list_for_each_entry(entry, list)
- entry->res->name = info->name;
+ resource_list_for_each_entry_safe(entry, tmp, list) {
+ if ((entry->res->flags & IORESOURCE_WINDOW) == 0 ||
+ (entry->res->flags & IORESOURCE_DISABLED))
+ resource_list_destroy_entry(entry);
+ else
+ entry->res->name = info->name;
+ }
}
struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
index 3d2612b68694..2fb384724ebb 100644
--- a/arch/x86/pci/common.c
+++ b/arch/x86/pci/common.c
@@ -513,31 +513,6 @@ void __init pcibios_set_cache_line_size(void)
}
}
-/*
- * Some device drivers assume dev->irq won't change after calling
- * pci_disable_device(). So delay releasing of IRQ resource to driver
- * unbinding time. Otherwise it will break PM subsystem and drivers
- * like xen-pciback etc.
- */
-static int pci_irq_notifier(struct notifier_block *nb, unsigned long action,
- void *data)
-{
- struct pci_dev *dev = to_pci_dev(data);
-
- if (action != BUS_NOTIFY_UNBOUND_DRIVER)
- return NOTIFY_DONE;
-
- if (pcibios_disable_irq)
- pcibios_disable_irq(dev);
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block pci_irq_nb = {
- .notifier_call = pci_irq_notifier,
- .priority = INT_MIN,
-};
-
int __init pcibios_init(void)
{
if (!raw_pci_ops) {
@@ -550,9 +525,6 @@ int __init pcibios_init(void)
if (pci_bf_sort >= pci_force_bf)
pci_sort_breadthfirst();
-
- bus_register_notifier(&pci_bus_type, &pci_irq_nb);
-
return 0;
}
@@ -711,6 +683,12 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
return 0;
}
+void pcibios_disable_device (struct pci_dev *dev)
+{
+ if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq)
+ pcibios_disable_irq(dev);
+}
+
int pci_ext_cfg_avail(void)
{
if (raw_pci_ext_ops)
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index efb849323c74..852aa4c92da0 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c
@@ -234,10 +234,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
static void intel_mid_pci_irq_disable(struct pci_dev *dev)
{
- if (dev->irq_managed && dev->irq > 0) {
+ if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+ dev->irq > 0) {
mp_unmap_irq(dev->irq);
dev->irq_managed = 0;
- dev->irq = 0;
}
}
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index e71b3dbd87b8..5dc6ca5e1741 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c
@@ -1256,9 +1256,22 @@ static int pirq_enable_irq(struct pci_dev *dev)
return 0;
}
+bool mp_should_keep_irq(struct device *dev)
+{
+ if (dev->power.is_prepared)
+ return true;
+#ifdef CONFIG_PM
+ if (dev->power.runtime_status == RPM_SUSPENDING)
+ return true;
+#endif
+
+ return false;
+}
+
static void pirq_disable_irq(struct pci_dev *dev)
{
- if (io_apic_assign_pci_irqs && dev->irq_managed && dev->irq) {
+ if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
+ dev->irq_managed && dev->irq) {
mp_unmap_irq(dev->irq);
dev->irq = 0;
dev->irq_managed = 0;
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index dbc8627a5cdf..02744df576d5 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -85,12 +85,20 @@ static efi_status_t __init phys_efi_set_virtual_address_map(
efi_memory_desc_t *virtual_map)
{
efi_status_t status;
+ unsigned long flags;
+ pgd_t *save_pgd;
- efi_call_phys_prolog();
+ save_pgd = efi_call_phys_prolog();
+
+ /* Disable interrupts around EFI calls: */
+ local_irq_save(flags);
status = efi_call_phys(efi_phys.set_virtual_address_map,
memory_map_size, descriptor_size,
descriptor_version, virtual_map);
- efi_call_phys_epilog();
+ local_irq_restore(flags);
+
+ efi_call_phys_epilog(save_pgd);
+
return status;
}
@@ -491,7 +499,8 @@ void __init efi_init(void)
if (efi_memmap_init())
return;
- print_efi_memmap();
+ if (efi_enabled(EFI_DBG))
+ print_efi_memmap();
}
void __init efi_late_init(void)
@@ -939,6 +948,8 @@ static int __init arch_parse_efi_cmdline(char *str)
{
if (parse_option_str(str, "old_map"))
set_bit(EFI_OLD_MEMMAP, &efi.flags);
+ if (parse_option_str(str, "debug"))
+ set_bit(EFI_DBG, &efi.flags);
return 0;
}
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c
index 40e7cda52936..ed5b67338294 100644
--- a/arch/x86/platform/efi/efi_32.c
+++ b/arch/x86/platform/efi/efi_32.c
@@ -33,11 +33,10 @@
/*
* To make EFI call EFI runtime service in physical addressing mode we need
- * prolog/epilog before/after the invocation to disable interrupt, to
- * claim EFI runtime service handler exclusively and to duplicate a memory in
- * low memory space say 0 - 3G.
+ * prolog/epilog before/after the invocation to claim the EFI runtime service
+ * handler exclusively and to duplicate a memory mapping in low memory space,
+ * say 0 - 3G.
*/
-static unsigned long efi_rt_eflags;
void efi_sync_low_kernel_mappings(void) {}
void __init efi_dump_pagetable(void) {}
@@ -57,21 +56,24 @@ void __init efi_map_region(efi_memory_desc_t *md)
void __init efi_map_region_fixed(efi_memory_desc_t *md) {}
void __init parse_efi_setup(u64 phys_addr, u32 data_len) {}
-void __init efi_call_phys_prolog(void)
+pgd_t * __init efi_call_phys_prolog(void)
{
struct desc_ptr gdt_descr;
+ pgd_t *save_pgd;
- local_irq_save(efi_rt_eflags);
-
+ /* Current pgd is swapper_pg_dir, we'll restore it later: */
+ save_pgd = swapper_pg_dir;
load_cr3(initial_page_table);
__flush_tlb_all();
gdt_descr.address = __pa(get_cpu_gdt_table(0));
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
+
+ return save_pgd;
}
-void __init efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(pgd_t *save_pgd)
{
struct desc_ptr gdt_descr;
@@ -79,10 +81,8 @@ void __init efi_call_phys_epilog(void)
gdt_descr.size = GDT_SIZE - 1;
load_gdt(&gdt_descr);
- load_cr3(swapper_pg_dir);
+ load_cr3(save_pgd);
__flush_tlb_all();
-
- local_irq_restore(efi_rt_eflags);
}
void __init efi_runtime_mkexec(void)
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
index 17e80d829df0..a0ac0f9c307f 100644
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -41,9 +41,6 @@
#include <asm/realmode.h>
#include <asm/time.h>
-static pgd_t *save_pgd __initdata;
-static unsigned long efi_flags __initdata;
-
/*
* We allocate runtime services regions bottom-up, starting from -4G, i.e.
* 0xffff_ffff_0000_0000 and limit EFI VA mapping space to 64G.
@@ -78,17 +75,18 @@ static void __init early_code_mapping_set_exec(int executable)
}
}
-void __init efi_call_phys_prolog(void)
+pgd_t * __init efi_call_phys_prolog(void)
{
unsigned long vaddress;
+ pgd_t *save_pgd;
+
int pgd;
int n_pgds;
if (!efi_enabled(EFI_OLD_MEMMAP))
- return;
+ return NULL;
early_code_mapping_set_exec(1);
- local_irq_save(efi_flags);
n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT), PGDIR_SIZE);
save_pgd = kmalloc(n_pgds * sizeof(pgd_t), GFP_KERNEL);
@@ -99,24 +97,29 @@ void __init efi_call_phys_prolog(void)
set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress));
}
__flush_tlb_all();
+
+ return save_pgd;
}
-void __init efi_call_phys_epilog(void)
+void __init efi_call_phys_epilog(pgd_t *save_pgd)
{
/*
* After the lock is released, the original page table is restored.
*/
- int pgd;
- int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+ int pgd_idx;
+ int nr_pgds;
- if (!efi_enabled(EFI_OLD_MEMMAP))
+ if (!save_pgd)
return;
- for (pgd = 0; pgd < n_pgds; pgd++)
- set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]);
+ nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE);
+
+ for (pgd_idx = 0; pgd_idx < nr_pgds; pgd_idx++)
+ set_pgd(pgd_offset_k(pgd_idx * PGDIR_SIZE), save_pgd[pgd_idx]);
+
kfree(save_pgd);
+
__flush_tlb_all();
- local_irq_restore(efi_flags);
early_code_mapping_set_exec(0);
}
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 3e32ed5648a0..757678fb26e1 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -134,7 +134,7 @@ static void do_fpu_end(void)
static void fix_processor_context(void)
{
int cpu = smp_processor_id();
- struct tss_struct *t = &per_cpu(init_tss, cpu);
+ struct tss_struct *t = &per_cpu(cpu_tss, cpu);
#ifdef CONFIG_X86_64
struct desc_struct *desc = get_cpu_gdt_table(cpu);
tss_desc tss;
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index b3560ece1c9f..ef8187f9d28d 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -119,7 +119,7 @@
110 i386 iopl sys_iopl
111 i386 vhangup sys_vhangup
112 i386 idle
-113 i386 vm86old sys_vm86old sys32_vm86_warning
+113 i386 vm86old sys_vm86old sys_ni_syscall
114 i386 wait4 sys_wait4 compat_sys_wait4
115 i386 swapoff sys_swapoff
116 i386 sysinfo sys_sysinfo compat_sys_sysinfo
@@ -172,7 +172,7 @@
163 i386 mremap sys_mremap
164 i386 setresuid sys_setresuid16
165 i386 getresuid sys_getresuid16
-166 i386 vm86 sys_vm86 sys32_vm86_warning
+166 i386 vm86 sys_vm86 sys_ni_syscall
167 i386 query_module
168 i386 poll sys_poll
169 i386 nfsservctl
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index 8d656fbb57aa..9ef32d5f1b19 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -178,7 +178,7 @@
169 common reboot sys_reboot
170 common sethostname sys_sethostname
171 common setdomainname sys_setdomainname
-172 common iopl stub_iopl
+172 common iopl sys_iopl
173 common ioperm sys_ioperm
174 64 create_module
175 common init_module sys_init_module
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h
index 2d7d9a1f5b53..8ffd2146fa6a 100644
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@@ -64,8 +64,8 @@
*/
static inline void rdtsc_barrier(void)
{
- alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
- alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
+ alternative_2("", "mfence", X86_FEATURE_MFENCE_RDTSC,
+ "lfence", X86_FEATURE_LFENCE_RDTSC);
}
#endif
diff --git a/arch/x86/um/sys_call_table_64.c b/arch/x86/um/sys_call_table_64.c
index 5cdfa9db2217..a75d8700472a 100644
--- a/arch/x86/um/sys_call_table_64.c
+++ b/arch/x86/um/sys_call_table_64.c
@@ -16,7 +16,7 @@
*/
/* Not going to be implemented by UML, since we have no hardware. */
-#define stub_iopl sys_ni_syscall
+#define sys_iopl sys_ni_syscall
#define sys_ioperm sys_ni_syscall
/*
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 9793322751e0..40d2473836c9 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -82,18 +82,15 @@ static notrace cycle_t vread_pvclock(int *mode)
cycle_t ret;
u64 last;
u32 version;
+ u32 migrate_count;
u8 flags;
unsigned cpu, cpu1;
/*
- * Note: hypervisor must guarantee that:
- * 1. cpu ID number maps 1:1 to per-CPU pvclock time info.
- * 2. that per-CPU pvclock time info is updated if the
- * underlying CPU changes.
- * 3. that version is increased whenever underlying CPU
- * changes.
- *
+ * When looping to get a consistent (time-info, tsc) pair, we
+ * also need to deal with the possibility we can switch vcpus,
+ * so make sure we always re-fetch time-info for the current vcpu.
*/
do {
cpu = __getcpu() & VGETCPU_CPU_MASK;
@@ -102,20 +99,27 @@ static notrace cycle_t vread_pvclock(int *mode)
* __getcpu() calls (Gleb).
*/
- pvti = get_pvti(cpu);
+ /* Make sure migrate_count will change if we leave the VCPU. */
+ do {
+ pvti = get_pvti(cpu);
+ migrate_count = pvti->migrate_count;
+
+ cpu1 = cpu;
+ cpu = __getcpu() & VGETCPU_CPU_MASK;
+ } while (unlikely(cpu != cpu1));
version = __pvclock_read_cycles(&pvti->pvti, &ret, &flags);
/*
* Test we're still on the cpu as well as the version.
- * We could have been migrated just after the first
- * vgetcpu but before fetching the version, so we
- * wouldn't notice a version change.
+ * - We must read TSC of pvti's VCPU.
+ * - KVM doesn't follow the versioning protocol, so data could
+ * change before version if we left the VCPU.
*/
- cpu1 = __getcpu() & VGETCPU_CPU_MASK;
- } while (unlikely(cpu != cpu1 ||
- (pvti->pvti.version & 1) ||
- pvti->pvti.version != version));
+ smp_rmb();
+ } while (unlikely((pvti->pvti.version & 1) ||
+ pvti->pvti.version != version ||
+ pvti->migrate_count != migrate_count));
if (unlikely(!(flags & PVCLOCK_TSC_STABLE_BIT)))
*mode = VCLOCK_NONE;
diff --git a/arch/x86/vdso/vdso32/sigreturn.S b/arch/x86/vdso/vdso32/sigreturn.S
index 31776d0efc8c..d7ec4e251c0a 100644
--- a/arch/x86/vdso/vdso32/sigreturn.S
+++ b/arch/x86/vdso/vdso32/sigreturn.S
@@ -17,6 +17,7 @@
.text
.globl __kernel_sigreturn
.type __kernel_sigreturn,@function
+ nop /* this guy is needed for .LSTARTFDEDLSI1 below (watch for HACK) */
ALIGN
__kernel_sigreturn:
.LSTART_sigreturn:
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 5240f563076d..81665c9f2132 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -912,6 +912,7 @@ static void xen_load_sp0(struct tss_struct *tss,
mcs = xen_mc_entry(0);
MULTI_stack_switch(mcs.mc, __KERNEL_DS, thread->sp0);
xen_mc_issue(PARAVIRT_LAZY_CPU);
+ tss->x86_tss.sp0 = thread->sp0;
}
static void xen_set_iopl_mask(unsigned mask)
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 740ae3026a14..b47124d4cd67 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -91,6 +91,12 @@ EXPORT_SYMBOL_GPL(xen_p2m_size);
unsigned long xen_max_p2m_pfn __read_mostly;
EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);
+#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
+#define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
+#else
+#define P2M_LIMIT 0
+#endif
+
static DEFINE_SPINLOCK(p2m_update_lock);
static unsigned long *p2m_mid_missing_mfn;
@@ -385,9 +391,11 @@ static void __init xen_rebuild_p2m_list(unsigned long *p2m)
void __init xen_vmalloc_p2m_tree(void)
{
static struct vm_struct vm;
+ unsigned long p2m_limit;
+ p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
vm.flags = VM_ALLOC;
- vm.size = ALIGN(sizeof(unsigned long) * xen_max_p2m_pfn,
+ vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
PMD_SIZE * PMDS_PER_MID_PAGE);
vm_area_register_early(&vm, PMD_SIZE * PMDS_PER_MID_PAGE);
pr_notice("p2m virtual area at %p, size is %lx\n", vm.addr, vm.size);
@@ -563,7 +571,7 @@ static bool alloc_p2m(unsigned long pfn)
if (p2m_pfn == PFN_DOWN(__pa(p2m_missing)))
p2m_init(p2m);
else
- p2m_init_identity(p2m, pfn);
+ p2m_init_identity(p2m, pfn & ~(P2M_PER_PAGE - 1));
spin_lock_irqsave(&p2m_update_lock, flags);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index 08e8489c47f1..7413ee3706d0 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -445,15 +445,7 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
{
int rc;
- per_cpu(current_task, cpu) = idle;
-#ifdef CONFIG_X86_32
- irq_ctx_init(cpu);
-#else
- clear_tsk_thread_flag(idle, TIF_FORK);
-#endif
- per_cpu(kernel_stack, cpu) =
- (unsigned long)task_stack_page(idle) -
- KERNEL_STACK_OFFSET + THREAD_SIZE;
+ common_cpu_up(cpu, idle);
xen_setup_runstate_info(cpu);
xen_setup_timer(cpu);
@@ -468,10 +460,6 @@ static int xen_cpu_up(unsigned int cpu, struct task_struct *idle)
if (rc)
return rc;
- if (num_online_cpus() == 1)
- /* Just in case we booted with a single CPU. */
- alternatives_enable_smp();
-
rc = xen_smp_intr_init(cpu);
if (rc)
return rc;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index c4df9dbd63b7..d9497698645a 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -1,5 +1,5 @@
#include <linux/types.h>
-#include <linux/clockchips.h>
+#include <linux/tick.h>
#include <xen/interface/xen.h>
#include <xen/grant_table.h>
@@ -81,17 +81,14 @@ void xen_arch_post_suspend(int cancelled)
static void xen_vcpu_notify_restore(void *data)
{
- unsigned long reason = (unsigned long)data;
-
/* Boot processor notified via generic timekeeping_resume() */
- if ( smp_processor_id() == 0)
+ if (smp_processor_id() == 0)
return;
- clockevents_notify(reason, NULL);
+ tick_resume_local();
}
void xen_arch_resume(void)
{
- on_each_cpu(xen_vcpu_notify_restore,
- (void *)CLOCK_EVT_NOTIFY_RESUME, 1);
+ on_each_cpu(xen_vcpu_notify_restore, NULL, 1);
}
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 53adefda4275..985fc3ee0973 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -68,11 +68,11 @@ ENTRY(xen_sysret64)
* We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back
*/
- movq %rsp, PER_CPU_VAR(old_rsp)
+ movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(kernel_stack), %rsp
pushq $__USER_DS
- pushq PER_CPU_VAR(old_rsp)
+ pushq PER_CPU_VAR(rsp_scratch)
pushq %r11
pushq $__USER_CS
pushq %rcx
@@ -87,11 +87,11 @@ ENTRY(xen_sysret32)
* We're already on the usermode stack at this point, but
* still with the kernel gs, so we can easily switch back
*/
- movq %rsp, PER_CPU_VAR(old_rsp)
+ movq %rsp, PER_CPU_VAR(rsp_scratch)
movq PER_CPU_VAR(kernel_stack), %rsp
pushq $__USER32_DS
- pushq PER_CPU_VAR(old_rsp)
+ pushq PER_CPU_VAR(rsp_scratch)
pushq %r11
pushq $__USER32_CS
pushq %rcx