diff options
author | David S. Miller <davem@davemloft.net> | 2018-01-19 22:59:33 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-01-19 22:59:33 -0500 |
commit | 8565d26bcb2ff6df646e946d2913fcf706d46b66 (patch) | |
tree | 21ffaccc3cbac5e558d51c20cfbecbfec86a02c4 /arch | |
parent | 85831e56a1d0c75a1560e61acbb8591e9f11c6b7 (diff) | |
parent | ec835f8104a21f4d4eeb9d316ee71d2b4a7f00de (diff) | |
download | linux-8565d26bcb2ff6df646e946d2913fcf706d46b66.tar.bz2 |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
The BPF verifier conflict was some minor contextual issue.
The TUN conflict was less trivial. Cong Wang fixed a memory leak of
tfile->tx_array in 'net'. This is an skb_array. But meanwhile in
net-next tun changed tfile->tx_arry into tfile->tx_ring which is a
ptr_ring.
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'arch')
37 files changed, 694 insertions, 354 deletions
diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts index eed89e659143..a1f4d6d5a569 100644 --- a/arch/arm/boot/dts/da850-lcdk.dts +++ b/arch/arm/boot/dts/da850-lcdk.dts @@ -293,12 +293,12 @@ label = "u-boot env"; reg = <0 0x020000>; }; - partition@0x020000 { + partition@20000 { /* The LCDK defaults to booting from this partition */ label = "u-boot"; reg = <0x020000 0x080000>; }; - partition@0x0a0000 { + partition@a0000 { label = "free space"; reg = <0x0a0000 0>; }; diff --git a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts index cf2f5240e176..27cc913ca0f5 100644 --- a/arch/arm/boot/dts/kirkwood-openblocks_a7.dts +++ b/arch/arm/boot/dts/kirkwood-openblocks_a7.dts @@ -53,7 +53,8 @@ }; pinctrl: pin-controller@10000 { - pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header>; + pinctrl-0 = <&pmx_dip_switches &pmx_gpio_header + &pmx_gpio_header_gpo>; pinctrl-names = "default"; pmx_uart0: pmx-uart0 { @@ -85,11 +86,16 @@ * ground. */ pmx_gpio_header: pmx-gpio-header { - marvell,pins = "mpp17", "mpp7", "mpp29", "mpp28", + marvell,pins = "mpp17", "mpp29", "mpp28", "mpp35", "mpp34", "mpp40"; marvell,function = "gpio"; }; + pmx_gpio_header_gpo: pxm-gpio-header-gpo { + marvell,pins = "mpp7"; + marvell,function = "gpo"; + }; + pmx_gpio_init: pmx-init { marvell,pins = "mpp38"; marvell,function = "gpio"; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 5840f5c75c3b..4f2f2eea0755 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -1104,7 +1104,7 @@ be1_out_tcon0: endpoint@0 { reg = <0>; - remote-endpoint = <&tcon1_in_be0>; + remote-endpoint = <&tcon0_in_be1>; }; be1_out_tcon1: endpoint@1 { diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi index 59655e42e4b0..bd0cd3204273 100644 --- a/arch/arm/boot/dts/sun7i-a20.dtsi +++ b/arch/arm/boot/dts/sun7i-a20.dtsi @@ -1354,7 +1354,7 @@ be1_out_tcon0: endpoint@0 { reg = <0>; - remote-endpoint = <&tcon1_in_be0>; + remote-endpoint = <&tcon0_in_be1>; }; be1_out_tcon1: endpoint@1 { diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig index 5caaf971fb50..df433abfcb02 100644 --- a/arch/arm/configs/sunxi_defconfig +++ b/arch/arm/configs/sunxi_defconfig @@ -10,6 +10,7 @@ CONFIG_SMP=y CONFIG_NR_CPUS=8 CONFIG_AEABI=y CONFIG_HIGHMEM=y +CONFIG_CMA=y CONFIG_ARM_APPENDED_DTB=y CONFIG_ARM_ATAG_DTB_COMPAT=y CONFIG_CPU_FREQ=y @@ -33,6 +34,7 @@ CONFIG_CAN_SUN4I=y # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DMA_CMA=y CONFIG_BLK_DEV_SD=y CONFIG_ATA=y CONFIG_AHCI_SUNXI=y diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 4425189bb24c..a5edea07b8d5 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -27,14 +27,58 @@ int bpf_jit_enable __read_mostly; +/* + * eBPF prog stack layout: + * + * high + * original ARM_SP => +-----+ + * | | callee saved registers + * +-----+ <= (BPF_FP + SCRATCH_SIZE) + * | ... | eBPF JIT scratch space + * eBPF fp register => +-----+ + * (BPF_FP) | ... | eBPF prog stack + * +-----+ + * |RSVD | JIT scratchpad + * current ARM_SP => +-----+ <= (BPF_FP - STACK_SIZE + SCRATCH_SIZE) + * | | + * | ... | Function call stack + * | | + * +-----+ + * low + * + * The callee saved registers depends on whether frame pointers are enabled. + * With frame pointers (to be compliant with the ABI): + * + * high + * original ARM_SP => +------------------+ \ + * | pc | | + * current ARM_FP => +------------------+ } callee saved registers + * |r4-r8,r10,fp,ip,lr| | + * +------------------+ / + * low + * + * Without frame pointers: + * + * high + * original ARM_SP => +------------------+ + * | r4-r8,r10,fp,lr | callee saved registers + * current ARM_FP => +------------------+ + * low + * + * When popping registers off the stack at the end of a BPF function, we + * reference them via the current ARM_FP register. + */ +#define CALLEE_MASK (1 << ARM_R4 | 1 << ARM_R5 | 1 << ARM_R6 | \ + 1 << ARM_R7 | 1 << ARM_R8 | 1 << ARM_R10 | \ + 1 << ARM_FP) +#define CALLEE_PUSH_MASK (CALLEE_MASK | 1 << ARM_LR) +#define CALLEE_POP_MASK (CALLEE_MASK | 1 << ARM_PC) + #define STACK_OFFSET(k) (k) #define TMP_REG_1 (MAX_BPF_JIT_REG + 0) /* TEMP Register 1 */ #define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* TEMP Register 2 */ #define TCALL_CNT (MAX_BPF_JIT_REG + 2) /* Tail Call Count */ -/* Flags used for JIT optimization */ -#define SEEN_CALL (1 << 0) - #define FLAG_IMM_OVERFLOW (1 << 0) /* @@ -95,7 +139,6 @@ static const u8 bpf2a32[][2] = { * idx : index of current last JITed instruction. * prologue_bytes : bytes used in prologue. * epilogue_offset : offset of epilogue starting. - * seen : bit mask used for JIT optimization. * offsets : array of eBPF instruction offsets in * JITed code. * target : final JITed code. @@ -110,7 +153,6 @@ struct jit_ctx { unsigned int idx; unsigned int prologue_bytes; unsigned int epilogue_offset; - u32 seen; u32 flags; u32 *offsets; u32 *target; @@ -179,8 +221,13 @@ static void jit_fill_hole(void *area, unsigned int size) *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); } -/* Stack must be multiples of 16 Bytes */ -#define STACK_ALIGN(sz) (((sz) + 3) & ~3) +#if defined(CONFIG_AEABI) && (__LINUX_ARM_ARCH__ >= 5) +/* EABI requires the stack to be aligned to 64-bit boundaries */ +#define STACK_ALIGNMENT 8 +#else +/* Stack must be aligned to 32-bit boundaries */ +#define STACK_ALIGNMENT 4 +#endif /* Stack space for BPF_REG_2, BPF_REG_3, BPF_REG_4, * BPF_REG_5, BPF_REG_7, BPF_REG_8, BPF_REG_9, @@ -194,7 +241,7 @@ static void jit_fill_hole(void *area, unsigned int size) + SCRATCH_SIZE + \ + 4 /* extra for skb_copy_bits buffer */) -#define STACK_SIZE STACK_ALIGN(_STACK_SIZE) +#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT) /* Get the offset of eBPF REGISTERs stored on scratch space. */ #define STACK_VAR(off) (STACK_SIZE-off-4) @@ -285,16 +332,19 @@ static inline void emit_mov_i(const u8 rd, u32 val, struct jit_ctx *ctx) emit_mov_i_no8m(rd, val, ctx); } -static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) +static void emit_bx_r(u8 tgt_reg, struct jit_ctx *ctx) { - ctx->seen |= SEEN_CALL; -#if __LINUX_ARM_ARCH__ < 5 - emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); - if (elf_hwcap & HWCAP_THUMB) emit(ARM_BX(tgt_reg), ctx); else emit(ARM_MOV_R(ARM_PC, tgt_reg), ctx); +} + +static inline void emit_blx_r(u8 tgt_reg, struct jit_ctx *ctx) +{ +#if __LINUX_ARM_ARCH__ < 5 + emit(ARM_MOV_R(ARM_LR, ARM_PC), ctx); + emit_bx_r(tgt_reg, ctx); #else emit(ARM_BLX_R(tgt_reg), ctx); #endif @@ -354,7 +404,6 @@ static inline void emit_udivmod(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx, u8 op) } /* Call appropriate function */ - ctx->seen |= SEEN_CALL; emit_mov_i(ARM_IP, op == BPF_DIV ? (u32)jit_udiv32 : (u32)jit_mod32, ctx); emit_blx_r(ARM_IP, ctx); @@ -620,8 +669,6 @@ static inline void emit_a32_lsh_r64(const u8 dst[], const u8 src[], bool dstk, /* Do LSH operation */ emit(ARM_SUB_I(ARM_IP, rt, 32), ctx); emit(ARM_RSB_I(tmp2[0], rt, 32), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_MOV_SR(ARM_LR, rm, SRTYPE_ASL, rt), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd, SRTYPE_ASL, ARM_IP), ctx); emit(ARM_ORR_SR(ARM_IP, ARM_LR, rd, SRTYPE_LSR, tmp2[0]), ctx); @@ -656,8 +703,6 @@ static inline void emit_a32_arsh_r64(const u8 dst[], const u8 src[], bool dstk, /* Do the ARSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); _emit(ARM_COND_MI, ARM_B(0), ctx); @@ -692,8 +737,6 @@ static inline void emit_a32_lsr_r64(const u8 dst[], const u8 src[], bool dstk, /* Do LSH operation */ emit(ARM_RSB_I(ARM_IP, rt, 32), ctx); emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_MOV_SR(ARM_LR, rd, SRTYPE_LSR, rt), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_ASL, ARM_IP), ctx); emit(ARM_ORR_SR(ARM_LR, ARM_LR, rm, SRTYPE_LSR, tmp2[0]), ctx); @@ -828,8 +871,6 @@ static inline void emit_a32_mul_r64(const u8 dst[], const u8 src[], bool dstk, /* Do Multiplication */ emit(ARM_MUL(ARM_IP, rd, rn), ctx); emit(ARM_MUL(ARM_LR, rm, rt), ctx); - /* As we are using ARM_LR */ - ctx->seen |= SEEN_CALL; emit(ARM_ADD_R(ARM_LR, ARM_IP, ARM_LR), ctx); emit(ARM_UMULL(ARM_IP, rm, rd, rt), ctx); @@ -872,33 +913,53 @@ static inline void emit_str_r(const u8 dst, const u8 src, bool dstk, } /* dst = *(size*)(src + off) */ -static inline void emit_ldx_r(const u8 dst, const u8 src, bool dstk, - const s32 off, struct jit_ctx *ctx, const u8 sz){ +static inline void emit_ldx_r(const u8 dst[], const u8 src, bool dstk, + s32 off, struct jit_ctx *ctx, const u8 sz){ const u8 *tmp = bpf2a32[TMP_REG_1]; - u8 rd = dstk ? tmp[1] : dst; + const u8 *rd = dstk ? tmp : dst; u8 rm = src; + s32 off_max; - if (off) { + if (sz == BPF_H) + off_max = 0xff; + else + off_max = 0xfff; + + if (off < 0 || off > off_max) { emit_a32_mov_i(tmp[0], off, false, ctx); emit(ARM_ADD_R(tmp[0], tmp[0], src), ctx); rm = tmp[0]; + off = 0; + } else if (rd[1] == rm) { + emit(ARM_MOV_R(tmp[0], rm), ctx); + rm = tmp[0]; } switch (sz) { - case BPF_W: - /* Load a Word */ - emit(ARM_LDR_I(rd, rm, 0), ctx); + case BPF_B: + /* Load a Byte */ + emit(ARM_LDRB_I(rd[1], rm, off), ctx); + emit_a32_mov_i(dst[0], 0, dstk, ctx); break; case BPF_H: /* Load a HalfWord */ - emit(ARM_LDRH_I(rd, rm, 0), ctx); + emit(ARM_LDRH_I(rd[1], rm, off), ctx); + emit_a32_mov_i(dst[0], 0, dstk, ctx); break; - case BPF_B: - /* Load a Byte */ - emit(ARM_LDRB_I(rd, rm, 0), ctx); + case BPF_W: + /* Load a Word */ + emit(ARM_LDR_I(rd[1], rm, off), ctx); + emit_a32_mov_i(dst[0], 0, dstk, ctx); + break; + case BPF_DW: + /* Load a Double Word */ + emit(ARM_LDR_I(rd[1], rm, off), ctx); + emit(ARM_LDR_I(rd[0], rm, off + 4), ctx); break; } if (dstk) - emit(ARM_STR_I(rd, ARM_SP, STACK_VAR(dst)), ctx); + emit(ARM_STR_I(rd[1], ARM_SP, STACK_VAR(dst[1])), ctx); + if (dstk && sz == BPF_DW) + emit(ARM_STR_I(rd[0], ARM_SP, STACK_VAR(dst[0])), ctx); } /* Arithmatic Operation */ @@ -906,7 +967,6 @@ static inline void emit_ar_r(const u8 rd, const u8 rt, const u8 rm, const u8 rn, struct jit_ctx *ctx, u8 op) { switch (op) { case BPF_JSET: - ctx->seen |= SEEN_CALL; emit(ARM_AND_R(ARM_IP, rt, rn), ctx); emit(ARM_AND_R(ARM_LR, rd, rm), ctx); emit(ARM_ORRS_R(ARM_IP, ARM_LR, ARM_IP), ctx); @@ -945,7 +1005,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) const u8 *tcc = bpf2a32[TCALL_CNT]; const int idx0 = ctx->idx; #define cur_offset (ctx->idx - idx0) -#define jmp_offset (out_offset - (cur_offset)) +#define jmp_offset (out_offset - (cur_offset) - 2) u32 off, lo, hi; /* if (index >= array->map.max_entries) @@ -956,7 +1016,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit_a32_mov_i(tmp[1], off, false, ctx); emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r2[1])), ctx); emit(ARM_LDR_R(tmp[1], tmp2[1], tmp[1]), ctx); - /* index (64 bit) */ + /* index is 32-bit for arrays */ emit(ARM_LDR_I(tmp2[1], ARM_SP, STACK_VAR(r3[1])), ctx); /* index >= array->map.max_entries */ emit(ARM_CMP_R(tmp2[1], tmp[1]), ctx); @@ -997,7 +1057,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit_a32_mov_i(tmp2[1], off, false, ctx); emit(ARM_LDR_R(tmp[1], tmp[1], tmp2[1]), ctx); emit(ARM_ADD_I(tmp[1], tmp[1], ctx->prologue_bytes), ctx); - emit(ARM_BX(tmp[1]), ctx); + emit_bx_r(tmp[1], ctx); /* out: */ if (out_offset == -1) @@ -1070,54 +1130,22 @@ static void build_prologue(struct jit_ctx *ctx) const u8 r2 = bpf2a32[BPF_REG_1][1]; const u8 r3 = bpf2a32[BPF_REG_1][0]; const u8 r4 = bpf2a32[BPF_REG_6][1]; - const u8 r5 = bpf2a32[BPF_REG_6][0]; - const u8 r6 = bpf2a32[TMP_REG_1][1]; - const u8 r7 = bpf2a32[TMP_REG_1][0]; - const u8 r8 = bpf2a32[TMP_REG_2][1]; - const u8 r10 = bpf2a32[TMP_REG_2][0]; const u8 fplo = bpf2a32[BPF_REG_FP][1]; const u8 fphi = bpf2a32[BPF_REG_FP][0]; - const u8 sp = ARM_SP; const u8 *tcc = bpf2a32[TCALL_CNT]; - u16 reg_set = 0; - - /* - * eBPF prog stack layout - * - * high - * original ARM_SP => +-----+ eBPF prologue - * |FP/LR| - * current ARM_FP => +-----+ - * | ... | callee saved registers - * eBPF fp register => +-----+ <= (BPF_FP) - * | ... | eBPF JIT scratch space - * | | eBPF prog stack - * +-----+ - * |RSVD | JIT scratchpad - * current A64_SP => +-----+ <= (BPF_FP - STACK_SIZE) - * | | - * | ... | Function call stack - * | | - * +-----+ - * low - */ - /* Save callee saved registers. */ - reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10); #ifdef CONFIG_FRAME_POINTER - reg_set |= (1<<ARM_FP) | (1<<ARM_IP) | (1<<ARM_LR) | (1<<ARM_PC); - emit(ARM_MOV_R(ARM_IP, sp), ctx); + u16 reg_set = CALLEE_PUSH_MASK | 1 << ARM_IP | 1 << ARM_PC; + emit(ARM_MOV_R(ARM_IP, ARM_SP), ctx); emit(ARM_PUSH(reg_set), ctx); emit(ARM_SUB_I(ARM_FP, ARM_IP, 4), ctx); #else - /* Check if call instruction exists in BPF body */ - if (ctx->seen & SEEN_CALL) - reg_set |= (1<<ARM_LR); - emit(ARM_PUSH(reg_set), ctx); + emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx); + emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx); #endif /* Save frame pointer for later */ - emit(ARM_SUB_I(ARM_IP, sp, SCRATCH_SIZE), ctx); + emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx); ctx->stack_size = imm8m(STACK_SIZE); @@ -1140,33 +1168,19 @@ static void build_prologue(struct jit_ctx *ctx) /* end of prologue */ } +/* restore callee saved registers. */ static void build_epilogue(struct jit_ctx *ctx) { - const u8 r4 = bpf2a32[BPF_REG_6][1]; - const u8 r5 = bpf2a32[BPF_REG_6][0]; - const u8 r6 = bpf2a32[TMP_REG_1][1]; - const u8 r7 = bpf2a32[TMP_REG_1][0]; - const u8 r8 = bpf2a32[TMP_REG_2][1]; - const u8 r10 = bpf2a32[TMP_REG_2][0]; - u16 reg_set = 0; - - /* unwind function call stack */ - emit(ARM_ADD_I(ARM_SP, ARM_SP, ctx->stack_size), ctx); - - /* restore callee saved registers. */ - reg_set |= (1<<r4) | (1<<r5) | (1<<r6) | (1<<r7) | (1<<r8) | (1<<r10); #ifdef CONFIG_FRAME_POINTER - /* the first instruction of the prologue was: mov ip, sp */ - reg_set |= (1<<ARM_FP) | (1<<ARM_SP) | (1<<ARM_PC); + /* When using frame pointers, some additional registers need to + * be loaded. */ + u16 reg_set = CALLEE_POP_MASK | 1 << ARM_SP; + emit(ARM_SUB_I(ARM_SP, ARM_FP, hweight16(reg_set) * 4), ctx); emit(ARM_LDM(ARM_SP, reg_set), ctx); #else - if (ctx->seen & SEEN_CALL) - reg_set |= (1<<ARM_PC); /* Restore callee saved registers. */ - emit(ARM_POP(reg_set), ctx); - /* Return back to the callee function */ - if (!(ctx->seen & SEEN_CALL)) - emit(ARM_BX(ARM_LR), ctx); + emit(ARM_MOV_R(ARM_SP, ARM_FP), ctx); + emit(ARM_POP(CALLEE_POP_MASK), ctx); #endif } @@ -1394,8 +1408,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) emit_rev32(rt, rt, ctx); goto emit_bswap_uxt; case 64: - /* Because of the usage of ARM_LR */ - ctx->seen |= SEEN_CALL; emit_rev32(ARM_LR, rt, ctx); emit_rev32(rt, rd, ctx); emit(ARM_MOV_R(rd, ARM_LR), ctx); @@ -1448,22 +1460,7 @@ exit: rn = sstk ? tmp2[1] : src_lo; if (sstk) emit(ARM_LDR_I(rn, ARM_SP, STACK_VAR(src_lo)), ctx); - switch (BPF_SIZE(code)) { - case BPF_W: - /* Load a Word */ - case BPF_H: - /* Load a Half-Word */ - case BPF_B: - /* Load a Byte */ - emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_SIZE(code)); - emit_a32_mov_i(dst_hi, 0, dstk, ctx); - break; - case BPF_DW: - /* Load a double word */ - emit_ldx_r(dst_lo, rn, dstk, off, ctx, BPF_W); - emit_ldx_r(dst_hi, rn, dstk, off+4, ctx, BPF_W); - break; - } + emit_ldx_r(dst, rn, dstk, off, ctx, BPF_SIZE(code)); break; /* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */ case BPF_LD | BPF_ABS | BPF_W: diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi index 7c9bdc7ab50b..9db19314c60c 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi @@ -66,6 +66,7 @@ <&cpu1>, <&cpu2>, <&cpu3>; + interrupt-parent = <&intc>; }; psci { diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index e3b64d03fbd8..9c7724e82aff 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -63,8 +63,10 @@ cpm_ethernet: ethernet@0 { compatible = "marvell,armada-7k-pp22"; reg = <0x0 0x100000>, <0x129000 0xb000>; - clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, <&cpm_clk 1 5>; - clock-names = "pp_clk", "gop_clk", "mg_clk"; + clocks = <&cpm_clk 1 3>, <&cpm_clk 1 9>, + <&cpm_clk 1 5>, <&cpm_clk 1 18>; + clock-names = "pp_clk", "gop_clk", + "mg_clk","axi_clk"; marvell,system-controller = <&cpm_syscon0>; status = "disabled"; dma-coherent; @@ -155,7 +157,8 @@ #size-cells = <0>; compatible = "marvell,orion-mdio"; reg = <0x12a200 0x10>; - clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>; + clocks = <&cpm_clk 1 9>, <&cpm_clk 1 5>, + <&cpm_clk 1 6>, <&cpm_clk 1 18>; status = "disabled"; }; @@ -338,8 +341,8 @@ compatible = "marvell,armada-cp110-sdhci"; reg = <0x780000 0x300>; interrupts = <ICU_GRP_NSR 27 IRQ_TYPE_LEVEL_HIGH>; - clock-names = "core"; - clocks = <&cpm_clk 1 4>; + clock-names = "core","axi"; + clocks = <&cpm_clk 1 4>, <&cpm_clk 1 18>; dma-coherent; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi index 0d51096c69f8..87ac68b2cf37 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-slave.dtsi @@ -63,8 +63,10 @@ cps_ethernet: ethernet@0 { compatible = "marvell,armada-7k-pp22"; reg = <0x0 0x100000>, <0x129000 0xb000>; - clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, <&cps_clk 1 5>; - clock-names = "pp_clk", "gop_clk", "mg_clk"; + clocks = <&cps_clk 1 3>, <&cps_clk 1 9>, + <&cps_clk 1 5>, <&cps_clk 1 18>; + clock-names = "pp_clk", "gop_clk", + "mg_clk", "axi_clk"; marvell,system-controller = <&cps_syscon0>; status = "disabled"; dma-coherent; @@ -155,7 +157,8 @@ #size-cells = <0>; compatible = "marvell,orion-mdio"; reg = <0x12a200 0x10>; - clocks = <&cps_clk 1 9>, <&cps_clk 1 5>; + clocks = <&cps_clk 1 9>, <&cps_clk 1 5>, + <&cps_clk 1 6>, <&cps_clk 1 18>; status = "disabled"; }; diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index acaa935ed977..cbfe890b0ee5 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -162,7 +162,8 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) /* Stack must be multiples of 16B */ #define STACK_ALIGN(sz) (((sz) + 15) & ~15) -#define PROLOGUE_OFFSET 8 +/* Tail call offset to jump into */ +#define PROLOGUE_OFFSET 7 static int build_prologue(struct jit_ctx *ctx) { @@ -214,19 +215,19 @@ static int build_prologue(struct jit_ctx *ctx) /* Initialize tail_call_cnt */ emit(A64_MOVZ(1, tcc, 0, 0), ctx); - /* 4 byte extra for skb_copy_bits buffer */ - ctx->stack_size = prog->aux->stack_depth + 4; - ctx->stack_size = STACK_ALIGN(ctx->stack_size); - - /* Set up function call stack */ - emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); - cur_offset = ctx->idx - idx0; if (cur_offset != PROLOGUE_OFFSET) { pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n", cur_offset, PROLOGUE_OFFSET); return -1; } + + /* 4 byte extra for skb_copy_bits buffer */ + ctx->stack_size = prog->aux->stack_depth + 4; + ctx->stack_size = STACK_ALIGN(ctx->stack_size); + + /* Set up function call stack */ + emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); return 0; } @@ -274,11 +275,12 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) emit(A64_LDR64(prg, tmp, prg), ctx); emit(A64_CBZ(1, prg, jmp_offset), ctx); - /* goto *(prog->bpf_func + prologue_size); */ + /* goto *(prog->bpf_func + prologue_offset); */ off = offsetof(struct bpf_prog, bpf_func); emit_a64_mov_i64(tmp, off, ctx); emit(A64_LDR64(tmp, prg, tmp), ctx); emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx); + emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx); emit(A64_BR(tmp), ctx); /* out: */ diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h index 28e02c99be6d..762eeb0fcc1d 100644 --- a/arch/ia64/include/asm/atomic.h +++ b/arch/ia64/include/asm/atomic.h @@ -65,29 +65,30 @@ ia64_atomic_fetch_##op (int i, atomic_t *v) \ ATOMIC_OPS(add, +) ATOMIC_OPS(sub, -) -#define atomic_add_return(i,v) \ +#ifdef __OPTIMIZE__ +#define __ia64_atomic_const(i) __builtin_constant_p(i) ? \ + ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 || \ + (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0 + +#define atomic_add_return(i, v) \ ({ \ - int __ia64_aar_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_aar_i == 1) || (__ia64_aar_i == 4) \ - || (__ia64_aar_i == 8) || (__ia64_aar_i == 16) \ - || (__ia64_aar_i == -1) || (__ia64_aar_i == -4) \ - || (__ia64_aar_i == -8) || (__ia64_aar_i == -16))) \ - ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ - : ia64_atomic_add(__ia64_aar_i, v); \ + int __i = (i); \ + static const int __ia64_atomic_p = __ia64_atomic_const(i); \ + __ia64_atomic_p ? ia64_fetch_and_add(__i, &(v)->counter) : \ + ia64_atomic_add(__i, v); \ }) -#define atomic_sub_return(i,v) \ +#define atomic_sub_return(i, v) \ ({ \ - int __ia64_asr_i = (i); \ - (__builtin_constant_p(i) \ - && ( (__ia64_asr_i == 1) || (__ia64_asr_i == 4) \ - || (__ia64_asr_i == 8) || (__ia64_asr_i == 16) \ - || (__ia64_asr_i == -1) || (__ia64_asr_i == -4) \ - || (__ia64_asr_i == -8) || (__ia64_asr_i == -16))) \ - ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ - : ia64_atomic_sub(__ia64_asr_i, v); \ + int __i = (i); \ + static const int __ia64_atomic_p = __ia64_atomic_const(i); \ + __ia64_atomic_p ? ia64_fetch_and_add(-__i, &(v)->counter) : \ + ia64_atomic_sub(__i, v); \ }) +#else +#define atomic_add_return(i, v) ia64_atomic_add(i, v) +#define atomic_sub_return(i, v) ia64_atomic_sub(i, v) +#endif #define atomic_fetch_add(i,v) \ ({ \ diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c51e6ce42e7a..2ed525a44734 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -166,6 +166,7 @@ config PPC select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CMOS_UPDATE select GENERIC_CPU_AUTOPROBE + select GENERIC_CPU_VULNERABILITIES if PPC_BOOK3S_64 select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_SMP_IDLE_THREAD diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index f0461618bf7b..eca3f9c68907 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -353,6 +353,7 @@ #define PROC_TABLE_GTSE 0x01 #ifndef __ASSEMBLY__ +#include <linux/types.h> /** * plpar_hcall_norets: - Make a pseries hypervisor call with no return arguments diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 9d213542a48b..8fd3a70047f1 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -242,14 +242,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short maj; unsigned short min; - /* We only show online cpus: disable preempt (overzealous, I - * knew) to prevent cpu going down. */ - preempt_disable(); - if (!cpu_online(cpu_id)) { - preempt_enable(); - return 0; - } - #ifdef CONFIG_SMP pvr = per_cpu(cpu_pvr, cpu_id); #else @@ -358,9 +350,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "\n"); #endif - - preempt_enable(); - /* If this is the last cpu, print the summary */ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) show_cpuinfo_summary(m); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 491be4179ddd..e67413f4a8f0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -38,6 +38,7 @@ #include <linux/memory.h> #include <linux/nmi.h> +#include <asm/debugfs.h> #include <asm/io.h> #include <asm/kdump.h> #include <asm/prom.h> @@ -901,4 +902,41 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) if (!no_rfi_flush) rfi_flush_enable(enable); } + +#ifdef CONFIG_DEBUG_FS +static int rfi_flush_set(void *data, u64 val) +{ + if (val == 1) + rfi_flush_enable(true); + else if (val == 0) + rfi_flush_enable(false); + else + return -EINVAL; + + return 0; +} + +static int rfi_flush_get(void *data, u64 *val) +{ + *val = rfi_flush ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_rfi_flush, rfi_flush_get, rfi_flush_set, "%llu\n"); + +static __init int rfi_flush_debugfs_init(void) +{ + debugfs_create_file("rfi_flush", 0600, powerpc_debugfs_root, NULL, &fops_rfi_flush); + return 0; +} +device_initcall(rfi_flush_debugfs_init); +#endif + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (rfi_flush) + return sprintf(buf, "Mitigation: RFI Flush\n"); + + return sprintf(buf, "Vulnerable\n"); +} #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index cab24f549e7c..0ddc7ac6c5f1 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2344,10 +2344,10 @@ static void dump_one_paca(int cpu) DUMP(p, kernel_toc, "lx"); DUMP(p, kernelbase, "lx"); DUMP(p, kernel_msr, "lx"); - DUMP(p, emergency_sp, "p"); + DUMP(p, emergency_sp, "px"); #ifdef CONFIG_PPC_BOOK3S_64 - DUMP(p, nmi_emergency_sp, "p"); - DUMP(p, mc_emergency_sp, "p"); + DUMP(p, nmi_emergency_sp, "px"); + DUMP(p, mc_emergency_sp, "px"); DUMP(p, in_nmi, "x"); DUMP(p, in_mce, "x"); DUMP(p, hmi_event_available, "x"); @@ -2375,17 +2375,21 @@ static void dump_one_paca(int cpu) DUMP(p, slb_cache_ptr, "x"); for (i = 0; i < SLB_CACHE_ENTRIES; i++) printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]); + + DUMP(p, rfi_flush_fallback_area, "px"); + DUMP(p, l1d_flush_congruence, "llx"); + DUMP(p, l1d_flush_sets, "llx"); #endif DUMP(p, dscr_default, "llx"); #ifdef CONFIG_PPC_BOOK3E - DUMP(p, pgd, "p"); - DUMP(p, kernel_pgd, "p"); - DUMP(p, tcd_ptr, "p"); - DUMP(p, mc_kstack, "p"); - DUMP(p, crit_kstack, "p"); - DUMP(p, dbg_kstack, "p"); + DUMP(p, pgd, "px"); + DUMP(p, kernel_pgd, "px"); + DUMP(p, tcd_ptr, "px"); + DUMP(p, mc_kstack, "px"); + DUMP(p, crit_kstack, "px"); + DUMP(p, dbg_kstack, "px"); #endif - DUMP(p, __current, "p"); + DUMP(p, __current, "px"); DUMP(p, kstack, "lx"); printf(" kstack_base = 0x%016lx\n", p->kstack & ~(THREAD_SIZE - 1)); DUMP(p, stab_rr, "lx"); @@ -2403,7 +2407,7 @@ static void dump_one_paca(int cpu) #endif #ifdef CONFIG_PPC_POWERNV - DUMP(p, core_idle_state_ptr, "p"); + DUMP(p, core_idle_state_ptr, "px"); DUMP(p, thread_idle_state, "x"); DUMP(p, thread_mask, "x"); DUMP(p, subcore_sibling_mask, "x"); diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a1f28a54f23a..60c4c342316c 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -244,6 +244,17 @@ ENTRY(__switch_to_asm) movl %ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popl %esi popl %edi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 4f8e1d35a97c..aa15b4c0e3d1 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -491,6 +491,17 @@ ENTRY(__switch_to_asm) movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* + * When switching from a shallower to a deeper call stack + * the RSB may either underflow or use entries populated + * with userspace addresses. On CPUs where those concerns + * exist, overwrite the RSB with entries which capture + * speculative execution to prevent attack. + */ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popq %r15 popq %r14 diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 005908ee9333..a2efb490f743 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -755,14 +755,14 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_IVYBRIDGE_X, snbep_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_CORE, hsw_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_X, hsx_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_ULT, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_HASWELL_GT3E, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_CORE, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_GT3E, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, hsx_rapl_init), - X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsx_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index a9e57f08bfa6..98722773391d 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -136,6 +136,7 @@ extern void disconnect_bsp_APIC(int virt_wire_setup); extern void disable_local_APIC(void); extern void lapic_shutdown(void); extern void sync_Arb_IDs(void); +extern void init_bsp_APIC(void); extern void apic_intr_mode_init(void); extern void setup_local_APIC(void); extern void init_apic_mappings(void); diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f275447862f4..25b9375c1484 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -206,11 +206,11 @@ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -245,6 +245,7 @@ #define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ #define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ #define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ #define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ #define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ #define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index c9459a4c3c68..22c5f3e6f820 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -39,7 +39,7 @@ void __init sme_unmap_bootdata(char *real_mode_data); void __init sme_early_init(void); -void __init sme_encrypt_kernel(void); +void __init sme_encrypt_kernel(struct boot_params *bp); void __init sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); @@ -67,7 +67,7 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { } static inline void __init sme_early_init(void) { } -static inline void __init sme_encrypt_kernel(void) { } +static inline void __init sme_encrypt_kernel(struct boot_params *bp) { } static inline void __init sme_enable(struct boot_params *bp) { } static inline bool sme_active(void) { return false; } diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 402a11c803c3..7b45d8424150 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -11,7 +11,7 @@ * Fill the CPU return stack buffer. * * Each entry in the RSB, if used for a speculative 'ret', contains an - * infinite 'pause; jmp' loop to capture speculative execution. + * infinite 'pause; lfence; jmp' loop to capture speculative execution. * * This is required in various cases for retpoline and IBRS-based * mitigations for the Spectre variant 2 vulnerability. Sometimes to @@ -38,11 +38,13 @@ call 772f; \ 773: /* speculation trap */ \ pause; \ + lfence; \ jmp 773b; \ 772: \ call 774f; \ 775: /* speculation trap */ \ pause; \ + lfence; \ jmp 775b; \ 774: \ dec reg; \ @@ -73,6 +75,7 @@ call .Ldo_rop_\@ .Lspec_trap_\@: pause + lfence jmp .Lspec_trap_\@ .Ldo_rop_\@: mov \reg, (%_ASM_SP) @@ -165,6 +168,7 @@ " .align 16\n" \ "901: call 903f;\n" \ "902: pause;\n" \ + " lfence;\n" \ " jmp 902b;\n" \ " .align 16\n" \ "903: addl $4, %%esp;\n" \ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 880441f24146..25ddf02598d2 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1286,6 +1286,55 @@ static int __init apic_intr_mode_select(void) return APIC_SYMMETRIC_IO; } +/* + * An initial setup of the virtual wire mode. + */ +void __init init_bsp_APIC(void) +{ + unsigned int value; + + /* + * Don't do the setup now if we have a SMP BIOS as the + * through-I/O-APIC virtual wire mode might be active. + */ + if (smp_found_config || !boot_cpu_has(X86_FEATURE_APIC)) + return; + + /* + * Do not trust the local APIC being empty at bootup. + */ + clear_local_APIC(); + + /* + * Enable APIC. + */ + value = apic_read(APIC_SPIV); + value &= ~APIC_VECTOR_MASK; + value |= APIC_SPIV_APIC_ENABLED; + +#ifdef CONFIG_X86_32 + /* This bit is reserved on P4/Xeon and should be cleared */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && + (boot_cpu_data.x86 == 15)) + value &= ~APIC_SPIV_FOCUS_DISABLED; + else +#endif + value |= APIC_SPIV_FOCUS_DISABLED; + value |= SPURIOUS_APIC_VECTOR; + apic_write(APIC_SPIV, value); + + /* + * Set up the virtual wire mode. + */ + apic_write(APIC_LVT0, APIC_DM_EXTINT); + value = APIC_DM_NMI; + if (!lapic_is_integrated()) /* 82489DX */ + value |= APIC_LVT_LEVEL_TRIGGER; + if (apic_extnmi == APIC_EXTNMI_NONE) + value |= APIC_LVT_MASKED; + apic_write(APIC_LVT1, value); +} + /* Init the interrupt delivery mode for the BSP */ void __init apic_intr_mode_init(void) { diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index f8b03bb8e725..3cc471beb50b 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -542,14 +542,17 @@ static int x86_vector_alloc_irqs(struct irq_domain *domain, unsigned int virq, err = assign_irq_vector_policy(irqd, info); trace_vector_setup(virq + i, false, err); - if (err) + if (err) { + irqd->chip_data = NULL; + free_apic_chip_data(apicd); goto error; + } } return 0; error: - x86_vector_free_irqs(domain, virq, i + 1); + x86_vector_free_irqs(domain, virq, i); return err; } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e4dc26185aa7..390b3dc3d438 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -23,6 +23,7 @@ #include <asm/alternative.h> #include <asm/pgtable.h> #include <asm/set_memory.h> +#include <asm/intel-family.h> static void __init spectre_v2_select_mitigation(void); @@ -155,6 +156,23 @@ disable: return SPECTRE_V2_CMD_NONE; } +/* Check for Skylake-like CPUs (for RSB handling) */ +static bool __init is_skylake_era(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6) { + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + return true; + } + } + return false; +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -213,6 +231,24 @@ retpoline_auto: spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); + + /* + * If neither SMEP or KPTI are available, there is a risk of + * hitting userspace addresses in the RSB after a context switch + * from a shallow call stack to a deeper one. To prevent this fill + * the entire RSB, even when using IBRS. + * + * Skylake era CPUs have a separate issue with *underflow* of the + * RSB, when they will predict 'ret' targets from the generic BTB. + * The proper mitigation for this is IBRS. If IBRS is not supported + * or deactivated in favour of retpolines the RSB fill on context + * switch is required. + */ + if ((!boot_cpu_has(X86_FEATURE_PTI) && + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); + pr_info("Filling RSB on context switch\n"); + } } #undef pr_fmt diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 88dcf8479013..99442370de40 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -525,10 +525,6 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) */ if (static_branch_unlikely(&rdt_mon_enable_key)) rmdir_mondata_subdir_allrdtgrp(r, d->id); - kfree(d->ctrl_val); - kfree(d->rmid_busy_llc); - kfree(d->mbm_total); - kfree(d->mbm_local); list_del(&d->list); if (is_mbm_enabled()) cancel_delayed_work(&d->mbm_over); @@ -545,6 +541,10 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) cancel_delayed_work(&d->cqm_limbo); } + kfree(d->ctrl_val); + kfree(d->rmid_busy_llc); + kfree(d->mbm_total); + kfree(d->mbm_local); kfree(d); return; } diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 05459ad3db46..d0e69769abfd 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -21,7 +21,6 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, - { X86_FEATURE_INTEL_PT, CPUID_EBX, 25, 0x00000007, 0 }, { X86_FEATURE_AVX512_4VNNIW, CPUID_EDX, 2, 0x00000007, 0 }, { X86_FEATURE_AVX512_4FMAPS, CPUID_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_CAT_L3, CPUID_EBX, 1, 0x00000010, 0 }, diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 6a5d757b9cfd..7ba5d819ebe3 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -157,8 +157,8 @@ unsigned long __head __startup_64(unsigned long physaddr, p = fixup_pointer(&phys_base, physaddr); *p += load_delta - sme_get_me_mask(); - /* Encrypt the kernel (if SME is active) */ - sme_encrypt_kernel(); + /* Encrypt the kernel and related (if SME is active) */ + sme_encrypt_kernel(bp); /* * Return the SME encryption mask (if SME is active) to be used as a diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index d985cef3984f..56d99be3706a 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -56,7 +56,7 @@ struct idt_data { * Early traps running on the DEFAULT_STACK because the other interrupt * stacks work only after cpu_init(). */ -static const __initdata struct idt_data early_idts[] = { +static const __initconst struct idt_data early_idts[] = { INTG(X86_TRAP_DB, debug), SYSG(X86_TRAP_BP, int3), #ifdef CONFIG_X86_32 @@ -70,7 +70,7 @@ static const __initdata struct idt_data early_idts[] = { * the traps which use them are reinitialized with IST after cpu_init() has * set up TSS. */ -static const __initdata struct idt_data def_idts[] = { +static const __initconst struct idt_data def_idts[] = { INTG(X86_TRAP_DE, divide_error), INTG(X86_TRAP_NMI, nmi), INTG(X86_TRAP_BR, bounds), @@ -108,7 +108,7 @@ static const __initdata struct idt_data def_idts[] = { /* * The APIC and SMP idt entries */ -static const __initdata struct idt_data apic_idts[] = { +static const __initconst struct idt_data apic_idts[] = { #ifdef CONFIG_SMP INTG(RESCHEDULE_VECTOR, reschedule_interrupt), INTG(CALL_FUNCTION_VECTOR, call_function_interrupt), @@ -150,7 +150,7 @@ static const __initdata struct idt_data apic_idts[] = { * Early traps running on the DEFAULT_STACK because the other interrupt * stacks work only after cpu_init(). */ -static const __initdata struct idt_data early_pf_idts[] = { +static const __initconst struct idt_data early_pf_idts[] = { INTG(X86_TRAP_PF, page_fault), }; @@ -158,7 +158,7 @@ static const __initdata struct idt_data early_pf_idts[] = { * Override for the debug_idt. Same as the default, but with interrupt * stack set to DEFAULT_STACK (0). Required for NMI trap handling. */ -static const __initdata struct idt_data dbg_idts[] = { +static const __initconst struct idt_data dbg_idts[] = { INTG(X86_TRAP_DB, debug), INTG(X86_TRAP_BP, int3), }; @@ -180,7 +180,7 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss; * The exceptions which use Interrupt stacks. They are setup after * cpu_init() when the TSS has been initialized. */ -static const __initdata struct idt_data ist_idts[] = { +static const __initconst struct idt_data ist_idts[] = { ISTG(X86_TRAP_DB, debug, DEBUG_STACK), ISTG(X86_TRAP_NMI, nmi, NMI_STACK), SISTG(X86_TRAP_BP, int3, DEBUG_STACK), diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 8da3e909e967..a539410c4ea9 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c @@ -61,6 +61,9 @@ void __init init_ISA_irqs(void) struct irq_chip *chip = legacy_pic->chip; int i; +#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) + init_bsp_APIC(); +#endif legacy_pic->init(0); for (i = 0; i < nr_legacy_irqs(); i++) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 145810b0edf6..68d7ab81c62f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -364,16 +364,6 @@ static void __init reserve_initrd(void) !ramdisk_image || !ramdisk_size) return; /* No initrd provided by bootloader */ - /* - * If SME is active, this memory will be marked encrypted by the - * kernel when it is accessed (including relocation). However, the - * ramdisk image was loaded decrypted by the bootloader, so make - * sure that it is encrypted before accessing it. For SEV the - * ramdisk will already be encrypted, so only do this for SME. - */ - if (sme_active()) - sme_early_encrypt(ramdisk_image, ramdisk_end - ramdisk_image); - initrd_start = 0; mapped_size = memblock_mem_size(max_pfn_mapped); diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 8ea117f8142e..e169e85db434 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -602,7 +602,6 @@ unsigned long native_calibrate_tsc(void) case INTEL_FAM6_KABYLAKE_DESKTOP: crystal_khz = 24000; /* 24.0 MHz */ break; - case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ATOM_DENVERTON: crystal_khz = 25000; /* 25.0 MHz */ break; @@ -612,6 +611,8 @@ unsigned long native_calibrate_tsc(void) } } + if (crystal_khz == 0) + return 0; /* * TSC frequency determined by CPUID is a "hardware reported" * frequency and is the most accurate one so far we have. This @@ -1315,6 +1316,12 @@ void __init tsc_init(void) (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); + if (cpu_khz != tsc_khz) { + pr_info("Detected %lu.%03lu MHz TSC", + (unsigned long)tsc_khz / 1000, + (unsigned long)tsc_khz % 1000); + } + /* Sanitize TSC ADJUST before cyc2ns gets initialized */ tsc_store_and_check_tsc_adjust(true); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 06fe3d51d385..b3e40773dce0 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -172,14 +172,15 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr) * 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really * faulted on a pte with its pkey=4. */ -static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey) +static void fill_sig_info_pkey(int si_signo, int si_code, siginfo_t *info, + u32 *pkey) { /* This is effectively an #ifdef */ if (!boot_cpu_has(X86_FEATURE_OSPKE)) return; /* Fault not from Protection Keys: nothing to do */ - if (si_code != SEGV_PKUERR) + if ((si_code != SEGV_PKUERR) || (si_signo != SIGSEGV)) return; /* * force_sig_info_fault() is called from a number of @@ -218,7 +219,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, lsb = PAGE_SHIFT; info.si_addr_lsb = lsb; - fill_sig_info_pkey(si_code, &info, pkey); + fill_sig_info_pkey(si_signo, si_code, &info, pkey); force_sig_info(si_signo, &info, tsk); } diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 47388f0c0e59..af6f2f9c6a26 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -21,10 +21,14 @@ extern struct range pfn_mapped[E820_MAX_ENTRIES]; static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE); -static __init void *early_alloc(size_t size, int nid) +static __init void *early_alloc(size_t size, int nid, bool panic) { - return memblock_virt_alloc_try_nid_nopanic(size, size, - __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); + if (panic) + return memblock_virt_alloc_try_nid(size, size, + __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); + else + return memblock_virt_alloc_try_nid_nopanic(size, size, + __pa(MAX_DMA_ADDRESS), BOOTMEM_ALLOC_ACCESSIBLE, nid); } static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, @@ -38,14 +42,14 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, if (boot_cpu_has(X86_FEATURE_PSE) && ((end - addr) == PMD_SIZE) && IS_ALIGNED(addr, PMD_SIZE)) { - p = early_alloc(PMD_SIZE, nid); + p = early_alloc(PMD_SIZE, nid, false); if (p && pmd_set_huge(pmd, __pa(p), PAGE_KERNEL)) return; else if (p) memblock_free(__pa(p), PMD_SIZE); } - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); pmd_populate_kernel(&init_mm, pmd, p); } @@ -57,7 +61,7 @@ static void __init kasan_populate_pmd(pmd_t *pmd, unsigned long addr, if (!pte_none(*pte)) continue; - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); entry = pfn_pte(PFN_DOWN(__pa(p)), PAGE_KERNEL); set_pte_at(&init_mm, addr, pte, entry); } while (pte++, addr += PAGE_SIZE, addr != end); @@ -75,14 +79,14 @@ static void __init kasan_populate_pud(pud_t *pud, unsigned long addr, if (boot_cpu_has(X86_FEATURE_GBPAGES) && ((end - addr) == PUD_SIZE) && IS_ALIGNED(addr, PUD_SIZE)) { - p = early_alloc(PUD_SIZE, nid); + p = early_alloc(PUD_SIZE, nid, false); if (p && pud_set_huge(pud, __pa(p), PAGE_KERNEL)) return; else if (p) memblock_free(__pa(p), PUD_SIZE); } - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); pud_populate(&init_mm, pud, p); } @@ -101,7 +105,7 @@ static void __init kasan_populate_p4d(p4d_t *p4d, unsigned long addr, unsigned long next; if (p4d_none(*p4d)) { - void *p = early_alloc(PAGE_SIZE, nid); + void *p = early_alloc(PAGE_SIZE, nid, true); p4d_populate(&init_mm, p4d, p); } @@ -122,7 +126,7 @@ static void __init kasan_populate_pgd(pgd_t *pgd, unsigned long addr, unsigned long next; if (pgd_none(*pgd)) { - p = early_alloc(PAGE_SIZE, nid); + p = early_alloc(PAGE_SIZE, nid, true); pgd_populate(&init_mm, pgd, p); } diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 391b13402e40..3ef362f598e3 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -464,37 +464,62 @@ void swiotlb_set_mem_attributes(void *vaddr, unsigned long size) set_memory_decrypted((unsigned long)vaddr, size >> PAGE_SHIFT); } -static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start, - unsigned long end) +struct sme_populate_pgd_data { + void *pgtable_area; + pgd_t *pgd; + + pmdval_t pmd_flags; + pteval_t pte_flags; + unsigned long paddr; + + unsigned long vaddr; + unsigned long vaddr_end; +}; + +static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd) { unsigned long pgd_start, pgd_end, pgd_size; pgd_t *pgd_p; - pgd_start = start & PGDIR_MASK; - pgd_end = end & PGDIR_MASK; + pgd_start = ppd->vaddr & PGDIR_MASK; + pgd_end = ppd->vaddr_end & PGDIR_MASK; - pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1); - pgd_size *= sizeof(pgd_t); + pgd_size = (((pgd_end - pgd_start) / PGDIR_SIZE) + 1) * sizeof(pgd_t); - pgd_p = pgd_base + pgd_index(start); + pgd_p = ppd->pgd + pgd_index(ppd->vaddr); memset(pgd_p, 0, pgd_size); } -#define PGD_FLAGS _KERNPG_TABLE_NOENC -#define P4D_FLAGS _KERNPG_TABLE_NOENC -#define PUD_FLAGS _KERNPG_TABLE_NOENC -#define PMD_FLAGS (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) +#define PGD_FLAGS _KERNPG_TABLE_NOENC +#define P4D_FLAGS _KERNPG_TABLE_NOENC +#define PUD_FLAGS _KERNPG_TABLE_NOENC +#define PMD_FLAGS _KERNPG_TABLE_NOENC + +#define PMD_FLAGS_LARGE (__PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL) + +#define PMD_FLAGS_DEC PMD_FLAGS_LARGE +#define PMD_FLAGS_DEC_WP ((PMD_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ + (_PAGE_PAT | _PAGE_PWT)) + +#define PMD_FLAGS_ENC (PMD_FLAGS_LARGE | _PAGE_ENC) + +#define PTE_FLAGS (__PAGE_KERNEL_EXEC & ~_PAGE_GLOBAL) + +#define PTE_FLAGS_DEC PTE_FLAGS +#define PTE_FLAGS_DEC_WP ((PTE_FLAGS_DEC & ~_PAGE_CACHE_MASK) | \ + (_PAGE_PAT | _PAGE_PWT)) + +#define PTE_FLAGS_ENC (PTE_FLAGS | _PAGE_ENC) -static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, - unsigned long vaddr, pmdval_t pmd_val) +static pmd_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd) { pgd_t *pgd_p; p4d_t *p4d_p; pud_t *pud_p; pmd_t *pmd_p; - pgd_p = pgd_base + pgd_index(vaddr); + pgd_p = ppd->pgd + pgd_index(ppd->vaddr); if (native_pgd_val(*pgd_p)) { if (IS_ENABLED(CONFIG_X86_5LEVEL)) p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK); @@ -504,15 +529,15 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, pgd_t pgd; if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - p4d_p = pgtable_area; + p4d_p = ppd->pgtable_area; memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D); - pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; + ppd->pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D; pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS); } else { - pud_p = pgtable_area; + pud_p = ppd->pgtable_area; memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); - pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; + ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS); } @@ -520,58 +545,160 @@ static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area, } if (IS_ENABLED(CONFIG_X86_5LEVEL)) { - p4d_p += p4d_index(vaddr); + p4d_p += p4d_index(ppd->vaddr); if (native_p4d_val(*p4d_p)) { pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK); } else { p4d_t p4d; - pud_p = pgtable_area; + pud_p = ppd->pgtable_area; memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD); - pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; + ppd->pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD; p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS); native_set_p4d(p4d_p, p4d); } } - pud_p += pud_index(vaddr); + pud_p += pud_index(ppd->vaddr); if (native_pud_val(*pud_p)) { if (native_pud_val(*pud_p) & _PAGE_PSE) - goto out; + return NULL; pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK); } else { pud_t pud; - pmd_p = pgtable_area; + pmd_p = ppd->pgtable_area; memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD); - pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; + ppd->pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD; pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS); native_set_pud(pud_p, pud); } - pmd_p += pmd_index(vaddr); + return pmd_p; +} + +static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd) +{ + pmd_t *pmd_p; + + pmd_p = sme_prepare_pgd(ppd); + if (!pmd_p) + return; + + pmd_p += pmd_index(ppd->vaddr); if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE)) - native_set_pmd(pmd_p, native_make_pmd(pmd_val)); + native_set_pmd(pmd_p, native_make_pmd(ppd->paddr | ppd->pmd_flags)); +} -out: - return pgtable_area; +static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd) +{ + pmd_t *pmd_p; + pte_t *pte_p; + + pmd_p = sme_prepare_pgd(ppd); + if (!pmd_p) + return; + + pmd_p += pmd_index(ppd->vaddr); + if (native_pmd_val(*pmd_p)) { + if (native_pmd_val(*pmd_p) & _PAGE_PSE) + return; + + pte_p = (pte_t *)(native_pmd_val(*pmd_p) & ~PTE_FLAGS_MASK); + } else { + pmd_t pmd; + + pte_p = ppd->pgtable_area; + memset(pte_p, 0, sizeof(*pte_p) * PTRS_PER_PTE); + ppd->pgtable_area += sizeof(*pte_p) * PTRS_PER_PTE; + + pmd = native_make_pmd((pteval_t)pte_p + PMD_FLAGS); + native_set_pmd(pmd_p, pmd); + } + + pte_p += pte_index(ppd->vaddr); + if (!native_pte_val(*pte_p)) + native_set_pte(pte_p, native_make_pte(ppd->paddr | ppd->pte_flags)); +} + +static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd) +{ + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd_large(ppd); + + ppd->vaddr += PMD_PAGE_SIZE; + ppd->paddr += PMD_PAGE_SIZE; + } +} + +static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd) +{ + while (ppd->vaddr < ppd->vaddr_end) { + sme_populate_pgd(ppd); + + ppd->vaddr += PAGE_SIZE; + ppd->paddr += PAGE_SIZE; + } +} + +static void __init __sme_map_range(struct sme_populate_pgd_data *ppd, + pmdval_t pmd_flags, pteval_t pte_flags) +{ + unsigned long vaddr_end; + + ppd->pmd_flags = pmd_flags; + ppd->pte_flags = pte_flags; + + /* Save original end value since we modify the struct value */ + vaddr_end = ppd->vaddr_end; + + /* If start is not 2MB aligned, create PTE entries */ + ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE); + __sme_map_range_pte(ppd); + + /* Create PMD entries */ + ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK; + __sme_map_range_pmd(ppd); + + /* If end is not 2MB aligned, create PTE entries */ + ppd->vaddr_end = vaddr_end; + __sme_map_range_pte(ppd); +} + +static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC); +} + +static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC); +} + +static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd) +{ + __sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP); } static unsigned long __init sme_pgtable_calc(unsigned long len) { - unsigned long p4d_size, pud_size, pmd_size; + unsigned long p4d_size, pud_size, pmd_size, pte_size; unsigned long total; /* * Perform a relatively simplistic calculation of the pagetable - * entries that are needed. That mappings will be covered by 2MB - * PMD entries so we can conservatively calculate the required + * entries that are needed. Those mappings will be covered mostly + * by 2MB PMD entries so we can conservatively calculate the required * number of P4D, PUD and PMD structures needed to perform the - * mappings. Incrementing the count for each covers the case where - * the addresses cross entries. + * mappings. For mappings that are not 2MB aligned, PTE mappings + * would be needed for the start and end portion of the address range + * that fall outside of the 2MB alignment. This results in, at most, + * two extra pages to hold PTE entries for each range that is mapped. + * Incrementing the count for each covers the case where the addresses + * cross entries. */ if (IS_ENABLED(CONFIG_X86_5LEVEL)) { p4d_size = (ALIGN(len, PGDIR_SIZE) / PGDIR_SIZE) + 1; @@ -585,8 +712,9 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) } pmd_size = (ALIGN(len, PUD_SIZE) / PUD_SIZE) + 1; pmd_size *= sizeof(pmd_t) * PTRS_PER_PMD; + pte_size = 2 * sizeof(pte_t) * PTRS_PER_PTE; - total = p4d_size + pud_size + pmd_size; + total = p4d_size + pud_size + pmd_size + pte_size; /* * Now calculate the added pagetable structures needed to populate @@ -610,29 +738,29 @@ static unsigned long __init sme_pgtable_calc(unsigned long len) return total; } -void __init sme_encrypt_kernel(void) +void __init sme_encrypt_kernel(struct boot_params *bp) { unsigned long workarea_start, workarea_end, workarea_len; unsigned long execute_start, execute_end, execute_len; unsigned long kernel_start, kernel_end, kernel_len; + unsigned long initrd_start, initrd_end, initrd_len; + struct sme_populate_pgd_data ppd; unsigned long pgtable_area_len; - unsigned long paddr, pmd_flags; unsigned long decrypted_base; - void *pgtable_area; - pgd_t *pgd; if (!sme_active()) return; /* - * Prepare for encrypting the kernel by building new pagetables with - * the necessary attributes needed to encrypt the kernel in place. + * Prepare for encrypting the kernel and initrd by building new + * pagetables with the necessary attributes needed to encrypt the + * kernel in place. * * One range of virtual addresses will map the memory occupied - * by the kernel as encrypted. + * by the kernel and initrd as encrypted. * * Another range of virtual addresses will map the memory occupied - * by the kernel as decrypted and write-protected. + * by the kernel and initrd as decrypted and write-protected. * * The use of write-protect attribute will prevent any of the * memory from being cached. @@ -643,6 +771,20 @@ void __init sme_encrypt_kernel(void) kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE); kernel_len = kernel_end - kernel_start; + initrd_start = 0; + initrd_end = 0; + initrd_len = 0; +#ifdef CONFIG_BLK_DEV_INITRD + initrd_len = (unsigned long)bp->hdr.ramdisk_size | + ((unsigned long)bp->ext_ramdisk_size << 32); + if (initrd_len) { + initrd_start = (unsigned long)bp->hdr.ramdisk_image | + ((unsigned long)bp->ext_ramdisk_image << 32); + initrd_end = PAGE_ALIGN(initrd_start + initrd_len); + initrd_len = initrd_end - initrd_start; + } +#endif + /* Set the encryption workarea to be immediately after the kernel */ workarea_start = kernel_end; @@ -665,16 +807,21 @@ void __init sme_encrypt_kernel(void) */ pgtable_area_len = sizeof(pgd_t) * PTRS_PER_PGD; pgtable_area_len += sme_pgtable_calc(execute_end - kernel_start) * 2; + if (initrd_len) + pgtable_area_len += sme_pgtable_calc(initrd_len) * 2; /* PUDs and PMDs needed in the current pagetables for the workarea */ pgtable_area_len += sme_pgtable_calc(execute_len + pgtable_area_len); /* * The total workarea includes the executable encryption area and - * the pagetable area. + * the pagetable area. The start of the workarea is already 2MB + * aligned, align the end of the workarea on a 2MB boundary so that + * we don't try to create/allocate PTE entries from the workarea + * before it is mapped. */ workarea_len = execute_len + pgtable_area_len; - workarea_end = workarea_start + workarea_len; + workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE); /* * Set the address to the start of where newly created pagetable @@ -683,45 +830,30 @@ void __init sme_encrypt_kernel(void) * pagetables and when the new encrypted and decrypted kernel * mappings are populated. */ - pgtable_area = (void *)execute_end; + ppd.pgtable_area = (void *)execute_end; /* * Make sure the current pagetable structure has entries for * addressing the workarea. */ - pgd = (pgd_t *)native_read_cr3_pa(); - paddr = workarea_start; - while (paddr < workarea_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr, - paddr + PMD_FLAGS); - - paddr += PMD_PAGE_SIZE; - } + ppd.pgd = (pgd_t *)native_read_cr3_pa(); + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start; + ppd.vaddr_end = workarea_end; + sme_map_range_decrypted(&ppd); /* Flush the TLB - no globals so cr3 is enough */ native_write_cr3(__native_read_cr3()); /* * A new pagetable structure is being built to allow for the kernel - * to be encrypted. It starts with an empty PGD that will then be - * populated with new PUDs and PMDs as the encrypted and decrypted - * kernel mappings are created. + * and initrd to be encrypted. It starts with an empty PGD that will + * then be populated with new PUDs and PMDs as the encrypted and + * decrypted kernel mappings are created. */ - pgd = pgtable_area; - memset(pgd, 0, sizeof(*pgd) * PTRS_PER_PGD); - pgtable_area += sizeof(*pgd) * PTRS_PER_PGD; - - /* Add encrypted kernel (identity) mappings */ - pmd_flags = PMD_FLAGS | _PAGE_ENC; - paddr = kernel_start; - while (paddr < kernel_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr, - paddr + pmd_flags); - - paddr += PMD_PAGE_SIZE; - } + ppd.pgd = ppd.pgtable_area; + memset(ppd.pgd, 0, sizeof(pgd_t) * PTRS_PER_PGD); + ppd.pgtable_area += sizeof(pgd_t) * PTRS_PER_PGD; /* * A different PGD index/entry must be used to get different @@ -730,47 +862,79 @@ void __init sme_encrypt_kernel(void) * the base of the mapping. */ decrypted_base = (pgd_index(workarea_end) + 1) & (PTRS_PER_PGD - 1); + if (initrd_len) { + unsigned long check_base; + + check_base = (pgd_index(initrd_end) + 1) & (PTRS_PER_PGD - 1); + decrypted_base = max(decrypted_base, check_base); + } decrypted_base <<= PGDIR_SHIFT; + /* Add encrypted kernel (identity) mappings */ + ppd.paddr = kernel_start; + ppd.vaddr = kernel_start; + ppd.vaddr_end = kernel_end; + sme_map_range_encrypted(&ppd); + /* Add decrypted, write-protected kernel (non-identity) mappings */ - pmd_flags = (PMD_FLAGS & ~_PAGE_CACHE_MASK) | (_PAGE_PAT | _PAGE_PWT); - paddr = kernel_start; - while (paddr < kernel_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr + decrypted_base, - paddr + pmd_flags); - - paddr += PMD_PAGE_SIZE; + ppd.paddr = kernel_start; + ppd.vaddr = kernel_start + decrypted_base; + ppd.vaddr_end = kernel_end + decrypted_base; + sme_map_range_decrypted_wp(&ppd); + + if (initrd_len) { + /* Add encrypted initrd (identity) mappings */ + ppd.paddr = initrd_start; + ppd.vaddr = initrd_start; + ppd.vaddr_end = initrd_end; + sme_map_range_encrypted(&ppd); + /* + * Add decrypted, write-protected initrd (non-identity) mappings + */ + ppd.paddr = initrd_start; + ppd.vaddr = initrd_start + decrypted_base; + ppd.vaddr_end = initrd_end + decrypted_base; + sme_map_range_decrypted_wp(&ppd); } /* Add decrypted workarea mappings to both kernel mappings */ - paddr = workarea_start; - while (paddr < workarea_end) { - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr, - paddr + PMD_FLAGS); + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start; + ppd.vaddr_end = workarea_end; + sme_map_range_decrypted(&ppd); - pgtable_area = sme_populate_pgd(pgd, pgtable_area, - paddr + decrypted_base, - paddr + PMD_FLAGS); - - paddr += PMD_PAGE_SIZE; - } + ppd.paddr = workarea_start; + ppd.vaddr = workarea_start + decrypted_base; + ppd.vaddr_end = workarea_end + decrypted_base; + sme_map_range_decrypted(&ppd); /* Perform the encryption */ sme_encrypt_execute(kernel_start, kernel_start + decrypted_base, - kernel_len, workarea_start, (unsigned long)pgd); + kernel_len, workarea_start, (unsigned long)ppd.pgd); + + if (initrd_len) + sme_encrypt_execute(initrd_start, initrd_start + decrypted_base, + initrd_len, workarea_start, + (unsigned long)ppd.pgd); /* * At this point we are running encrypted. Remove the mappings for * the decrypted areas - all that is needed for this is to remove * the PGD entry/entries. */ - sme_clear_pgd(pgd, kernel_start + decrypted_base, - kernel_end + decrypted_base); + ppd.vaddr = kernel_start + decrypted_base; + ppd.vaddr_end = kernel_end + decrypted_base; + sme_clear_pgd(&ppd); + + if (initrd_len) { + ppd.vaddr = initrd_start + decrypted_base; + ppd.vaddr_end = initrd_end + decrypted_base; + sme_clear_pgd(&ppd); + } - sme_clear_pgd(pgd, workarea_start + decrypted_base, - workarea_end + decrypted_base); + ppd.vaddr = workarea_start + decrypted_base; + ppd.vaddr_end = workarea_end + decrypted_base; + sme_clear_pgd(&ppd); /* Flush the TLB - no globals so cr3 is enough */ native_write_cr3(__native_read_cr3()); diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 730e6d541df1..01f682cf77a8 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -22,9 +22,9 @@ ENTRY(sme_encrypt_execute) /* * Entry parameters: - * RDI - virtual address for the encrypted kernel mapping - * RSI - virtual address for the decrypted kernel mapping - * RDX - length of kernel + * RDI - virtual address for the encrypted mapping + * RSI - virtual address for the decrypted mapping + * RDX - length to encrypt * RCX - virtual address of the encryption workarea, including: * - stack page (PAGE_SIZE) * - encryption routine page (PAGE_SIZE) @@ -41,9 +41,9 @@ ENTRY(sme_encrypt_execute) addq $PAGE_SIZE, %rax /* Workarea encryption routine */ push %r12 - movq %rdi, %r10 /* Encrypted kernel */ - movq %rsi, %r11 /* Decrypted kernel */ - movq %rdx, %r12 /* Kernel length */ + movq %rdi, %r10 /* Encrypted area */ + movq %rsi, %r11 /* Decrypted area */ + movq %rdx, %r12 /* Area length */ /* Copy encryption routine into the workarea */ movq %rax, %rdi /* Workarea encryption routine */ @@ -52,10 +52,10 @@ ENTRY(sme_encrypt_execute) rep movsb /* Setup registers for call */ - movq %r10, %rdi /* Encrypted kernel */ - movq %r11, %rsi /* Decrypted kernel */ + movq %r10, %rdi /* Encrypted area */ + movq %r11, %rsi /* Decrypted area */ movq %r8, %rdx /* Pagetables used for encryption */ - movq %r12, %rcx /* Kernel length */ + movq %r12, %rcx /* Area length */ movq %rax, %r8 /* Workarea encryption routine */ addq $PAGE_SIZE, %r8 /* Workarea intermediate copy buffer */ @@ -71,7 +71,7 @@ ENDPROC(sme_encrypt_execute) ENTRY(__enc_copy) /* - * Routine used to encrypt kernel. + * Routine used to encrypt memory in place. * This routine must be run outside of the kernel proper since * the kernel will be encrypted during the process. So this * routine is defined here and then copied to an area outside @@ -79,19 +79,19 @@ ENTRY(__enc_copy) * during execution. * * On entry the registers must be: - * RDI - virtual address for the encrypted kernel mapping - * RSI - virtual address for the decrypted kernel mapping + * RDI - virtual address for the encrypted mapping + * RSI - virtual address for the decrypted mapping * RDX - address of the pagetables to use for encryption - * RCX - length of kernel + * RCX - length of area * R8 - intermediate copy buffer * * RAX - points to this routine * - * The kernel will be encrypted by copying from the non-encrypted - * kernel space to an intermediate buffer and then copying from the - * intermediate buffer back to the encrypted kernel space. The physical - * addresses of the two kernel space mappings are the same which - * results in the kernel being encrypted "in place". + * The area will be encrypted by copying from the non-encrypted + * memory space to an intermediate buffer and then copying from the + * intermediate buffer back to the encrypted memory space. The physical + * addresses of the two mappings are the same which results in the area + * being encrypted "in place". */ /* Enable the new page tables */ mov %rdx, %cr3 @@ -103,47 +103,55 @@ ENTRY(__enc_copy) orq $X86_CR4_PGE, %rdx mov %rdx, %cr4 + push %r15 + push %r12 + + movq %rcx, %r9 /* Save area length */ + movq %rdi, %r10 /* Save encrypted area address */ + movq %rsi, %r11 /* Save decrypted area address */ + /* Set the PAT register PA5 entry to write-protect */ - push %rcx movl $MSR_IA32_CR_PAT, %ecx rdmsr - push %rdx /* Save original PAT value */ + mov %rdx, %r15 /* Save original PAT value */ andl $0xffff00ff, %edx /* Clear PA5 */ orl $0x00000500, %edx /* Set PA5 to WP */ wrmsr - pop %rdx /* RDX contains original PAT value */ - pop %rcx - - movq %rcx, %r9 /* Save kernel length */ - movq %rdi, %r10 /* Save encrypted kernel address */ - movq %rsi, %r11 /* Save decrypted kernel address */ wbinvd /* Invalidate any cache entries */ - /* Copy/encrypt 2MB at a time */ + /* Copy/encrypt up to 2MB at a time */ + movq $PMD_PAGE_SIZE, %r12 1: - movq %r11, %rsi /* Source - decrypted kernel */ + cmpq %r12, %r9 + jnb 2f + movq %r9, %r12 + +2: + movq %r11, %rsi /* Source - decrypted area */ movq %r8, %rdi /* Dest - intermediate copy buffer */ - movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ + movq %r12, %rcx rep movsb movq %r8, %rsi /* Source - intermediate copy buffer */ - movq %r10, %rdi /* Dest - encrypted kernel */ - movq $PMD_PAGE_SIZE, %rcx /* 2MB length */ + movq %r10, %rdi /* Dest - encrypted area */ + movq %r12, %rcx rep movsb - addq $PMD_PAGE_SIZE, %r11 - addq $PMD_PAGE_SIZE, %r10 - subq $PMD_PAGE_SIZE, %r9 /* Kernel length decrement */ + addq %r12, %r11 + addq %r12, %r10 + subq %r12, %r9 /* Kernel length decrement */ jnz 1b /* Kernel length not zero? */ /* Restore PAT register */ - push %rdx /* Save original PAT value */ movl $MSR_IA32_CR_PAT, %ecx rdmsr - pop %rdx /* Restore original PAT value */ + mov %r15, %rdx /* Restore original PAT value */ wrmsr + pop %r12 + pop %r15 + ret .L__enc_copy_end: ENDPROC(__enc_copy) |