diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/powerpc/include/asm/ppc-opcode.h | 1 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit.h | 2 | ||||
-rw-r--r-- | arch/powerpc/net/bpf_jit_comp64.c | 8 | ||||
-rw-r--r-- | arch/riscv/boot/dts/Makefile | 2 | ||||
-rw-r--r-- | arch/riscv/boot/dts/sifive/Makefile | 2 | ||||
-rw-r--r-- | arch/riscv/boot/dts/sifive/fu540-c000.dtsi | 215 | ||||
-rw-r--r-- | arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts | 65 | ||||
-rw-r--r-- | arch/riscv/configs/defconfig | 4 | ||||
-rw-r--r-- | arch/riscv/include/asm/bitops.h | 5 | ||||
-rw-r--r-- | arch/riscv/kernel/reset.c | 1 | ||||
-rw-r--r-- | arch/riscv/lib/delay.c | 2 | ||||
-rw-r--r-- | arch/riscv/mm/fault.c | 13 | ||||
-rw-r--r-- | arch/riscv/net/bpf_jit_comp.c | 24 | ||||
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 74 |
14 files changed, 354 insertions, 64 deletions
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 493c5c943acd..2291daf39cd1 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -338,6 +338,7 @@ #define PPC_INST_MADDLD 0x10000033 #define PPC_INST_DIVWU 0x7c000396 #define PPC_INST_DIVD 0x7c0003d2 +#define PPC_INST_DIVDU 0x7c000392 #define PPC_INST_RLWINM 0x54000000 #define PPC_INST_RLWINM_DOT 0x54000001 #define PPC_INST_RLWIMI 0x50000000 diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index 6026a7af031d..55d4377ccfae 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -112,7 +112,7 @@ ___PPC_RA(a) | IMM_L(i)) #define PPC_DIVWU(d, a, b) EMIT(PPC_INST_DIVWU | ___PPC_RT(d) | \ ___PPC_RA(a) | ___PPC_RB(b)) -#define PPC_DIVD(d, a, b) EMIT(PPC_INST_DIVD | ___PPC_RT(d) | \ +#define PPC_DIVDU(d, a, b) EMIT(PPC_INST_DIVDU | ___PPC_RT(d) | \ ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_AND(d, a, b) EMIT(PPC_INST_AND | ___PPC_RA(d) | \ ___PPC_RS(a) | ___PPC_RB(b)) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 63d05c499cac..c2ee6041f02c 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -395,12 +395,12 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */ case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */ if (BPF_OP(code) == BPF_MOD) { - PPC_DIVD(b2p[TMP_REG_1], dst_reg, src_reg); + PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg); PPC_MULD(b2p[TMP_REG_1], src_reg, b2p[TMP_REG_1]); PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); } else - PPC_DIVD(dst_reg, dst_reg, src_reg); + PPC_DIVDU(dst_reg, dst_reg, src_reg); break; case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */ case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */ @@ -428,7 +428,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, break; case BPF_ALU64: if (BPF_OP(code) == BPF_MOD) { - PPC_DIVD(b2p[TMP_REG_2], dst_reg, + PPC_DIVDU(b2p[TMP_REG_2], dst_reg, b2p[TMP_REG_1]); PPC_MULD(b2p[TMP_REG_1], b2p[TMP_REG_1], @@ -436,7 +436,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]); } else - PPC_DIVD(dst_reg, dst_reg, + PPC_DIVDU(dst_reg, dst_reg, b2p[TMP_REG_1]); break; } diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile new file mode 100644 index 000000000000..dcc3ada78455 --- /dev/null +++ b/arch/riscv/boot/dts/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +subdir-y += sifive diff --git a/arch/riscv/boot/dts/sifive/Makefile b/arch/riscv/boot/dts/sifive/Makefile new file mode 100644 index 000000000000..baaeef9efdcb --- /dev/null +++ b/arch/riscv/boot/dts/sifive/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +dtb-y += hifive-unleashed-a00.dtb diff --git a/arch/riscv/boot/dts/sifive/fu540-c000.dtsi b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi new file mode 100644 index 000000000000..3c06ee4b2b29 --- /dev/null +++ b/arch/riscv/boot/dts/sifive/fu540-c000.dtsi @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2018-2019 SiFive, Inc */ + +/dts-v1/; + +#include <dt-bindings/clock/sifive-fu540-prci.h> + +/ { + #address-cells = <2>; + #size-cells = <2>; + compatible = "sifive,fu540-c000", "sifive,fu540"; + + aliases { + serial0 = &uart0; + serial1 = &uart1; + }; + + chosen { + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + timebase-frequency = <1000000>; + cpu0: cpu@0 { + compatible = "sifive,e51", "sifive,rocket0", "riscv"; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <128>; + i-cache-size = <16384>; + reg = <0>; + riscv,isa = "rv64imac"; + status = "disabled"; + cpu0_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + cpu1: cpu@1 { + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <1>; + riscv,isa = "rv64imafdc"; + tlb-split; + cpu1_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + cpu2: cpu@2 { + clock-frequency = <0>; + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <2>; + riscv,isa = "rv64imafdc"; + tlb-split; + cpu2_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + cpu3: cpu@3 { + clock-frequency = <0>; + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <3>; + riscv,isa = "rv64imafdc"; + tlb-split; + cpu3_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + cpu4: cpu@4 { + clock-frequency = <0>; + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <4>; + riscv,isa = "rv64imafdc"; + tlb-split; + cpu4_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + }; + soc { + #address-cells = <2>; + #size-cells = <2>; + compatible = "sifive,fu540-c000", "sifive,fu540", "simple-bus"; + ranges; + plic0: interrupt-controller@c000000 { + #interrupt-cells = <1>; + compatible = "sifive,plic-1.0.0"; + reg = <0x0 0xc000000 0x0 0x4000000>; + riscv,ndev = <53>; + interrupt-controller; + interrupts-extended = < + &cpu0_intc 0xffffffff + &cpu1_intc 0xffffffff &cpu1_intc 9 + &cpu2_intc 0xffffffff &cpu2_intc 9 + &cpu3_intc 0xffffffff &cpu3_intc 9 + &cpu4_intc 0xffffffff &cpu4_intc 9>; + }; + prci: clock-controller@10000000 { + compatible = "sifive,fu540-c000-prci"; + reg = <0x0 0x10000000 0x0 0x1000>; + clocks = <&hfclk>, <&rtcclk>; + #clock-cells = <1>; + }; + uart0: serial@10010000 { + compatible = "sifive,fu540-c000-uart", "sifive,uart0"; + reg = <0x0 0x10010000 0x0 0x1000>; + interrupt-parent = <&plic0>; + interrupts = <4>; + clocks = <&prci PRCI_CLK_TLCLK>; + }; + uart1: serial@10011000 { + compatible = "sifive,fu540-c000-uart", "sifive,uart0"; + reg = <0x0 0x10011000 0x0 0x1000>; + interrupt-parent = <&plic0>; + interrupts = <5>; + clocks = <&prci PRCI_CLK_TLCLK>; + }; + i2c0: i2c@10030000 { + compatible = "sifive,fu540-c000-i2c", "sifive,i2c0"; + reg = <0x0 0x10030000 0x0 0x1000>; + interrupt-parent = <&plic0>; + interrupts = <50>; + clocks = <&prci PRCI_CLK_TLCLK>; + reg-shift = <2>; + reg-io-width = <1>; + #address-cells = <1>; + #size-cells = <0>; + }; + qspi0: spi@10040000 { + compatible = "sifive,fu540-c000-spi", "sifive,spi0"; + reg = <0x0 0x10040000 0x0 0x1000 + 0x0 0x20000000 0x0 0x10000000>; + interrupt-parent = <&plic0>; + interrupts = <51>; + clocks = <&prci PRCI_CLK_TLCLK>; + #address-cells = <1>; + #size-cells = <0>; + }; + qspi1: spi@10041000 { + compatible = "sifive,fu540-c000-spi", "sifive,spi0"; + reg = <0x0 0x10041000 0x0 0x1000 + 0x0 0x30000000 0x0 0x10000000>; + interrupt-parent = <&plic0>; + interrupts = <52>; + clocks = <&prci PRCI_CLK_TLCLK>; + #address-cells = <1>; + #size-cells = <0>; + }; + qspi2: spi@10050000 { + compatible = "sifive,fu540-c000-spi", "sifive,spi0"; + reg = <0x0 0x10050000 0x0 0x1000>; + interrupt-parent = <&plic0>; + interrupts = <6>; + clocks = <&prci PRCI_CLK_TLCLK>; + #address-cells = <1>; + #size-cells = <0>; + }; + }; +}; diff --git a/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts new file mode 100644 index 000000000000..4da88707e28f --- /dev/null +++ b/arch/riscv/boot/dts/sifive/hifive-unleashed-a00.dts @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2018-2019 SiFive, Inc */ + +#include "fu540-c000.dtsi" + +/* Clock frequency (in Hz) of the PCB crystal for rtcclk */ +#define RTCCLK_FREQ 1000000 + +/ { + #address-cells = <2>; + #size-cells = <2>; + model = "SiFive HiFive Unleashed A00"; + compatible = "sifive,hifive-unleashed-a00", "sifive,fu540-c000"; + + chosen { + }; + + cpus { + timebase-frequency = <RTCCLK_FREQ>; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x0 0x80000000 0x2 0x00000000>; + }; + + soc { + }; + + hfclk: hfclk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <33333333>; + clock-output-names = "hfclk"; + }; + + rtcclk: rtcclk { + #clock-cells = <0>; + compatible = "fixed-clock"; + clock-frequency = <RTCCLK_FREQ>; + clock-output-names = "rtcclk"; + }; +}; + +&qspi0 { + flash@0 { + compatible = "issi,is25wp256", "jedec,spi-nor"; + reg = <0>; + spi-max-frequency = <50000000>; + m25p,fast-read; + spi-tx-bus-width = <4>; + spi-rx-bus-width = <4>; + }; +}; + +&qspi2 { + status = "okay"; + mmc@0 { + compatible = "mmc-spi-slot"; + reg = <0>; + spi-max-frequency = <20000000>; + voltage-ranges = <3300 3300>; + disable-wp; + }; +}; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 2fd3461e50ab..4f02967e55de 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -49,6 +49,8 @@ CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y +CONFIG_SERIAL_SIFIVE=y +CONFIG_SERIAL_SIFIVE_CONSOLE=y CONFIG_HVC_RISCV_SBI=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_DRM=y @@ -64,6 +66,8 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y CONFIG_VIRTIO_MMIO=y +CONFIG_CLK_SIFIVE=y +CONFIG_CLK_SIFIVE_FU540_PRCI=y CONFIG_SIFIVE_PLIC=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y diff --git a/arch/riscv/include/asm/bitops.h b/arch/riscv/include/asm/bitops.h index 3943be480af0..396a3303c537 100644 --- a/arch/riscv/include/asm/bitops.h +++ b/arch/riscv/include/asm/bitops.h @@ -15,11 +15,6 @@ #include <asm/barrier.h> #include <asm/bitsperlong.h> -#ifndef smp_mb__before_clear_bit -#define smp_mb__before_clear_bit() smp_mb() -#define smp_mb__after_clear_bit() smp_mb() -#endif /* smp_mb__before_clear_bit */ - #include <asm-generic/bitops/__ffs.h> #include <asm-generic/bitops/ffz.h> #include <asm-generic/bitops/fls.h> diff --git a/arch/riscv/kernel/reset.c b/arch/riscv/kernel/reset.c index cfb6eb1d762d..d0fe623bfb8f 100644 --- a/arch/riscv/kernel/reset.c +++ b/arch/riscv/kernel/reset.c @@ -13,6 +13,7 @@ static void default_power_off(void) } void (*pm_power_off)(void) = default_power_off; +EXPORT_SYMBOL(pm_power_off); void machine_restart(char *cmd) { diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c index 7e893ae0f10e..87ff89e88f2c 100644 --- a/arch/riscv/lib/delay.c +++ b/arch/riscv/lib/delay.c @@ -80,7 +80,7 @@ EXPORT_SYMBOL(__delay); void udelay(unsigned long usecs) { - unsigned long ucycles = usecs * lpj_fine * UDELAY_MULT; + u64 ucycles = (u64)usecs * lpj_fine * UDELAY_MULT; if (unlikely(usecs > MAX_UDELAY_US)) { __delay((u64)usecs * riscv_timebase / 1000000ULL); diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index fd7662afddea..3e2708c626a8 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -16,6 +16,7 @@ #include <asm/pgalloc.h> #include <asm/ptrace.h> +#include <asm/tlbflush.h> /* * This routine handles page faults. It determines the address and the @@ -265,6 +266,18 @@ vmalloc_fault: pte_k = pte_offset_kernel(pmd_k, addr); if (!pte_present(*pte_k)) goto no_context; + + /* + * The kernel assumes that TLBs don't cache invalid + * entries, but in RISC-V, SFENCE.VMA specifies an + * ordering constraint, not a cache flush; it is + * necessary even after writing invalid entries. + * Relying on flush_tlb_fix_spurious_fault would + * suffice, but the extra traps reduce + * performance. So, eagerly SFENCE.VMA. + */ + local_flush_tlb_page(addr); + return; } } diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c index 80b12aa5e10d..426d5c33ea90 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp.c @@ -751,22 +751,32 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU | BPF_ADD | BPF_X: case BPF_ALU64 | BPF_ADD | BPF_X: emit(is64 ? rv_add(rd, rd, rs) : rv_addw(rd, rd, rs), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_SUB | BPF_X: case BPF_ALU64 | BPF_SUB | BPF_X: emit(is64 ? rv_sub(rd, rd, rs) : rv_subw(rd, rd, rs), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_AND | BPF_X: case BPF_ALU64 | BPF_AND | BPF_X: emit(rv_and(rd, rd, rs), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_OR | BPF_X: case BPF_ALU64 | BPF_OR | BPF_X: emit(rv_or(rd, rd, rs), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_XOR | BPF_X: case BPF_ALU64 | BPF_XOR | BPF_X: emit(rv_xor(rd, rd, rs), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MUL | BPF_X: case BPF_ALU64 | BPF_MUL | BPF_X: @@ -789,14 +799,20 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU | BPF_LSH | BPF_X: case BPF_ALU64 | BPF_LSH | BPF_X: emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_RSH | BPF_X: case BPF_ALU64 | BPF_RSH | BPF_X: emit(is64 ? rv_srl(rd, rd, rs) : rv_srlw(rd, rd, rs), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_ARSH | BPF_X: case BPF_ALU64 | BPF_ARSH | BPF_X: emit(is64 ? rv_sra(rd, rd, rs) : rv_sraw(rd, rd, rs), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; /* dst = -dst */ @@ -804,6 +820,8 @@ static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_ALU64 | BPF_NEG: emit(is64 ? rv_sub(rd, RV_REG_ZERO, rd) : rv_subw(rd, RV_REG_ZERO, rd), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; /* dst = BSWAP##imm(dst) */ @@ -958,14 +976,20 @@ out_be: case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU64 | BPF_LSH | BPF_K: emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU64 | BPF_RSH | BPF_K: emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_ARSH | BPF_K: case BPF_ALU64 | BPF_ARSH | BPF_K: emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx); + if (!is64) + emit_zext_32(rd, ctx); break; /* JUMP off */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 32bfab4e21eb..eaaed5bfc4a4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -186,9 +186,7 @@ struct jit_context { #define BPF_MAX_INSN_SIZE 128 #define BPF_INSN_SAFETY 64 -#define AUX_STACK_SPACE 40 /* Space for RBX, R13, R14, R15, tailcnt */ - -#define PROLOGUE_SIZE 37 +#define PROLOGUE_SIZE 20 /* * Emit x86-64 prologue code for BPF program and check its size. @@ -199,44 +197,19 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) u8 *prog = *pprog; int cnt = 0; - /* push rbp */ - EMIT1(0x55); - - /* mov rbp,rsp */ - EMIT3(0x48, 0x89, 0xE5); - - /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ - EMIT3_off32(0x48, 0x81, 0xEC, - round_up(stack_depth, 8) + AUX_STACK_SPACE); - - /* sub rbp, AUX_STACK_SPACE */ - EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); - - /* mov qword ptr [rbp+0],rbx */ - EMIT4(0x48, 0x89, 0x5D, 0); - /* mov qword ptr [rbp+8],r13 */ - EMIT4(0x4C, 0x89, 0x6D, 8); - /* mov qword ptr [rbp+16],r14 */ - EMIT4(0x4C, 0x89, 0x75, 16); - /* mov qword ptr [rbp+24],r15 */ - EMIT4(0x4C, 0x89, 0x7D, 24); - + EMIT1(0x55); /* push rbp */ + EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ + /* sub rsp, rounded_stack_depth */ + EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); + EMIT1(0x53); /* push rbx */ + EMIT2(0x41, 0x55); /* push r13 */ + EMIT2(0x41, 0x56); /* push r14 */ + EMIT2(0x41, 0x57); /* push r15 */ if (!ebpf_from_cbpf) { - /* - * Clear the tail call counter (tail_call_cnt): for eBPF tail - * calls we need to reset the counter to 0. It's done in two - * instructions, resetting RAX register to 0, and moving it - * to the counter location. - */ - - /* xor eax, eax */ - EMIT2(0x31, 0xc0); - /* mov qword ptr [rbp+32], rax */ - EMIT4(0x48, 0x89, 0x45, 32); - + /* zero init tail_call_cnt */ + EMIT2(0x6a, 0x00); BUILD_BUG_ON(cnt != PROLOGUE_SIZE); } - *pprog = prog; } @@ -281,13 +254,13 @@ static void emit_bpf_tail_call(u8 **pprog) * if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; */ - EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ + EMIT2_off32(0x8B, 0x85, -36 - MAX_BPF_STACK); /* mov eax, dword ptr [rbp - 548] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ #define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ - EMIT2_off32(0x89, 0x85, 36); /* mov dword ptr [rbp + 36], eax */ + EMIT2_off32(0x89, 0x85, -36 - MAX_BPF_STACK); /* mov dword ptr [rbp -548], eax */ /* prog = array->ptrs[index]; */ EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ @@ -1036,19 +1009,14 @@ emit_jmp: seen_exit = true; /* Update cleanup_addr */ ctx->cleanup_addr = proglen; - /* mov rbx, qword ptr [rbp+0] */ - EMIT4(0x48, 0x8B, 0x5D, 0); - /* mov r13, qword ptr [rbp+8] */ - EMIT4(0x4C, 0x8B, 0x6D, 8); - /* mov r14, qword ptr [rbp+16] */ - EMIT4(0x4C, 0x8B, 0x75, 16); - /* mov r15, qword ptr [rbp+24] */ - EMIT4(0x4C, 0x8B, 0x7D, 24); - - /* add rbp, AUX_STACK_SPACE */ - EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE); - EMIT1(0xC9); /* leave */ - EMIT1(0xC3); /* ret */ + if (!bpf_prog_was_classic(bpf_prog)) + EMIT1(0x5B); /* get rid of tail_call_cnt */ + EMIT2(0x41, 0x5F); /* pop r15 */ + EMIT2(0x41, 0x5E); /* pop r14 */ + EMIT2(0x41, 0x5D); /* pop r13 */ + EMIT1(0x5B); /* pop rbx */ + EMIT1(0xC9); /* leave */ + EMIT1(0xC3); /* ret */ break; default: |