From fb084fde0c8106bc86df243411751c3421c07c08 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 14 Oct 2020 08:38:00 +0100 Subject: objtool: Fully validate the stack frame A valid stack frame should contain both the return address and the previous frame pointer value. On x86, the return value is placed on the stack by the calling instructions. On other architectures, the callee needs to explicitly save the return address on the stack. Add the necessary checks to verify a function properly sets up all the elements of the stack frame. Signed-off-by: Julien Thierry Acked-by: Peter Zijlstra (Intel) Signed-off-by: Josh Poimboeuf --- tools/objtool/check.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5f8d3eed78a1..88210b0856f7 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1733,12 +1733,20 @@ static bool has_modified_stack_frame(struct instruction *insn, struct insn_state return false; } +static bool check_reg_frame_pos(const struct cfi_reg *reg, + int expected_offset) +{ + return reg->base == CFI_CFA && + reg->offset == expected_offset; +} + static bool has_valid_stack_frame(struct insn_state *state) { struct cfi_state *cfi = &state->cfi; - if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA && - cfi->regs[CFI_BP].offset == -16) + if (cfi->cfa.base == CFI_BP && + check_reg_frame_pos(&cfi->regs[CFI_BP], -cfi->cfa.offset) && + check_reg_frame_pos(&cfi->regs[CFI_RA], -cfi->cfa.offset + 8)) return true; if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP) @@ -1867,8 +1875,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, case OP_SRC_REG: if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP && cfa->base == CFI_SP && - regs[CFI_BP].base == CFI_CFA && - regs[CFI_BP].offset == -cfa->offset) { + check_reg_frame_pos(®s[CFI_BP], -cfa->offset)) { /* mov %rsp, %rbp */ cfa->base = op->dest.reg; -- cgit v1.2.3 From 468af56a7bbaa626da5a4578bedc930d731fba13 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 14 Oct 2020 08:38:01 +0100 Subject: objtool: Support addition to set CFA base On arm64, the compiler can set the frame pointer either with a move operation or with and add operation like: add (SP + constant), BP For a simple move operation, the CFA base is changed from SP to BP. Handle also changing the CFA base when the frame pointer is set with an addition instruction. Signed-off-by: Julien Thierry Acked-by: Peter Zijlstra (Intel) Signed-off-by: Josh Poimboeuf --- tools/objtool/check.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 88210b0856f7..00d00f904536 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1960,6 +1960,17 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, break; } + if (!cfi->drap && op->src.reg == CFI_SP && + op->dest.reg == CFI_BP && cfa->base == CFI_SP && + check_reg_frame_pos(®s[CFI_BP], -cfa->offset + op->src.offset)) { + + /* lea disp(%rsp), %rbp */ + cfa->base = CFI_BP; + cfa->offset -= op->src.offset; + cfi->bp_scratch = false; + break; + } + if (op->src.reg == CFI_SP && cfa->base == CFI_SP) { /* drap: lea disp(%rsp), %drap */ -- cgit v1.2.3 From 201ef5a974e24112953b74cc9f33dcfc4cbcc1cb Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Wed, 14 Oct 2020 08:38:02 +0100 Subject: objtool: Make SP memory operation match PUSH/POP semantics Architectures without PUSH/POP instructions will always access the stack though memory operations (SRC/DEST_INDIRECT). Make those operations have the same effect on the CFA as PUSH/POP, with no stack pointer modification. Signed-off-by: Julien Thierry Acked-by: Peter Zijlstra (Intel) Signed-off-by: Josh Poimboeuf --- tools/objtool/check.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 00d00f904536..270adc38d896 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2065,6 +2065,14 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, break; case OP_SRC_REG_INDIRECT: + if (!cfi->drap && op->dest.reg == cfa->base && + op->dest.reg == CFI_BP) { + + /* mov disp(%rsp), %rbp */ + cfa->base = CFI_SP; + cfa->offset = cfi->stack_size; + } + if (cfi->drap && op->src.reg == CFI_BP && op->src.offset == cfi->drap_offset) { @@ -2086,6 +2094,12 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, /* mov disp(%rbp), %reg */ /* mov disp(%rsp), %reg */ restore_reg(cfi, op->dest.reg); + + } else if (op->src.reg == CFI_SP && + op->src.offset == regs[op->dest.reg].offset + cfi->stack_size) { + + /* mov disp(%rsp), %reg */ + restore_reg(cfi, op->dest.reg); } break; @@ -2163,6 +2177,12 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, /* mov reg, disp(%rsp) */ save_reg(cfi, op->src.reg, CFI_CFA, op->dest.offset - cfi->cfa.offset); + + } else if (op->dest.reg == CFI_SP) { + + /* mov reg, disp(%rsp) */ + save_reg(cfi, op->src.reg, CFI_CFA, + op->dest.offset - cfi->stack_size); } break; -- cgit v1.2.3 From 1d509f2a6ebca1aea3089c769f6375f01a832e9b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 13 Nov 2020 00:03:23 +0100 Subject: x86/insn: Support big endian cross-compiles The x86 instruction decoder code is shared across the kernel source and the tools. Currently objtool seems to be the only tool from build tools needed which breaks x86 cross-compilation on big endian systems. Make the x86 instruction decoder build host endianness agnostic to support x86 cross-compilation and enable objtool to implement endianness awareness for big endian architectures support. Signed-off-by: Martin Schwidefsky Co-developed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik Acked-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Signed-off-by: Josh Poimboeuf --- arch/x86/include/asm/insn.h | 33 +++++++++++++ arch/x86/lib/insn.c | 101 ++++++++++++++++++-------------------- arch/x86/tools/insn_sanity.c | 4 -- tools/arch/x86/include/asm/insn.h | 33 +++++++++++++ tools/arch/x86/lib/insn.c | 101 ++++++++++++++++++-------------------- 5 files changed, 160 insertions(+), 112 deletions(-) (limited to 'tools') diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index a8c3d284fa46..090863cfb7f3 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -7,9 +7,12 @@ * Copyright (C) IBM Corporation, 2009 */ +#include /* insn_attr_t is defined in inat.h */ #include +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) + struct insn_field { union { insn_value_t value; @@ -20,6 +23,36 @@ struct insn_field { unsigned char nbytes; }; +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->nbytes = n; +} + +#else + +struct insn_field { + insn_value_t value; + union { + insn_value_t little; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->little = __cpu_to_le32(v); + p->nbytes = n; +} + +#endif + struct insn { struct insn_field prefixes; /* * Prefixes diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 404279563891..520b31fc1f1a 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -5,6 +5,7 @@ * Copyright (C) IBM Corporation, 2002, 2004, 2009 */ +#include #ifdef __KERNEL__ #include #else @@ -15,15 +16,28 @@ #include +#define leXX_to_cpu(t, r) \ +({ \ + __typeof__(t) v; \ + switch (sizeof(t)) { \ + case 4: v = le32_to_cpu(r); break; \ + case 2: v = le16_to_cpu(r); break; \ + case 1: v = r; break; \ + default: \ + BUILD_BUG(); break; \ + } \ + v; \ +}) + /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ - ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ - ({ t r = *(t*)((insn)->next_byte + n); r; }) + ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) @@ -157,8 +171,7 @@ found: b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); if (inat_is_rex_prefix(attr)) { - insn->rex_prefix.value = b; - insn->rex_prefix.nbytes = 1; + insn_field_set(&insn->rex_prefix, b, 1); insn->next_byte++; if (X86_REX_W(b)) /* REX.W overrides opnd_size */ @@ -295,8 +308,7 @@ void insn_get_modrm(struct insn *insn) if (inat_has_modrm(insn->attr)) { mod = get_next(insn_byte_t, insn); - modrm->value = mod; - modrm->nbytes = 1; + insn_field_set(modrm, mod, 1); if (inat_is_group(insn->attr)) { pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_group_attribute(mod, pfx_id, @@ -334,7 +346,7 @@ int insn_rip_relative(struct insn *insn) * For rip-relative instructions, the mod field (top 2 bits) * is zero and the r/m field (bottom 3 bits) is 0x5. */ - return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); + return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5); } /** @@ -353,11 +365,11 @@ void insn_get_sib(struct insn *insn) if (!insn->modrm.got) insn_get_modrm(insn); if (insn->modrm.nbytes) { - modrm = (insn_byte_t)insn->modrm.value; + modrm = insn->modrm.bytes[0]; if (insn->addr_bytes != 2 && X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { - insn->sib.value = get_next(insn_byte_t, insn); - insn->sib.nbytes = 1; + insn_field_set(&insn->sib, + get_next(insn_byte_t, insn), 1); } } insn->sib.got = 1; @@ -407,19 +419,18 @@ void insn_get_displacement(struct insn *insn) if (mod == 3) goto out; if (mod == 1) { - insn->displacement.value = get_next(signed char, insn); - insn->displacement.nbytes = 1; + insn_field_set(&insn->displacement, + get_next(signed char, insn), 1); } else if (insn->addr_bytes == 2) { if ((mod == 0 && rm == 6) || mod == 2) { - insn->displacement.value = - get_next(short, insn); - insn->displacement.nbytes = 2; + insn_field_set(&insn->displacement, + get_next(short, insn), 2); } } else { if ((mod == 0 && rm == 5) || mod == 2 || (mod == 0 && base == 5)) { - insn->displacement.value = get_next(int, insn); - insn->displacement.nbytes = 4; + insn_field_set(&insn->displacement, + get_next(int, insn), 4); } } } @@ -435,18 +446,14 @@ static int __get_moffset(struct insn *insn) { switch (insn->addr_bytes) { case 2: - insn->moffset1.value = get_next(short, insn); - insn->moffset1.nbytes = 2; + insn_field_set(&insn->moffset1, get_next(short, insn), 2); break; case 4: - insn->moffset1.value = get_next(int, insn); - insn->moffset1.nbytes = 4; + insn_field_set(&insn->moffset1, get_next(int, insn), 4); break; case 8: - insn->moffset1.value = get_next(int, insn); - insn->moffset1.nbytes = 4; - insn->moffset2.value = get_next(int, insn); - insn->moffset2.nbytes = 4; + insn_field_set(&insn->moffset1, get_next(int, insn), 4); + insn_field_set(&insn->moffset2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -464,13 +471,11 @@ static int __get_immv32(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate.value = get_next(short, insn); - insn->immediate.nbytes = 2; + insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case 4: case 8: - insn->immediate.value = get_next(int, insn); - insn->immediate.nbytes = 4; + insn_field_set(&insn->immediate, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -487,18 +492,15 @@ static int __get_immv(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate1.value = get_next(short, insn); - insn->immediate1.nbytes = 2; + insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: - insn->immediate1.value = get_next(int, insn); + insn_field_set(&insn->immediate1, get_next(int, insn), 4); insn->immediate1.nbytes = 4; break; case 8: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; - insn->immediate2.value = get_next(int, insn); - insn->immediate2.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); + insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -515,12 +517,10 @@ static int __get_immptr(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate1.value = get_next(short, insn); - insn->immediate1.nbytes = 2; + insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); break; case 8: /* ptr16:64 is not exist (no segment) */ @@ -528,8 +528,7 @@ static int __get_immptr(struct insn *insn) default: /* opnd_bytes must be modified manually */ goto err_out; } - insn->immediate2.value = get_next(unsigned short, insn); - insn->immediate2.nbytes = 2; + insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2); insn->immediate1.got = insn->immediate2.got = 1; return 1; @@ -565,22 +564,17 @@ void insn_get_immediate(struct insn *insn) switch (inat_immediate_size(insn->attr)) { case INAT_IMM_BYTE: - insn->immediate.value = get_next(signed char, insn); - insn->immediate.nbytes = 1; + insn_field_set(&insn->immediate, get_next(signed char, insn), 1); break; case INAT_IMM_WORD: - insn->immediate.value = get_next(short, insn); - insn->immediate.nbytes = 2; + insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case INAT_IMM_DWORD: - insn->immediate.value = get_next(int, insn); - insn->immediate.nbytes = 4; + insn_field_set(&insn->immediate, get_next(int, insn), 4); break; case INAT_IMM_QWORD: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; - insn->immediate2.value = get_next(int, insn); - insn->immediate2.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); + insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; case INAT_IMM_PTR: if (!__get_immptr(insn)) @@ -599,8 +593,7 @@ void insn_get_immediate(struct insn *insn) goto err_out; } if (inat_has_second_immediate(insn->attr)) { - insn->immediate2.value = get_next(signed char, insn); - insn->immediate2.nbytes = 1; + insn_field_set(&insn->immediate2, get_next(signed char, insn), 1); } done: insn->immediate.got = 1; diff --git a/arch/x86/tools/insn_sanity.c b/arch/x86/tools/insn_sanity.c index 185ceba9d289..c6a0000ae635 100644 --- a/arch/x86/tools/insn_sanity.c +++ b/arch/x86/tools/insn_sanity.c @@ -14,10 +14,6 @@ #include #include #include - -#define unlikely(cond) (cond) -#define ARRAY_SIZE(a) (sizeof(a)/sizeof(a[0])) - #include #include #include diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index 52c6262e6bfd..c1fab7a570be 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -7,9 +7,12 @@ * Copyright (C) IBM Corporation, 2009 */ +#include /* insn_attr_t is defined in inat.h */ #include "inat.h" +#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN) + struct insn_field { union { insn_value_t value; @@ -20,6 +23,36 @@ struct insn_field { unsigned char nbytes; }; +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->nbytes = n; +} + +#else + +struct insn_field { + insn_value_t value; + union { + insn_value_t little; + insn_byte_t bytes[4]; + }; + /* !0 if we've run insn_get_xxx() for this field */ + unsigned char got; + unsigned char nbytes; +}; + +static inline void insn_field_set(struct insn_field *p, insn_value_t v, + unsigned char n) +{ + p->value = v; + p->little = __cpu_to_le32(v); + p->nbytes = n; +} + +#endif + struct insn { struct insn_field prefixes; /* * Prefixes diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index 0151dfc6da61..77e92aa52cdc 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -5,6 +5,7 @@ * Copyright (C) IBM Corporation, 2002, 2004, 2009 */ +#include #ifdef __KERNEL__ #include #else @@ -15,15 +16,28 @@ #include "../include/asm/emulate_prefix.h" +#define leXX_to_cpu(t, r) \ +({ \ + __typeof__(t) v; \ + switch (sizeof(t)) { \ + case 4: v = le32_to_cpu(r); break; \ + case 2: v = le16_to_cpu(r); break; \ + case 1: v = r; break; \ + default: \ + BUILD_BUG(); break; \ + } \ + v; \ +}) + /* Verify next sizeof(t) bytes can be on the same instruction */ #define validate_next(t, insn, n) \ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ - ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; }) + ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ - ({ t r = *(t*)((insn)->next_byte + n); r; }) + ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) @@ -157,8 +171,7 @@ found: b = peek_next(insn_byte_t, insn); attr = inat_get_opcode_attribute(b); if (inat_is_rex_prefix(attr)) { - insn->rex_prefix.value = b; - insn->rex_prefix.nbytes = 1; + insn_field_set(&insn->rex_prefix, b, 1); insn->next_byte++; if (X86_REX_W(b)) /* REX.W overrides opnd_size */ @@ -295,8 +308,7 @@ void insn_get_modrm(struct insn *insn) if (inat_has_modrm(insn->attr)) { mod = get_next(insn_byte_t, insn); - modrm->value = mod; - modrm->nbytes = 1; + insn_field_set(modrm, mod, 1); if (inat_is_group(insn->attr)) { pfx_id = insn_last_prefix_id(insn); insn->attr = inat_get_group_attribute(mod, pfx_id, @@ -334,7 +346,7 @@ int insn_rip_relative(struct insn *insn) * For rip-relative instructions, the mod field (top 2 bits) * is zero and the r/m field (bottom 3 bits) is 0x5. */ - return (modrm->nbytes && (modrm->value & 0xc7) == 0x5); + return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5); } /** @@ -353,11 +365,11 @@ void insn_get_sib(struct insn *insn) if (!insn->modrm.got) insn_get_modrm(insn); if (insn->modrm.nbytes) { - modrm = (insn_byte_t)insn->modrm.value; + modrm = insn->modrm.bytes[0]; if (insn->addr_bytes != 2 && X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) { - insn->sib.value = get_next(insn_byte_t, insn); - insn->sib.nbytes = 1; + insn_field_set(&insn->sib, + get_next(insn_byte_t, insn), 1); } } insn->sib.got = 1; @@ -407,19 +419,18 @@ void insn_get_displacement(struct insn *insn) if (mod == 3) goto out; if (mod == 1) { - insn->displacement.value = get_next(signed char, insn); - insn->displacement.nbytes = 1; + insn_field_set(&insn->displacement, + get_next(signed char, insn), 1); } else if (insn->addr_bytes == 2) { if ((mod == 0 && rm == 6) || mod == 2) { - insn->displacement.value = - get_next(short, insn); - insn->displacement.nbytes = 2; + insn_field_set(&insn->displacement, + get_next(short, insn), 2); } } else { if ((mod == 0 && rm == 5) || mod == 2 || (mod == 0 && base == 5)) { - insn->displacement.value = get_next(int, insn); - insn->displacement.nbytes = 4; + insn_field_set(&insn->displacement, + get_next(int, insn), 4); } } } @@ -435,18 +446,14 @@ static int __get_moffset(struct insn *insn) { switch (insn->addr_bytes) { case 2: - insn->moffset1.value = get_next(short, insn); - insn->moffset1.nbytes = 2; + insn_field_set(&insn->moffset1, get_next(short, insn), 2); break; case 4: - insn->moffset1.value = get_next(int, insn); - insn->moffset1.nbytes = 4; + insn_field_set(&insn->moffset1, get_next(int, insn), 4); break; case 8: - insn->moffset1.value = get_next(int, insn); - insn->moffset1.nbytes = 4; - insn->moffset2.value = get_next(int, insn); - insn->moffset2.nbytes = 4; + insn_field_set(&insn->moffset1, get_next(int, insn), 4); + insn_field_set(&insn->moffset2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -464,13 +471,11 @@ static int __get_immv32(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate.value = get_next(short, insn); - insn->immediate.nbytes = 2; + insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case 4: case 8: - insn->immediate.value = get_next(int, insn); - insn->immediate.nbytes = 4; + insn_field_set(&insn->immediate, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -487,18 +492,15 @@ static int __get_immv(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate1.value = get_next(short, insn); - insn->immediate1.nbytes = 2; + insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: - insn->immediate1.value = get_next(int, insn); + insn_field_set(&insn->immediate1, get_next(int, insn), 4); insn->immediate1.nbytes = 4; break; case 8: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; - insn->immediate2.value = get_next(int, insn); - insn->immediate2.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); + insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; default: /* opnd_bytes must be modified manually */ goto err_out; @@ -515,12 +517,10 @@ static int __get_immptr(struct insn *insn) { switch (insn->opnd_bytes) { case 2: - insn->immediate1.value = get_next(short, insn); - insn->immediate1.nbytes = 2; + insn_field_set(&insn->immediate1, get_next(short, insn), 2); break; case 4: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); break; case 8: /* ptr16:64 is not exist (no segment) */ @@ -528,8 +528,7 @@ static int __get_immptr(struct insn *insn) default: /* opnd_bytes must be modified manually */ goto err_out; } - insn->immediate2.value = get_next(unsigned short, insn); - insn->immediate2.nbytes = 2; + insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2); insn->immediate1.got = insn->immediate2.got = 1; return 1; @@ -565,22 +564,17 @@ void insn_get_immediate(struct insn *insn) switch (inat_immediate_size(insn->attr)) { case INAT_IMM_BYTE: - insn->immediate.value = get_next(signed char, insn); - insn->immediate.nbytes = 1; + insn_field_set(&insn->immediate, get_next(signed char, insn), 1); break; case INAT_IMM_WORD: - insn->immediate.value = get_next(short, insn); - insn->immediate.nbytes = 2; + insn_field_set(&insn->immediate, get_next(short, insn), 2); break; case INAT_IMM_DWORD: - insn->immediate.value = get_next(int, insn); - insn->immediate.nbytes = 4; + insn_field_set(&insn->immediate, get_next(int, insn), 4); break; case INAT_IMM_QWORD: - insn->immediate1.value = get_next(int, insn); - insn->immediate1.nbytes = 4; - insn->immediate2.value = get_next(int, insn); - insn->immediate2.nbytes = 4; + insn_field_set(&insn->immediate1, get_next(int, insn), 4); + insn_field_set(&insn->immediate2, get_next(int, insn), 4); break; case INAT_IMM_PTR: if (!__get_immptr(insn)) @@ -599,8 +593,7 @@ void insn_get_immediate(struct insn *insn) goto err_out; } if (inat_has_second_immediate(insn->attr)) { - insn->immediate2.value = get_next(signed char, insn); - insn->immediate2.nbytes = 1; + insn_field_set(&insn->immediate2, get_next(signed char, insn), 1); } done: insn->immediate.got = 1; -- cgit v1.2.3 From a1a664ece586457e9f7652b0bc5b08386259e358 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 13 Nov 2020 00:03:26 +0100 Subject: objtool: Fix reloc generation on big endian cross-compiles Relocations generated in elf_rebuild_rel[a]_reloc_section() are broken if objtool is built and run on a big endian system. The following errors pop up during x86 cross-compilation: x86_64-9.1.0-ld: fs/efivarfs/inode.o: bad reloc symbol index (0x2000000 >= 0x22) for offset 0 in section `.orc_unwind_ip' x86_64-9.1.0-ld: final link failed: bad value Convert those functions to use gelf_update_rel[a](), similar to what elf_write_reloc() does. Signed-off-by: Martin Schwidefsky Co-developed-by: Vasily Gorbik Signed-off-by: Vasily Gorbik Acked-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Signed-off-by: Josh Poimboeuf --- tools/objtool/elf.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index be89c741ba9a..c784122b7ecb 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -855,25 +855,27 @@ static int elf_rebuild_rel_reloc_section(struct section *sec, int nr) { struct reloc *reloc; int idx = 0, size; - GElf_Rel *relocs; + void *buf; /* Allocate a buffer for relocations */ - size = nr * sizeof(*relocs); - relocs = malloc(size); - if (!relocs) { + size = nr * sizeof(GElf_Rel); + buf = malloc(size); + if (!buf) { perror("malloc"); return -1; } - sec->data->d_buf = relocs; + sec->data->d_buf = buf; sec->data->d_size = size; + sec->data->d_type = ELF_T_REL; sec->sh.sh_size = size; idx = 0; list_for_each_entry(reloc, &sec->reloc_list, list) { - relocs[idx].r_offset = reloc->offset; - relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); + reloc->rel.r_offset = reloc->offset; + reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); + gelf_update_rel(sec->data, idx, &reloc->rel); idx++; } @@ -884,26 +886,28 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) { struct reloc *reloc; int idx = 0, size; - GElf_Rela *relocs; + void *buf; /* Allocate a buffer for relocations with addends */ - size = nr * sizeof(*relocs); - relocs = malloc(size); - if (!relocs) { + size = nr * sizeof(GElf_Rela); + buf = malloc(size); + if (!buf) { perror("malloc"); return -1; } - sec->data->d_buf = relocs; + sec->data->d_buf = buf; sec->data->d_size = size; + sec->data->d_type = ELF_T_RELA; sec->sh.sh_size = size; idx = 0; list_for_each_entry(reloc, &sec->reloc_list, list) { - relocs[idx].r_offset = reloc->offset; - relocs[idx].r_addend = reloc->addend; - relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); + reloc->rela.r_offset = reloc->offset; + reloc->rela.r_addend = reloc->addend; + reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); + gelf_update_rela(sec->data, idx, &reloc->rela); idx++; } -- cgit v1.2.3 From 8bfe273238d77d3cee18e4c03b2f26ae360b5661 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 13 Nov 2020 00:03:29 +0100 Subject: objtool: Fix x86 orc generation on big endian cross-compiles Correct objtool orc generation endianness problems to enable fully functional x86 cross-compiles on big endian hardware. Introduce bswap_if_needed() macro, which does a byte swap if target endianness doesn't match the host, i.e. cross-compilation for little endian on big endian and vice versa. The macro is used for conversion of multi-byte values which are read from / about to be written to a target native endianness ELF file. Signed-off-by: Vasily Gorbik Acked-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Signed-off-by: Josh Poimboeuf --- arch/x86/include/asm/orc_types.h | 10 +++++++ tools/arch/x86/include/asm/orc_types.h | 10 +++++++ tools/objtool/arch/x86/include/arch_endianness.h | 9 ++++++ tools/objtool/check.c | 5 ++-- tools/objtool/endianness.h | 38 ++++++++++++++++++++++++ tools/objtool/orc_dump.c | 5 ++-- tools/objtool/orc_gen.c | 3 ++ tools/objtool/special.c | 6 ++-- 8 files changed, 80 insertions(+), 6 deletions(-) create mode 100644 tools/objtool/arch/x86/include/arch_endianness.h create mode 100644 tools/objtool/endianness.h (limited to 'tools') diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index fdbffec4cfde..5a2baf28a1dc 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -40,6 +40,8 @@ #define ORC_REG_MAX 15 #ifndef __ASSEMBLY__ +#include + /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -51,10 +53,18 @@ struct orc_entry { s16 sp_offset; s16 bp_offset; +#if defined(__LITTLE_ENDIAN_BITFIELD) unsigned sp_reg:4; unsigned bp_reg:4; unsigned type:2; unsigned end:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + unsigned bp_reg:4; + unsigned sp_reg:4; + unsigned unused:5; + unsigned end:1; + unsigned type:2; +#endif } __packed; #endif /* __ASSEMBLY__ */ diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h index fdbffec4cfde..5a2baf28a1dc 100644 --- a/tools/arch/x86/include/asm/orc_types.h +++ b/tools/arch/x86/include/asm/orc_types.h @@ -40,6 +40,8 @@ #define ORC_REG_MAX 15 #ifndef __ASSEMBLY__ +#include + /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -51,10 +53,18 @@ struct orc_entry { s16 sp_offset; s16 bp_offset; +#if defined(__LITTLE_ENDIAN_BITFIELD) unsigned sp_reg:4; unsigned bp_reg:4; unsigned type:2; unsigned end:1; +#elif defined(__BIG_ENDIAN_BITFIELD) + unsigned bp_reg:4; + unsigned sp_reg:4; + unsigned unused:5; + unsigned end:1; + unsigned type:2; +#endif } __packed; #endif /* __ASSEMBLY__ */ diff --git a/tools/objtool/arch/x86/include/arch_endianness.h b/tools/objtool/arch/x86/include/arch_endianness.h new file mode 100644 index 000000000000..7c362527da20 --- /dev/null +++ b/tools/objtool/arch/x86/include/arch_endianness.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ARCH_ENDIANNESS_H +#define _ARCH_ENDIANNESS_H + +#include + +#define __TARGET_BYTE_ORDER __LITTLE_ENDIAN + +#endif /* _ARCH_ENDIANNESS_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 270adc38d896..8cda0ef06522 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -13,6 +13,7 @@ #include "special.h" #include "warn.h" #include "arch_elf.h" +#include "endianness.h" #include #include @@ -1435,7 +1436,7 @@ static int read_unwind_hints(struct objtool_file *file) cfa = &insn->cfi.cfa; if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) { - insn->ret_offset = hint->sp_offset; + insn->ret_offset = bswap_if_needed(hint->sp_offset); continue; } @@ -1447,7 +1448,7 @@ static int read_unwind_hints(struct objtool_file *file) return -1; } - cfa->offset = hint->sp_offset; + cfa->offset = bswap_if_needed(hint->sp_offset); insn->cfi.type = hint->type; insn->cfi.end = hint->end; } diff --git a/tools/objtool/endianness.h b/tools/objtool/endianness.h new file mode 100644 index 000000000000..ebece3191b58 --- /dev/null +++ b/tools/objtool/endianness.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _OBJTOOL_ENDIANNESS_H +#define _OBJTOOL_ENDIANNESS_H + +#include +#include +#include "arch_endianness.h" + +#ifndef __TARGET_BYTE_ORDER +#error undefined arch __TARGET_BYTE_ORDER +#endif + +#if __BYTE_ORDER != __TARGET_BYTE_ORDER +#define __NEED_BSWAP 1 +#else +#define __NEED_BSWAP 0 +#endif + +/* + * Does a byte swap if target endianness doesn't match the host, i.e. cross + * compilation for little endian on big endian and vice versa. + * To be used for multi-byte values conversion, which are read from / about + * to be written to a target native endianness ELF file. + */ +#define bswap_if_needed(val) \ +({ \ + __typeof__(val) __ret; \ + switch (sizeof(val)) { \ + case 8: __ret = __NEED_BSWAP ? bswap_64(val) : (val); break; \ + case 4: __ret = __NEED_BSWAP ? bswap_32(val) : (val); break; \ + case 2: __ret = __NEED_BSWAP ? bswap_16(val) : (val); break; \ + default: \ + BUILD_BUG(); break; \ + } \ + __ret; \ +}) + +#endif /* _OBJTOOL_ENDIANNESS_H */ diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index 5e6a95368d35..4e818a22e44b 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -8,6 +8,7 @@ #include #include "objtool.h" #include "warn.h" +#include "endianness.h" static const char *reg_name(unsigned int reg) { @@ -197,11 +198,11 @@ int orc_dump(const char *_objname) printf(" sp:"); - print_reg(orc[i].sp_reg, orc[i].sp_offset); + print_reg(orc[i].sp_reg, bswap_if_needed(orc[i].sp_offset)); printf(" bp:"); - print_reg(orc[i].bp_reg, orc[i].bp_offset); + print_reg(orc[i].bp_reg, bswap_if_needed(orc[i].bp_offset)); printf(" type:%s end:%d\n", orc_type_name(orc[i].type), orc[i].end); diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 9ce68b385a1b..1be7e16b2595 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -11,6 +11,7 @@ #include "check.h" #include "warn.h" +#include "endianness.h" int create_orc(struct objtool_file *file) { @@ -96,6 +97,8 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti /* populate ORC data */ orc = (struct orc_entry *)u_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); + orc->sp_offset = bswap_if_needed(orc->sp_offset); + orc->bp_offset = bswap_if_needed(orc->bp_offset); /* populate reloc for ip */ reloc = malloc(sizeof(*reloc)); diff --git a/tools/objtool/special.c b/tools/objtool/special.c index 1a2420febd08..ab7cb1e13411 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -15,6 +15,7 @@ #include "special.h" #include "warn.h" #include "arch_special.h" +#include "endianness.h" struct special_entry { const char *sec; @@ -77,8 +78,9 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, if (entry->feature) { unsigned short feature; - feature = *(unsigned short *)(sec->data->d_buf + offset + - entry->feature); + feature = bswap_if_needed(*(unsigned short *)(sec->data->d_buf + + offset + + entry->feature)); arch_handle_alternative(feature, alt); } -- cgit v1.2.3 From 7786032e52cb02982a7154993b5d88c9c7a31ba5 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 13 Nov 2020 00:03:32 +0100 Subject: objtool: Rework header include paths Currently objtool headers are being included either by their base name or included via ../ from a parent directory. In case of a base name usage: #include "warn.h" #include "arch_elf.h" it does not make it apparent from which directory the file comes from. To make it slightly better, and actually to avoid name clashes some arch specific files have "arch_" suffix. And files from an arch folder have to revert to including via ../ e.g: #include "../../elf.h" With additional architectures support and the code base growth there is a need for clearer headers naming scheme for multiple reasons: 1. to make it instantly obvious where these files come from (objtool itself / objtool arch|generic folders / some other external files), 2. to avoid name clashes of objtool arch specific headers, potential obtool arch generic headers and the system header files (there is /usr/include/elf.h already), 3. to avoid ../ includes and improve code readability. 4. to give a warm fuzzy feeling to developers who are mostly kernel developers and are accustomed to linux kernel headers arranging scheme. Doesn't this make it instantly obvious where are these files come from? #include #include And doesn't it look nicer to avoid ugly ../ includes? Which also guarantees this is elf.h from the objtool and not /usr/include/elf.h. #include This patch defines and implements new objtool headers arranging scheme. Which is: - all generic headers go to include/objtool (similar to include/linux) - all arch headers go to arch/$(SRCARCH)/include/arch (to get arch prefix). This is similar to linux arch specific "asm/*" headers but we are not abusing "asm" name and calling it what it is. This also helps to prevent name clashes (arch is not used in system headers or kernel exports). To bring objtool to this state the following things are done: 1. current top level tools/objtool/ headers are moved into include/objtool/ subdirectory, 2. arch specific headers, currently only arch/x86/include/ are moved into arch/x86/include/arch/ and were stripped of "arch_" suffix, 3. new -I$(srctree)/tools/objtool/include include path to make includes like possible, 4. rewriting file includes, 5. make git not to ignore include/objtool/ subdirectory. Signed-off-by: Vasily Gorbik Acked-by: Peter Zijlstra (Intel) Acked-by: Masami Hiramatsu Signed-off-by: Josh Poimboeuf --- tools/objtool/.gitignore | 2 +- tools/objtool/Makefile | 1 + tools/objtool/arch.h | 93 -------------- tools/objtool/arch/x86/decode.c | 8 +- tools/objtool/arch/x86/include/arch/cfi_regs.h | 25 ++++ tools/objtool/arch/x86/include/arch/elf.h | 6 + tools/objtool/arch/x86/include/arch/endianness.h | 9 ++ tools/objtool/arch/x86/include/arch/special.h | 20 +++ tools/objtool/arch/x86/include/arch_elf.h | 6 - tools/objtool/arch/x86/include/arch_endianness.h | 9 -- tools/objtool/arch/x86/include/arch_special.h | 20 --- tools/objtool/arch/x86/include/cfi_regs.h | 25 ---- tools/objtool/arch/x86/special.c | 4 +- tools/objtool/builtin-check.c | 4 +- tools/objtool/builtin-orc.c | 4 +- tools/objtool/builtin.h | 16 --- tools/objtool/cfi.h | 38 ------ tools/objtool/check.c | 16 +-- tools/objtool/check.h | 69 ----------- tools/objtool/elf.c | 6 +- tools/objtool/elf.h | 150 ----------------------- tools/objtool/endianness.h | 38 ------ tools/objtool/include/objtool/arch.h | 93 ++++++++++++++ tools/objtool/include/objtool/builtin.h | 16 +++ tools/objtool/include/objtool/cfi.h | 38 ++++++ tools/objtool/include/objtool/check.h | 69 +++++++++++ tools/objtool/include/objtool/elf.h | 150 +++++++++++++++++++++++ tools/objtool/include/objtool/endianness.h | 38 ++++++ tools/objtool/include/objtool/objtool.h | 32 +++++ tools/objtool/include/objtool/special.h | 41 +++++++ tools/objtool/include/objtool/warn.h | 66 ++++++++++ tools/objtool/objtool.c | 6 +- tools/objtool/objtool.h | 32 ----- tools/objtool/orc_dump.c | 6 +- tools/objtool/orc_gen.c | 6 +- tools/objtool/special.c | 10 +- tools/objtool/special.h | 41 ------- tools/objtool/warn.h | 66 ---------- tools/objtool/weak.c | 2 +- 39 files changed, 641 insertions(+), 640 deletions(-) delete mode 100644 tools/objtool/arch.h create mode 100644 tools/objtool/arch/x86/include/arch/cfi_regs.h create mode 100644 tools/objtool/arch/x86/include/arch/elf.h create mode 100644 tools/objtool/arch/x86/include/arch/endianness.h create mode 100644 tools/objtool/arch/x86/include/arch/special.h delete mode 100644 tools/objtool/arch/x86/include/arch_elf.h delete mode 100644 tools/objtool/arch/x86/include/arch_endianness.h delete mode 100644 tools/objtool/arch/x86/include/arch_special.h delete mode 100644 tools/objtool/arch/x86/include/cfi_regs.h delete mode 100644 tools/objtool/builtin.h delete mode 100644 tools/objtool/cfi.h delete mode 100644 tools/objtool/check.h delete mode 100644 tools/objtool/elf.h delete mode 100644 tools/objtool/endianness.h create mode 100644 tools/objtool/include/objtool/arch.h create mode 100644 tools/objtool/include/objtool/builtin.h create mode 100644 tools/objtool/include/objtool/cfi.h create mode 100644 tools/objtool/include/objtool/check.h create mode 100644 tools/objtool/include/objtool/elf.h create mode 100644 tools/objtool/include/objtool/endianness.h create mode 100644 tools/objtool/include/objtool/objtool.h create mode 100644 tools/objtool/include/objtool/special.h create mode 100644 tools/objtool/include/objtool/warn.h delete mode 100644 tools/objtool/objtool.h delete mode 100644 tools/objtool/special.h delete mode 100644 tools/objtool/warn.h (limited to 'tools') diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore index 45cefda24c7b..14236db3677f 100644 --- a/tools/objtool/.gitignore +++ b/tools/objtool/.gitignore @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only arch/x86/lib/inat-tables.c -objtool +/objtool fixdep diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index 5cdb19036d7f..d179299980b9 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -27,6 +27,7 @@ all: $(OBJTOOL) INCLUDES := -I$(srctree)/tools/include \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -I$(srctree)/tools/arch/$(SRCARCH)/include \ + -I$(srctree)/tools/objtool/include \ -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS) diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h deleted file mode 100644 index 4a84c3081b8e..000000000000 --- a/tools/objtool/arch.h +++ /dev/null @@ -1,93 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2015 Josh Poimboeuf - */ - -#ifndef _ARCH_H -#define _ARCH_H - -#include -#include -#include "objtool.h" -#include "cfi.h" - -#ifdef INSN_USE_ORC -#include -#endif - -enum insn_type { - INSN_JUMP_CONDITIONAL, - INSN_JUMP_UNCONDITIONAL, - INSN_JUMP_DYNAMIC, - INSN_JUMP_DYNAMIC_CONDITIONAL, - INSN_CALL, - INSN_CALL_DYNAMIC, - INSN_RETURN, - INSN_CONTEXT_SWITCH, - INSN_BUG, - INSN_NOP, - INSN_STAC, - INSN_CLAC, - INSN_STD, - INSN_CLD, - INSN_OTHER, -}; - -enum op_dest_type { - OP_DEST_REG, - OP_DEST_REG_INDIRECT, - OP_DEST_MEM, - OP_DEST_PUSH, - OP_DEST_PUSHF, - OP_DEST_LEAVE, -}; - -struct op_dest { - enum op_dest_type type; - unsigned char reg; - int offset; -}; - -enum op_src_type { - OP_SRC_REG, - OP_SRC_REG_INDIRECT, - OP_SRC_CONST, - OP_SRC_POP, - OP_SRC_POPF, - OP_SRC_ADD, - OP_SRC_AND, -}; - -struct op_src { - enum op_src_type type; - unsigned char reg; - int offset; -}; - -struct stack_op { - struct op_dest dest; - struct op_src src; - struct list_head list; -}; - -struct instruction; - -void arch_initial_func_cfi_state(struct cfi_init_state *state); - -int arch_decode_instruction(const struct elf *elf, const struct section *sec, - unsigned long offset, unsigned int maxlen, - unsigned int *len, enum insn_type *type, - unsigned long *immediate, - struct list_head *ops_list); - -bool arch_callee_saved_reg(unsigned char reg); - -unsigned long arch_jump_destination(struct instruction *insn); - -unsigned long arch_dest_reloc_offset(int addend); - -const char *arch_nop_insn(int len); - -int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); - -#endif /* _ARCH_H */ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index cde9c36e40ae..6baa22732ca6 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -11,11 +11,11 @@ #include "../../../arch/x86/lib/inat.c" #include "../../../arch/x86/lib/insn.c" -#include "../../check.h" -#include "../../elf.h" -#include "../../arch.h" -#include "../../warn.h" #include +#include +#include +#include +#include static unsigned char op_to_cfi_reg[][2] = { {CFI_AX, CFI_R8}, diff --git a/tools/objtool/arch/x86/include/arch/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h new file mode 100644 index 000000000000..79bc517efba8 --- /dev/null +++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#ifndef _OBJTOOL_CFI_REGS_H +#define _OBJTOOL_CFI_REGS_H + +#define CFI_AX 0 +#define CFI_DX 1 +#define CFI_CX 2 +#define CFI_BX 3 +#define CFI_SI 4 +#define CFI_DI 5 +#define CFI_BP 6 +#define CFI_SP 7 +#define CFI_R8 8 +#define CFI_R9 9 +#define CFI_R10 10 +#define CFI_R11 11 +#define CFI_R12 12 +#define CFI_R13 13 +#define CFI_R14 14 +#define CFI_R15 15 +#define CFI_RA 16 +#define CFI_NUM_REGS 17 + +#endif /* _OBJTOOL_CFI_REGS_H */ diff --git a/tools/objtool/arch/x86/include/arch/elf.h b/tools/objtool/arch/x86/include/arch/elf.h new file mode 100644 index 000000000000..69cc4264b28a --- /dev/null +++ b/tools/objtool/arch/x86/include/arch/elf.h @@ -0,0 +1,6 @@ +#ifndef _OBJTOOL_ARCH_ELF +#define _OBJTOOL_ARCH_ELF + +#define R_NONE R_X86_64_NONE + +#endif /* _OBJTOOL_ARCH_ELF */ diff --git a/tools/objtool/arch/x86/include/arch/endianness.h b/tools/objtool/arch/x86/include/arch/endianness.h new file mode 100644 index 000000000000..7c362527da20 --- /dev/null +++ b/tools/objtool/arch/x86/include/arch/endianness.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ARCH_ENDIANNESS_H +#define _ARCH_ENDIANNESS_H + +#include + +#define __TARGET_BYTE_ORDER __LITTLE_ENDIAN + +#endif /* _ARCH_ENDIANNESS_H */ diff --git a/tools/objtool/arch/x86/include/arch/special.h b/tools/objtool/arch/x86/include/arch/special.h new file mode 100644 index 000000000000..d818b2bffa02 --- /dev/null +++ b/tools/objtool/arch/x86/include/arch/special.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _X86_ARCH_SPECIAL_H +#define _X86_ARCH_SPECIAL_H + +#define EX_ENTRY_SIZE 12 +#define EX_ORIG_OFFSET 0 +#define EX_NEW_OFFSET 4 + +#define JUMP_ENTRY_SIZE 16 +#define JUMP_ORIG_OFFSET 0 +#define JUMP_NEW_OFFSET 4 + +#define ALT_ENTRY_SIZE 13 +#define ALT_ORIG_OFFSET 0 +#define ALT_NEW_OFFSET 4 +#define ALT_FEATURE_OFFSET 8 +#define ALT_ORIG_LEN_OFFSET 10 +#define ALT_NEW_LEN_OFFSET 11 + +#endif /* _X86_ARCH_SPECIAL_H */ diff --git a/tools/objtool/arch/x86/include/arch_elf.h b/tools/objtool/arch/x86/include/arch_elf.h deleted file mode 100644 index 69cc4264b28a..000000000000 --- a/tools/objtool/arch/x86/include/arch_elf.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef _OBJTOOL_ARCH_ELF -#define _OBJTOOL_ARCH_ELF - -#define R_NONE R_X86_64_NONE - -#endif /* _OBJTOOL_ARCH_ELF */ diff --git a/tools/objtool/arch/x86/include/arch_endianness.h b/tools/objtool/arch/x86/include/arch_endianness.h deleted file mode 100644 index 7c362527da20..000000000000 --- a/tools/objtool/arch/x86/include/arch_endianness.h +++ /dev/null @@ -1,9 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _ARCH_ENDIANNESS_H -#define _ARCH_ENDIANNESS_H - -#include - -#define __TARGET_BYTE_ORDER __LITTLE_ENDIAN - -#endif /* _ARCH_ENDIANNESS_H */ diff --git a/tools/objtool/arch/x86/include/arch_special.h b/tools/objtool/arch/x86/include/arch_special.h deleted file mode 100644 index d818b2bffa02..000000000000 --- a/tools/objtool/arch/x86/include/arch_special.h +++ /dev/null @@ -1,20 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _X86_ARCH_SPECIAL_H -#define _X86_ARCH_SPECIAL_H - -#define EX_ENTRY_SIZE 12 -#define EX_ORIG_OFFSET 0 -#define EX_NEW_OFFSET 4 - -#define JUMP_ENTRY_SIZE 16 -#define JUMP_ORIG_OFFSET 0 -#define JUMP_NEW_OFFSET 4 - -#define ALT_ENTRY_SIZE 13 -#define ALT_ORIG_OFFSET 0 -#define ALT_NEW_OFFSET 4 -#define ALT_FEATURE_OFFSET 8 -#define ALT_ORIG_LEN_OFFSET 10 -#define ALT_NEW_LEN_OFFSET 11 - -#endif /* _X86_ARCH_SPECIAL_H */ diff --git a/tools/objtool/arch/x86/include/cfi_regs.h b/tools/objtool/arch/x86/include/cfi_regs.h deleted file mode 100644 index 79bc517efba8..000000000000 --- a/tools/objtool/arch/x86/include/cfi_regs.h +++ /dev/null @@ -1,25 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ - -#ifndef _OBJTOOL_CFI_REGS_H -#define _OBJTOOL_CFI_REGS_H - -#define CFI_AX 0 -#define CFI_DX 1 -#define CFI_CX 2 -#define CFI_BX 3 -#define CFI_SI 4 -#define CFI_DI 5 -#define CFI_BP 6 -#define CFI_SP 7 -#define CFI_R8 8 -#define CFI_R9 9 -#define CFI_R10 10 -#define CFI_R11 11 -#define CFI_R12 12 -#define CFI_R13 13 -#define CFI_R14 14 -#define CFI_R15 15 -#define CFI_RA 16 -#define CFI_NUM_REGS 17 - -#endif /* _OBJTOOL_CFI_REGS_H */ diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index fd4af88c0ea5..b4bd3505fc94 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include -#include "../../special.h" -#include "../../builtin.h" +#include +#include #define X86_FEATURE_POPCNT (4 * 32 + 23) #define X86_FEATURE_SMAP (9 * 32 + 20) diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index c6d199bfd0ae..f47951e19c9d 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -15,8 +15,8 @@ #include #include -#include "builtin.h" -#include "objtool.h" +#include +#include bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux; diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 7b31121fa60b..6745f3328a0e 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -13,8 +13,8 @@ */ #include -#include "builtin.h" -#include "objtool.h" +#include +#include static const char *orc_usage[] = { "objtool orc generate [] file.o", diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h deleted file mode 100644 index 85c979caa367..000000000000 --- a/tools/objtool/builtin.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2015 Josh Poimboeuf - */ -#ifndef _BUILTIN_H -#define _BUILTIN_H - -#include - -extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux; - -extern int cmd_check(int argc, const char **argv); -extern int cmd_orc(int argc, const char **argv); - -#endif /* _BUILTIN_H */ diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h deleted file mode 100644 index c7c59c6a44ee..000000000000 --- a/tools/objtool/cfi.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2015-2017 Josh Poimboeuf - */ - -#ifndef _OBJTOOL_CFI_H -#define _OBJTOOL_CFI_H - -#include "cfi_regs.h" - -#define CFI_UNDEFINED -1 -#define CFI_CFA -2 -#define CFI_SP_INDIRECT -3 -#define CFI_BP_INDIRECT -4 - -struct cfi_reg { - int base; - int offset; -}; - -struct cfi_init_state { - struct cfi_reg regs[CFI_NUM_REGS]; - struct cfi_reg cfa; -}; - -struct cfi_state { - struct cfi_reg regs[CFI_NUM_REGS]; - struct cfi_reg vals[CFI_NUM_REGS]; - struct cfi_reg cfa; - int stack_size; - int drap_reg, drap_offset; - unsigned char type; - bool bp_scratch; - bool drap; - bool end; -}; - -#endif /* _OBJTOOL_CFI_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8cda0ef06522..8976047cb648 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -6,14 +6,14 @@ #include #include -#include "builtin.h" -#include "cfi.h" -#include "arch.h" -#include "check.h" -#include "special.h" -#include "warn.h" -#include "arch_elf.h" -#include "endianness.h" +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/tools/objtool/check.h b/tools/objtool/check.h deleted file mode 100644 index 5ec00a4b891b..000000000000 --- a/tools/objtool/check.h +++ /dev/null @@ -1,69 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2017 Josh Poimboeuf - */ - -#ifndef _CHECK_H -#define _CHECK_H - -#include -#include "cfi.h" -#include "arch.h" - -struct insn_state { - struct cfi_state cfi; - unsigned int uaccess_stack; - bool uaccess; - bool df; - bool noinstr; - s8 instr; -}; - -struct instruction { - struct list_head list; - struct hlist_node hash; - struct list_head static_call_node; - struct section *sec; - unsigned long offset; - unsigned int len; - enum insn_type type; - unsigned long immediate; - bool dead_end, ignore, ignore_alts; - bool hint; - bool retpoline_safe; - s8 instr; - u8 visited; - u8 ret_offset; - int alt_group; - struct symbol *call_dest; - struct instruction *jump_dest; - struct instruction *first_jump_src; - struct reloc *jump_table; - struct list_head alts; - struct symbol *func; - struct list_head stack_ops; - struct cfi_state cfi; -#ifdef INSN_USE_ORC - struct orc_entry orc; -#endif -}; - -static inline bool is_static_jump(struct instruction *insn) -{ - return insn->type == INSN_JUMP_CONDITIONAL || - insn->type == INSN_JUMP_UNCONDITIONAL; -} - -struct instruction *find_insn(struct objtool_file *file, - struct section *sec, unsigned long offset); - -#define for_each_insn(file, insn) \ - list_for_each_entry(insn, &file->insn_list, list) - -#define sec_for_each_insn(file, sec, insn) \ - for (insn = find_insn(file, sec, 0); \ - insn && &insn->list != &file->insn_list && \ - insn->sec == sec; \ - insn = list_next_entry(insn, list)) - -#endif /* _CHECK_H */ diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index c784122b7ecb..43714ecd09f7 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -15,10 +15,10 @@ #include #include #include -#include "builtin.h" +#include -#include "elf.h" -#include "warn.h" +#include +#include #define MAX_NAME_LEN 128 diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h deleted file mode 100644 index e6890cc70a25..000000000000 --- a/tools/objtool/elf.h +++ /dev/null @@ -1,150 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2015 Josh Poimboeuf - */ - -#ifndef _OBJTOOL_ELF_H -#define _OBJTOOL_ELF_H - -#include -#include -#include -#include -#include -#include - -#ifdef LIBELF_USE_DEPRECATED -# define elf_getshdrnum elf_getshnum -# define elf_getshdrstrndx elf_getshstrndx -#endif - -/* - * Fallback for systems without this "read, mmaping if possible" cmd. - */ -#ifndef ELF_C_READ_MMAP -#define ELF_C_READ_MMAP ELF_C_READ -#endif - -struct section { - struct list_head list; - struct hlist_node hash; - struct hlist_node name_hash; - GElf_Shdr sh; - struct rb_root symbol_tree; - struct list_head symbol_list; - struct list_head reloc_list; - struct section *base, *reloc; - struct symbol *sym; - Elf_Data *data; - char *name; - int idx; - unsigned int len; - bool changed, text, rodata, noinstr; -}; - -struct symbol { - struct list_head list; - struct rb_node node; - struct hlist_node hash; - struct hlist_node name_hash; - GElf_Sym sym; - struct section *sec; - char *name; - unsigned int idx; - unsigned char bind, type; - unsigned long offset; - unsigned int len; - struct symbol *pfunc, *cfunc, *alias; - bool uaccess_safe; - bool static_call_tramp; -}; - -struct reloc { - struct list_head list; - struct hlist_node hash; - union { - GElf_Rela rela; - GElf_Rel rel; - }; - struct section *sec; - struct symbol *sym; - unsigned long offset; - unsigned int type; - int addend; - int idx; - bool jump_table_start; -}; - -#define ELF_HASH_BITS 20 - -struct elf { - Elf *elf; - GElf_Ehdr ehdr; - int fd; - bool changed; - char *name; - struct list_head sections; - DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS); - DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS); - DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS); - DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS); - DECLARE_HASHTABLE(reloc_hash, ELF_HASH_BITS); -}; - -#define OFFSET_STRIDE_BITS 4 -#define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS) -#define OFFSET_STRIDE_MASK (~(OFFSET_STRIDE - 1)) - -#define for_offset_range(_offset, _start, _end) \ - for (_offset = ((_start) & OFFSET_STRIDE_MASK); \ - _offset >= ((_start) & OFFSET_STRIDE_MASK) && \ - _offset <= ((_end) & OFFSET_STRIDE_MASK); \ - _offset += OFFSET_STRIDE) - -static inline u32 sec_offset_hash(struct section *sec, unsigned long offset) -{ - u32 ol, oh, idx = sec->idx; - - offset &= OFFSET_STRIDE_MASK; - - ol = offset; - oh = (offset >> 16) >> 16; - - __jhash_mix(ol, oh, idx); - - return ol; -} - -static inline u32 reloc_hash(struct reloc *reloc) -{ - return sec_offset_hash(reloc->sec, reloc->offset); -} - -struct elf *elf_open_read(const char *name, int flags); -struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); -struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); -void elf_add_reloc(struct elf *elf, struct reloc *reloc); -int elf_write_insn(struct elf *elf, struct section *sec, - unsigned long offset, unsigned int len, - const char *insn); -int elf_write_reloc(struct elf *elf, struct reloc *reloc); -int elf_write(struct elf *elf); -void elf_close(struct elf *elf); - -struct section *find_section_by_name(const struct elf *elf, const char *name); -struct symbol *find_func_by_offset(struct section *sec, unsigned long offset); -struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); -struct symbol *find_symbol_by_name(const struct elf *elf, const char *name); -struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset); -struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset); -struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, - unsigned long offset, unsigned int len); -struct symbol *find_func_containing(struct section *sec, unsigned long offset); -void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, - struct reloc *reloc); -int elf_rebuild_reloc_section(struct elf *elf, struct section *sec); - -#define for_each_sec(file, sec) \ - list_for_each_entry(sec, &file->elf->sections, list) - -#endif /* _OBJTOOL_ELF_H */ diff --git a/tools/objtool/endianness.h b/tools/objtool/endianness.h deleted file mode 100644 index ebece3191b58..000000000000 --- a/tools/objtool/endianness.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _OBJTOOL_ENDIANNESS_H -#define _OBJTOOL_ENDIANNESS_H - -#include -#include -#include "arch_endianness.h" - -#ifndef __TARGET_BYTE_ORDER -#error undefined arch __TARGET_BYTE_ORDER -#endif - -#if __BYTE_ORDER != __TARGET_BYTE_ORDER -#define __NEED_BSWAP 1 -#else -#define __NEED_BSWAP 0 -#endif - -/* - * Does a byte swap if target endianness doesn't match the host, i.e. cross - * compilation for little endian on big endian and vice versa. - * To be used for multi-byte values conversion, which are read from / about - * to be written to a target native endianness ELF file. - */ -#define bswap_if_needed(val) \ -({ \ - __typeof__(val) __ret; \ - switch (sizeof(val)) { \ - case 8: __ret = __NEED_BSWAP ? bswap_64(val) : (val); break; \ - case 4: __ret = __NEED_BSWAP ? bswap_32(val) : (val); break; \ - case 2: __ret = __NEED_BSWAP ? bswap_16(val) : (val); break; \ - default: \ - BUILD_BUG(); break; \ - } \ - __ret; \ -}) - -#endif /* _OBJTOOL_ENDIANNESS_H */ diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h new file mode 100644 index 000000000000..dc4f503a3ae4 --- /dev/null +++ b/tools/objtool/include/objtool/arch.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2015 Josh Poimboeuf + */ + +#ifndef _ARCH_H +#define _ARCH_H + +#include +#include +#include +#include + +#ifdef INSN_USE_ORC +#include +#endif + +enum insn_type { + INSN_JUMP_CONDITIONAL, + INSN_JUMP_UNCONDITIONAL, + INSN_JUMP_DYNAMIC, + INSN_JUMP_DYNAMIC_CONDITIONAL, + INSN_CALL, + INSN_CALL_DYNAMIC, + INSN_RETURN, + INSN_CONTEXT_SWITCH, + INSN_BUG, + INSN_NOP, + INSN_STAC, + INSN_CLAC, + INSN_STD, + INSN_CLD, + INSN_OTHER, +}; + +enum op_dest_type { + OP_DEST_REG, + OP_DEST_REG_INDIRECT, + OP_DEST_MEM, + OP_DEST_PUSH, + OP_DEST_PUSHF, + OP_DEST_LEAVE, +}; + +struct op_dest { + enum op_dest_type type; + unsigned char reg; + int offset; +}; + +enum op_src_type { + OP_SRC_REG, + OP_SRC_REG_INDIRECT, + OP_SRC_CONST, + OP_SRC_POP, + OP_SRC_POPF, + OP_SRC_ADD, + OP_SRC_AND, +}; + +struct op_src { + enum op_src_type type; + unsigned char reg; + int offset; +}; + +struct stack_op { + struct op_dest dest; + struct op_src src; + struct list_head list; +}; + +struct instruction; + +void arch_initial_func_cfi_state(struct cfi_init_state *state); + +int arch_decode_instruction(const struct elf *elf, const struct section *sec, + unsigned long offset, unsigned int maxlen, + unsigned int *len, enum insn_type *type, + unsigned long *immediate, + struct list_head *ops_list); + +bool arch_callee_saved_reg(unsigned char reg); + +unsigned long arch_jump_destination(struct instruction *insn); + +unsigned long arch_dest_reloc_offset(int addend); + +const char *arch_nop_insn(int len); + +int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg); + +#endif /* _ARCH_H */ diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h new file mode 100644 index 000000000000..85c979caa367 --- /dev/null +++ b/tools/objtool/include/objtool/builtin.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2015 Josh Poimboeuf + */ +#ifndef _BUILTIN_H +#define _BUILTIN_H + +#include + +extern const struct option check_options[]; +extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux; + +extern int cmd_check(int argc, const char **argv); +extern int cmd_orc(int argc, const char **argv); + +#endif /* _BUILTIN_H */ diff --git a/tools/objtool/include/objtool/cfi.h b/tools/objtool/include/objtool/cfi.h new file mode 100644 index 000000000000..fd5cb0bed9bf --- /dev/null +++ b/tools/objtool/include/objtool/cfi.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2015-2017 Josh Poimboeuf + */ + +#ifndef _OBJTOOL_CFI_H +#define _OBJTOOL_CFI_H + +#include + +#define CFI_UNDEFINED -1 +#define CFI_CFA -2 +#define CFI_SP_INDIRECT -3 +#define CFI_BP_INDIRECT -4 + +struct cfi_reg { + int base; + int offset; +}; + +struct cfi_init_state { + struct cfi_reg regs[CFI_NUM_REGS]; + struct cfi_reg cfa; +}; + +struct cfi_state { + struct cfi_reg regs[CFI_NUM_REGS]; + struct cfi_reg vals[CFI_NUM_REGS]; + struct cfi_reg cfa; + int stack_size; + int drap_reg, drap_offset; + unsigned char type; + bool bp_scratch; + bool drap; + bool end; +}; + +#endif /* _OBJTOOL_CFI_H */ diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h new file mode 100644 index 000000000000..bba10968eac0 --- /dev/null +++ b/tools/objtool/include/objtool/check.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017 Josh Poimboeuf + */ + +#ifndef _CHECK_H +#define _CHECK_H + +#include +#include +#include + +struct insn_state { + struct cfi_state cfi; + unsigned int uaccess_stack; + bool uaccess; + bool df; + bool noinstr; + s8 instr; +}; + +struct instruction { + struct list_head list; + struct hlist_node hash; + struct list_head static_call_node; + struct section *sec; + unsigned long offset; + unsigned int len; + enum insn_type type; + unsigned long immediate; + bool dead_end, ignore, ignore_alts; + bool hint; + bool retpoline_safe; + s8 instr; + u8 visited; + u8 ret_offset; + int alt_group; + struct symbol *call_dest; + struct instruction *jump_dest; + struct instruction *first_jump_src; + struct reloc *jump_table; + struct list_head alts; + struct symbol *func; + struct list_head stack_ops; + struct cfi_state cfi; +#ifdef INSN_USE_ORC + struct orc_entry orc; +#endif +}; + +static inline bool is_static_jump(struct instruction *insn) +{ + return insn->type == INSN_JUMP_CONDITIONAL || + insn->type == INSN_JUMP_UNCONDITIONAL; +} + +struct instruction *find_insn(struct objtool_file *file, + struct section *sec, unsigned long offset); + +#define for_each_insn(file, insn) \ + list_for_each_entry(insn, &file->insn_list, list) + +#define sec_for_each_insn(file, sec, insn) \ + for (insn = find_insn(file, sec, 0); \ + insn && &insn->list != &file->insn_list && \ + insn->sec == sec; \ + insn = list_next_entry(insn, list)) + +#endif /* _CHECK_H */ diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h new file mode 100644 index 000000000000..e6890cc70a25 --- /dev/null +++ b/tools/objtool/include/objtool/elf.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2015 Josh Poimboeuf + */ + +#ifndef _OBJTOOL_ELF_H +#define _OBJTOOL_ELF_H + +#include +#include +#include +#include +#include +#include + +#ifdef LIBELF_USE_DEPRECATED +# define elf_getshdrnum elf_getshnum +# define elf_getshdrstrndx elf_getshstrndx +#endif + +/* + * Fallback for systems without this "read, mmaping if possible" cmd. + */ +#ifndef ELF_C_READ_MMAP +#define ELF_C_READ_MMAP ELF_C_READ +#endif + +struct section { + struct list_head list; + struct hlist_node hash; + struct hlist_node name_hash; + GElf_Shdr sh; + struct rb_root symbol_tree; + struct list_head symbol_list; + struct list_head reloc_list; + struct section *base, *reloc; + struct symbol *sym; + Elf_Data *data; + char *name; + int idx; + unsigned int len; + bool changed, text, rodata, noinstr; +}; + +struct symbol { + struct list_head list; + struct rb_node node; + struct hlist_node hash; + struct hlist_node name_hash; + GElf_Sym sym; + struct section *sec; + char *name; + unsigned int idx; + unsigned char bind, type; + unsigned long offset; + unsigned int len; + struct symbol *pfunc, *cfunc, *alias; + bool uaccess_safe; + bool static_call_tramp; +}; + +struct reloc { + struct list_head list; + struct hlist_node hash; + union { + GElf_Rela rela; + GElf_Rel rel; + }; + struct section *sec; + struct symbol *sym; + unsigned long offset; + unsigned int type; + int addend; + int idx; + bool jump_table_start; +}; + +#define ELF_HASH_BITS 20 + +struct elf { + Elf *elf; + GElf_Ehdr ehdr; + int fd; + bool changed; + char *name; + struct list_head sections; + DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS); + DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS); + DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS); + DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS); + DECLARE_HASHTABLE(reloc_hash, ELF_HASH_BITS); +}; + +#define OFFSET_STRIDE_BITS 4 +#define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS) +#define OFFSET_STRIDE_MASK (~(OFFSET_STRIDE - 1)) + +#define for_offset_range(_offset, _start, _end) \ + for (_offset = ((_start) & OFFSET_STRIDE_MASK); \ + _offset >= ((_start) & OFFSET_STRIDE_MASK) && \ + _offset <= ((_end) & OFFSET_STRIDE_MASK); \ + _offset += OFFSET_STRIDE) + +static inline u32 sec_offset_hash(struct section *sec, unsigned long offset) +{ + u32 ol, oh, idx = sec->idx; + + offset &= OFFSET_STRIDE_MASK; + + ol = offset; + oh = (offset >> 16) >> 16; + + __jhash_mix(ol, oh, idx); + + return ol; +} + +static inline u32 reloc_hash(struct reloc *reloc) +{ + return sec_offset_hash(reloc->sec, reloc->offset); +} + +struct elf *elf_open_read(const char *name, int flags); +struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr); +struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype); +void elf_add_reloc(struct elf *elf, struct reloc *reloc); +int elf_write_insn(struct elf *elf, struct section *sec, + unsigned long offset, unsigned int len, + const char *insn); +int elf_write_reloc(struct elf *elf, struct reloc *reloc); +int elf_write(struct elf *elf); +void elf_close(struct elf *elf); + +struct section *find_section_by_name(const struct elf *elf, const char *name); +struct symbol *find_func_by_offset(struct section *sec, unsigned long offset); +struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct symbol *find_symbol_by_name(const struct elf *elf, const char *name); +struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset); +struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, unsigned long offset); +struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec, + unsigned long offset, unsigned int len); +struct symbol *find_func_containing(struct section *sec, unsigned long offset); +void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset, + struct reloc *reloc); +int elf_rebuild_reloc_section(struct elf *elf, struct section *sec); + +#define for_each_sec(file, sec) \ + list_for_each_entry(sec, &file->elf->sections, list) + +#endif /* _OBJTOOL_ELF_H */ diff --git a/tools/objtool/include/objtool/endianness.h b/tools/objtool/include/objtool/endianness.h new file mode 100644 index 000000000000..10241341eff3 --- /dev/null +++ b/tools/objtool/include/objtool/endianness.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _OBJTOOL_ENDIANNESS_H +#define _OBJTOOL_ENDIANNESS_H + +#include +#include +#include + +#ifndef __TARGET_BYTE_ORDER +#error undefined arch __TARGET_BYTE_ORDER +#endif + +#if __BYTE_ORDER != __TARGET_BYTE_ORDER +#define __NEED_BSWAP 1 +#else +#define __NEED_BSWAP 0 +#endif + +/* + * Does a byte swap if target endianness doesn't match the host, i.e. cross + * compilation for little endian on big endian and vice versa. + * To be used for multi-byte values conversion, which are read from / about + * to be written to a target native endianness ELF file. + */ +#define bswap_if_needed(val) \ +({ \ + __typeof__(val) __ret; \ + switch (sizeof(val)) { \ + case 8: __ret = __NEED_BSWAP ? bswap_64(val) : (val); break; \ + case 4: __ret = __NEED_BSWAP ? bswap_32(val) : (val); break; \ + case 2: __ret = __NEED_BSWAP ? bswap_16(val) : (val); break; \ + default: \ + BUILD_BUG(); break; \ + } \ + __ret; \ +}) + +#endif /* _OBJTOOL_ENDIANNESS_H */ diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h new file mode 100644 index 000000000000..32f4cd1da9fa --- /dev/null +++ b/tools/objtool/include/objtool/objtool.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 Matt Helsley + */ + +#ifndef _OBJTOOL_H +#define _OBJTOOL_H + +#include +#include +#include + +#include + +#define __weak __attribute__((weak)) + +struct objtool_file { + struct elf *elf; + struct list_head insn_list; + DECLARE_HASHTABLE(insn_hash, 20); + struct list_head static_call_list; + bool ignore_unreachables, c_file, hints, rodata; +}; + +struct objtool_file *objtool_open_read(const char *_objname); + +int check(struct objtool_file *file); +int orc_dump(const char *objname); +int create_orc(struct objtool_file *file); +int create_orc_sections(struct objtool_file *file); + +#endif /* _OBJTOOL_H */ diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h new file mode 100644 index 000000000000..8a09f4e9d480 --- /dev/null +++ b/tools/objtool/include/objtool/special.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2015 Josh Poimboeuf + */ + +#ifndef _SPECIAL_H +#define _SPECIAL_H + +#include +#include +#include + +#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" + +struct special_alt { + struct list_head list; + + bool group; + bool skip_orig; + bool skip_alt; + bool jump_or_nop; + + struct section *orig_sec; + unsigned long orig_off; + + struct section *new_sec; + unsigned long new_off; + + unsigned int orig_len, new_len; /* group only */ +}; + +int special_get_alts(struct elf *elf, struct list_head *alts); + +void arch_handle_alternative(unsigned short feature, struct special_alt *alt); + +bool arch_support_alt_relocation(struct special_alt *special_alt, + struct instruction *insn, + struct reloc *reloc); +struct reloc *arch_find_switch_table(struct objtool_file *file, + struct instruction *insn); +#endif /* _SPECIAL_H */ diff --git a/tools/objtool/include/objtool/warn.h b/tools/objtool/include/objtool/warn.h new file mode 100644 index 000000000000..d99c4675e4a5 --- /dev/null +++ b/tools/objtool/include/objtool/warn.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2015 Josh Poimboeuf + */ + +#ifndef _WARN_H +#define _WARN_H + +#include +#include +#include +#include +#include +#include + +extern const char *objname; + +static inline char *offstr(struct section *sec, unsigned long offset) +{ + struct symbol *func; + char *name, *str; + unsigned long name_off; + + func = find_func_containing(sec, offset); + if (func) { + name = func->name; + name_off = offset - func->offset; + } else { + name = sec->name; + name_off = offset; + } + + str = malloc(strlen(name) + 20); + + if (func) + sprintf(str, "%s()+0x%lx", name, name_off); + else + sprintf(str, "%s+0x%lx", name, name_off); + + return str; +} + +#define WARN(format, ...) \ + fprintf(stderr, \ + "%s: warning: objtool: " format "\n", \ + objname, ##__VA_ARGS__) + +#define WARN_FUNC(format, sec, offset, ...) \ +({ \ + char *_str = offstr(sec, offset); \ + WARN("%s: " format, _str, ##__VA_ARGS__); \ + free(_str); \ +}) + +#define BT_FUNC(format, insn, ...) \ +({ \ + struct instruction *_insn = (insn); \ + char *_str = offstr(_insn->sec, _insn->offset); \ + WARN(" %s: " format, _str, ##__VA_ARGS__); \ + free(_str); \ +}) + +#define WARN_ELF(format, ...) \ + WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1)) + +#endif /* _WARN_H */ diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 9df0cd86d310..e848feb0a5fc 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -21,9 +21,9 @@ #include #include -#include "builtin.h" -#include "objtool.h" -#include "warn.h" +#include +#include +#include struct cmd_struct { const char *name; diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h deleted file mode 100644 index 4125d4578b23..000000000000 --- a/tools/objtool/objtool.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2020 Matt Helsley - */ - -#ifndef _OBJTOOL_H -#define _OBJTOOL_H - -#include -#include -#include - -#include "elf.h" - -#define __weak __attribute__((weak)) - -struct objtool_file { - struct elf *elf; - struct list_head insn_list; - DECLARE_HASHTABLE(insn_hash, 20); - struct list_head static_call_list; - bool ignore_unreachables, c_file, hints, rodata; -}; - -struct objtool_file *objtool_open_read(const char *_objname); - -int check(struct objtool_file *file); -int orc_dump(const char *objname); -int create_orc(struct objtool_file *file); -int create_orc_sections(struct objtool_file *file); - -#endif /* _OBJTOOL_H */ diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index 4e818a22e44b..c53fae9dbe93 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -6,9 +6,9 @@ #include #include #include -#include "objtool.h" -#include "warn.h" -#include "endianness.h" +#include +#include +#include static const char *reg_name(unsigned int reg) { diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 1be7e16b2595..2e5fb787a382 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -9,9 +9,9 @@ #include #include -#include "check.h" -#include "warn.h" -#include "endianness.h" +#include +#include +#include int create_orc(struct objtool_file *file) { diff --git a/tools/objtool/special.c b/tools/objtool/special.c index ab7cb1e13411..2c7fbda7b055 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -11,11 +11,11 @@ #include #include -#include "builtin.h" -#include "special.h" -#include "warn.h" -#include "arch_special.h" -#include "endianness.h" +#include +#include +#include +#include +#include struct special_entry { const char *sec; diff --git a/tools/objtool/special.h b/tools/objtool/special.h deleted file mode 100644 index abddf38ef334..000000000000 --- a/tools/objtool/special.h +++ /dev/null @@ -1,41 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2015 Josh Poimboeuf - */ - -#ifndef _SPECIAL_H -#define _SPECIAL_H - -#include -#include "check.h" -#include "elf.h" - -#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table" - -struct special_alt { - struct list_head list; - - bool group; - bool skip_orig; - bool skip_alt; - bool jump_or_nop; - - struct section *orig_sec; - unsigned long orig_off; - - struct section *new_sec; - unsigned long new_off; - - unsigned int orig_len, new_len; /* group only */ -}; - -int special_get_alts(struct elf *elf, struct list_head *alts); - -void arch_handle_alternative(unsigned short feature, struct special_alt *alt); - -bool arch_support_alt_relocation(struct special_alt *special_alt, - struct instruction *insn, - struct reloc *reloc); -struct reloc *arch_find_switch_table(struct objtool_file *file, - struct instruction *insn); -#endif /* _SPECIAL_H */ diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h deleted file mode 100644 index 7799f60de80a..000000000000 --- a/tools/objtool/warn.h +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2015 Josh Poimboeuf - */ - -#ifndef _WARN_H -#define _WARN_H - -#include -#include -#include -#include -#include -#include "elf.h" - -extern const char *objname; - -static inline char *offstr(struct section *sec, unsigned long offset) -{ - struct symbol *func; - char *name, *str; - unsigned long name_off; - - func = find_func_containing(sec, offset); - if (func) { - name = func->name; - name_off = offset - func->offset; - } else { - name = sec->name; - name_off = offset; - } - - str = malloc(strlen(name) + 20); - - if (func) - sprintf(str, "%s()+0x%lx", name, name_off); - else - sprintf(str, "%s+0x%lx", name, name_off); - - return str; -} - -#define WARN(format, ...) \ - fprintf(stderr, \ - "%s: warning: objtool: " format "\n", \ - objname, ##__VA_ARGS__) - -#define WARN_FUNC(format, sec, offset, ...) \ -({ \ - char *_str = offstr(sec, offset); \ - WARN("%s: " format, _str, ##__VA_ARGS__); \ - free(_str); \ -}) - -#define BT_FUNC(format, insn, ...) \ -({ \ - struct instruction *_insn = (insn); \ - char *_str = offstr(_insn->sec, _insn->offset); \ - WARN(" %s: " format, _str, ##__VA_ARGS__); \ - free(_str); \ -}) - -#define WARN_ELF(format, ...) \ - WARN(format ": %s", ##__VA_ARGS__, elf_errmsg(-1)) - -#endif /* _WARN_H */ diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c index 7843e9a7a72f..f2716827cc30 100644 --- a/tools/objtool/weak.c +++ b/tools/objtool/weak.c @@ -7,7 +7,7 @@ #include #include -#include "objtool.h" +#include #define UNSUPPORTED(name) \ ({ \ -- cgit v1.2.3 From 5ed934e57e712b676ca62e1904ad672a9fa1505a Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Fri, 13 Nov 2020 17:09:54 +0100 Subject: x86/insn: Fix vector instruction decoding on big endian cross-compiles Running instruction decoder posttest on an s390 host with an x86 target with allyesconfig shows errors. Instructions used in a couple of kernel objects could not be correctly decoded on big endian system. insn_decoder_test: warning: objdump says 6 bytes, but insn_get_length() says 5 insn_decoder_test: warning: Found an x86 instruction decoder bug, please report this. insn_decoder_test: warning: ffffffff831eb4e1: 62 d1 fd 48 7f 04 24 vmovdqa64 %zmm0,(%r12) insn_decoder_test: warning: objdump says 7 bytes, but insn_get_length() says 6 insn_decoder_test: warning: Found an x86 instruction decoder bug, please report this. insn_decoder_test: warning: ffffffff831eb4e8: 62 51 fd 48 7f 44 24 01 vmovdqa64 %zmm8,0x40(%r12) insn_decoder_test: warning: objdump says 8 bytes, but insn_get_length() says 6 This is because in a few places instruction field bytes are set directly with further usage of "value". To address that introduce and use a insn_set_byte() helper, which correctly updates "value" on big endian systems. Signed-off-by: Vasily Gorbik Acked-by: Masami Hiramatsu Signed-off-by: Josh Poimboeuf --- arch/x86/include/asm/insn.h | 12 ++++++++++++ arch/x86/lib/insn.c | 18 +++++++++--------- tools/arch/x86/include/asm/insn.h | 12 ++++++++++++ tools/arch/x86/lib/insn.c | 18 +++++++++--------- 4 files changed, 42 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 090863cfb7f3..95a448fbb44c 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -30,6 +30,12 @@ static inline void insn_field_set(struct insn_field *p, insn_value_t v, p->nbytes = n; } +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; +} + #else struct insn_field { @@ -51,6 +57,12 @@ static inline void insn_field_set(struct insn_field *p, insn_value_t v, p->nbytes = n; } +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; + p->value = __le32_to_cpu(p->little); +} #endif struct insn { diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c index 520b31fc1f1a..435630a6ec97 100644 --- a/arch/x86/lib/insn.c +++ b/arch/x86/lib/insn.c @@ -161,9 +161,9 @@ found: b = insn->prefixes.bytes[3]; for (i = 0; i < nb; i++) if (prefixes->bytes[i] == lb) - prefixes->bytes[i] = b; + insn_set_byte(prefixes, i, b); } - insn->prefixes.bytes[3] = lb; + insn_set_byte(&insn->prefixes, 3, lb); } /* Decode REX prefix */ @@ -194,13 +194,13 @@ found: if (X86_MODRM_MOD(b2) != 3) goto vex_end; } - insn->vex_prefix.bytes[0] = b; - insn->vex_prefix.bytes[1] = b2; + insn_set_byte(&insn->vex_prefix, 0, b); + insn_set_byte(&insn->vex_prefix, 1, b2); if (inat_is_evex_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); - insn->vex_prefix.bytes[2] = b2; + insn_set_byte(&insn->vex_prefix, 2, b2); b2 = peek_nbyte_next(insn_byte_t, insn, 3); - insn->vex_prefix.bytes[3] = b2; + insn_set_byte(&insn->vex_prefix, 3, b2); insn->vex_prefix.nbytes = 4; insn->next_byte += 4; if (insn->x86_64 && X86_VEX_W(b2)) @@ -208,7 +208,7 @@ found: insn->opnd_bytes = 8; } else if (inat_is_vex3_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); - insn->vex_prefix.bytes[2] = b2; + insn_set_byte(&insn->vex_prefix, 2, b2); insn->vex_prefix.nbytes = 3; insn->next_byte += 3; if (insn->x86_64 && X86_VEX_W(b2)) @@ -220,7 +220,7 @@ found: * Makes it easier to decode vex.W, vex.vvvv, * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. */ - insn->vex_prefix.bytes[2] = b2 & 0x7f; + insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f); insn->vex_prefix.nbytes = 2; insn->next_byte += 2; } @@ -256,7 +256,7 @@ void insn_get_opcode(struct insn *insn) /* Get first opcode */ op = get_next(insn_byte_t, insn); - opcode->bytes[0] = op; + insn_set_byte(opcode, 0, op); opcode->nbytes = 1; /* Check if there is VEX prefix or not */ diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h index c1fab7a570be..cc777c185212 100644 --- a/tools/arch/x86/include/asm/insn.h +++ b/tools/arch/x86/include/asm/insn.h @@ -30,6 +30,12 @@ static inline void insn_field_set(struct insn_field *p, insn_value_t v, p->nbytes = n; } +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; +} + #else struct insn_field { @@ -51,6 +57,12 @@ static inline void insn_field_set(struct insn_field *p, insn_value_t v, p->nbytes = n; } +static inline void insn_set_byte(struct insn_field *p, unsigned char n, + insn_byte_t v) +{ + p->bytes[n] = v; + p->value = __le32_to_cpu(p->little); +} #endif struct insn { diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index 77e92aa52cdc..3d9355ed1246 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -161,9 +161,9 @@ found: b = insn->prefixes.bytes[3]; for (i = 0; i < nb; i++) if (prefixes->bytes[i] == lb) - prefixes->bytes[i] = b; + insn_set_byte(prefixes, i, b); } - insn->prefixes.bytes[3] = lb; + insn_set_byte(&insn->prefixes, 3, lb); } /* Decode REX prefix */ @@ -194,13 +194,13 @@ found: if (X86_MODRM_MOD(b2) != 3) goto vex_end; } - insn->vex_prefix.bytes[0] = b; - insn->vex_prefix.bytes[1] = b2; + insn_set_byte(&insn->vex_prefix, 0, b); + insn_set_byte(&insn->vex_prefix, 1, b2); if (inat_is_evex_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); - insn->vex_prefix.bytes[2] = b2; + insn_set_byte(&insn->vex_prefix, 2, b2); b2 = peek_nbyte_next(insn_byte_t, insn, 3); - insn->vex_prefix.bytes[3] = b2; + insn_set_byte(&insn->vex_prefix, 3, b2); insn->vex_prefix.nbytes = 4; insn->next_byte += 4; if (insn->x86_64 && X86_VEX_W(b2)) @@ -208,7 +208,7 @@ found: insn->opnd_bytes = 8; } else if (inat_is_vex3_prefix(attr)) { b2 = peek_nbyte_next(insn_byte_t, insn, 2); - insn->vex_prefix.bytes[2] = b2; + insn_set_byte(&insn->vex_prefix, 2, b2); insn->vex_prefix.nbytes = 3; insn->next_byte += 3; if (insn->x86_64 && X86_VEX_W(b2)) @@ -220,7 +220,7 @@ found: * Makes it easier to decode vex.W, vex.vvvv, * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0. */ - insn->vex_prefix.bytes[2] = b2 & 0x7f; + insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f); insn->vex_prefix.nbytes = 2; insn->next_byte += 2; } @@ -256,7 +256,7 @@ void insn_get_opcode(struct insn *insn) /* Get first opcode */ op = get_next(insn_byte_t, insn); - opcode->bytes[0] = op; + insn_set_byte(opcode, 0, op); opcode->nbytes = 1; /* Check if there is VEX prefix or not */ -- cgit v1.2.3 From ab4e0744e99b87e1a223e89fc3c9ae44f727c9a6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 17 Dec 2020 15:02:42 -0600 Subject: objtool: Refactor ORC section generation Decouple ORC entries from instructions. This simplifies the control/data flow, and is going to make it easier to support alternative instructions which change the stack layout. Signed-off-by: Josh Poimboeuf --- tools/objtool/Makefile | 4 - tools/objtool/builtin-orc.c | 6 +- tools/objtool/include/objtool/arch.h | 4 - tools/objtool/include/objtool/check.h | 3 - tools/objtool/include/objtool/objtool.h | 3 +- tools/objtool/orc_gen.c | 272 ++++++++++++++++---------------- tools/objtool/weak.c | 7 +- 7 files changed, 140 insertions(+), 159 deletions(-) (limited to 'tools') diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index d179299980b9..92ce4fce7bc7 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -47,10 +47,6 @@ ifeq ($(SRCARCH),x86) SUBCMD_ORC := y endif -ifeq ($(SUBCMD_ORC),y) - CFLAGS += -DINSN_USE_ORC -endif - export SUBCMD_CHECK SUBCMD_ORC export srctree OUTPUT CFLAGS SRCARCH AWK include $(srctree)/tools/build/Makefile.include diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c index 6745f3328a0e..8273bbf7cebb 100644 --- a/tools/objtool/builtin-orc.c +++ b/tools/objtool/builtin-orc.c @@ -51,11 +51,7 @@ int cmd_orc(int argc, const char **argv) if (list_empty(&file->insn_list)) return 0; - ret = create_orc(file); - if (ret) - return ret; - - ret = create_orc_sections(file); + ret = orc_create(file); if (ret) return ret; diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index dc4f503a3ae4..6ff0685f5cc5 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -11,10 +11,6 @@ #include #include -#ifdef INSN_USE_ORC -#include -#endif - enum insn_type { INSN_JUMP_CONDITIONAL, INSN_JUMP_UNCONDITIONAL, diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index bba10968eac0..df1e3e8ed204 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -43,9 +43,6 @@ struct instruction { struct symbol *func; struct list_head stack_ops; struct cfi_state cfi; -#ifdef INSN_USE_ORC - struct orc_entry orc; -#endif }; static inline bool is_static_jump(struct instruction *insn) diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h index 32f4cd1da9fa..e114642efb65 100644 --- a/tools/objtool/include/objtool/objtool.h +++ b/tools/objtool/include/objtool/objtool.h @@ -26,7 +26,6 @@ struct objtool_file *objtool_open_read(const char *_objname); int check(struct objtool_file *file); int orc_dump(const char *objname); -int create_orc(struct objtool_file *file); -int create_orc_sections(struct objtool_file *file); +int orc_create(struct objtool_file *file); #endif /* _OBJTOOL_H */ diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 2e5fb787a382..38e1a8dbfff0 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -13,89 +13,84 @@ #include #include -int create_orc(struct objtool_file *file) +static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi) { - struct instruction *insn; + struct instruction *insn = container_of(cfi, struct instruction, cfi); + struct cfi_reg *bp = &cfi->regs[CFI_BP]; - for_each_insn(file, insn) { - struct orc_entry *orc = &insn->orc; - struct cfi_reg *cfa = &insn->cfi.cfa; - struct cfi_reg *bp = &insn->cfi.regs[CFI_BP]; + memset(orc, 0, sizeof(*orc)); - if (!insn->sec->text) - continue; - - orc->end = insn->cfi.end; + orc->end = cfi->end; - if (cfa->base == CFI_UNDEFINED) { - orc->sp_reg = ORC_REG_UNDEFINED; - continue; - } - - switch (cfa->base) { - case CFI_SP: - orc->sp_reg = ORC_REG_SP; - break; - case CFI_SP_INDIRECT: - orc->sp_reg = ORC_REG_SP_INDIRECT; - break; - case CFI_BP: - orc->sp_reg = ORC_REG_BP; - break; - case CFI_BP_INDIRECT: - orc->sp_reg = ORC_REG_BP_INDIRECT; - break; - case CFI_R10: - orc->sp_reg = ORC_REG_R10; - break; - case CFI_R13: - orc->sp_reg = ORC_REG_R13; - break; - case CFI_DI: - orc->sp_reg = ORC_REG_DI; - break; - case CFI_DX: - orc->sp_reg = ORC_REG_DX; - break; - default: - WARN_FUNC("unknown CFA base reg %d", - insn->sec, insn->offset, cfa->base); - return -1; - } + if (cfi->cfa.base == CFI_UNDEFINED) { + orc->sp_reg = ORC_REG_UNDEFINED; + return 0; + } - switch(bp->base) { - case CFI_UNDEFINED: - orc->bp_reg = ORC_REG_UNDEFINED; - break; - case CFI_CFA: - orc->bp_reg = ORC_REG_PREV_SP; - break; - case CFI_BP: - orc->bp_reg = ORC_REG_BP; - break; - default: - WARN_FUNC("unknown BP base reg %d", - insn->sec, insn->offset, bp->base); - return -1; - } + switch (cfi->cfa.base) { + case CFI_SP: + orc->sp_reg = ORC_REG_SP; + break; + case CFI_SP_INDIRECT: + orc->sp_reg = ORC_REG_SP_INDIRECT; + break; + case CFI_BP: + orc->sp_reg = ORC_REG_BP; + break; + case CFI_BP_INDIRECT: + orc->sp_reg = ORC_REG_BP_INDIRECT; + break; + case CFI_R10: + orc->sp_reg = ORC_REG_R10; + break; + case CFI_R13: + orc->sp_reg = ORC_REG_R13; + break; + case CFI_DI: + orc->sp_reg = ORC_REG_DI; + break; + case CFI_DX: + orc->sp_reg = ORC_REG_DX; + break; + default: + WARN_FUNC("unknown CFA base reg %d", + insn->sec, insn->offset, cfi->cfa.base); + return -1; + } - orc->sp_offset = cfa->offset; - orc->bp_offset = bp->offset; - orc->type = insn->cfi.type; + switch (bp->base) { + case CFI_UNDEFINED: + orc->bp_reg = ORC_REG_UNDEFINED; + break; + case CFI_CFA: + orc->bp_reg = ORC_REG_PREV_SP; + break; + case CFI_BP: + orc->bp_reg = ORC_REG_BP; + break; + default: + WARN_FUNC("unknown BP base reg %d", + insn->sec, insn->offset, bp->base); + return -1; } + orc->sp_offset = cfi->cfa.offset; + orc->bp_offset = bp->offset; + orc->type = cfi->type; + return 0; } -static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec, - unsigned int idx, struct section *insn_sec, - unsigned long insn_off, struct orc_entry *o) +static int write_orc_entry(struct elf *elf, struct section *orc_sec, + struct section *ip_rsec, unsigned int idx, + struct section *insn_sec, unsigned long insn_off, + struct orc_entry *o) { struct orc_entry *orc; struct reloc *reloc; /* populate ORC data */ - orc = (struct orc_entry *)u_sec->data->d_buf + idx; + orc = (struct orc_entry *)orc_sec->data->d_buf + idx; memcpy(orc, o, sizeof(*orc)); orc->sp_offset = bswap_if_needed(orc->sp_offset); orc->bp_offset = bswap_if_needed(orc->bp_offset); @@ -117,102 +112,109 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti reloc->type = R_X86_64_PC32; reloc->offset = idx * sizeof(int); - reloc->sec = ip_relocsec; + reloc->sec = ip_rsec; elf_add_reloc(elf, reloc); return 0; } -int create_orc_sections(struct objtool_file *file) +struct orc_list_entry { + struct list_head list; + struct orc_entry orc; + struct section *insn_sec; + unsigned long insn_off; +}; + +static int orc_list_add(struct list_head *orc_list, struct orc_entry *orc, + struct section *sec, unsigned long offset) +{ + struct orc_list_entry *entry = malloc(sizeof(*entry)); + + if (!entry) { + WARN("malloc failed"); + return -1; + } + + entry->orc = *orc; + entry->insn_sec = sec; + entry->insn_off = offset; + + list_add_tail(&entry->list, orc_list); + return 0; +} + +int orc_create(struct objtool_file *file) { - struct instruction *insn, *prev_insn; - struct section *sec, *u_sec, *ip_relocsec; - unsigned int idx; + struct section *sec, *ip_rsec, *orc_sec; + unsigned int nr = 0, idx = 0; + struct orc_list_entry *entry; + struct list_head orc_list; - struct orc_entry empty = { - .sp_reg = ORC_REG_UNDEFINED, + struct orc_entry null = { + .sp_reg = ORC_REG_UNDEFINED, .bp_reg = ORC_REG_UNDEFINED, .type = UNWIND_HINT_TYPE_CALL, }; - sec = find_section_by_name(file->elf, ".orc_unwind"); - if (sec) { - WARN("file already has .orc_unwind section, skipping"); - return -1; - } - - /* count the number of needed orcs */ - idx = 0; + /* Build a deduplicated list of ORC entries: */ + INIT_LIST_HEAD(&orc_list); for_each_sec(file, sec) { + struct orc_entry orc, prev_orc = {0}; + struct instruction *insn; + bool empty = true; + if (!sec->text) continue; - prev_insn = NULL; sec_for_each_insn(file, sec, insn) { - if (!prev_insn || - memcmp(&insn->orc, &prev_insn->orc, - sizeof(struct orc_entry))) { - idx++; - } - prev_insn = insn; + if (init_orc_entry(&orc, &insn->cfi)) + return -1; + if (!memcmp(&prev_orc, &orc, sizeof(orc))) + continue; + if (orc_list_add(&orc_list, &orc, sec, insn->offset)) + return -1; + nr++; + prev_orc = orc; + empty = false; } - /* section terminator */ - if (prev_insn) - idx++; + /* Add a section terminator */ + if (!empty) { + orc_list_add(&orc_list, &null, sec, sec->len); + nr++; + } } - if (!idx) - return -1; + if (!nr) + return 0; + /* Create .orc_unwind, .orc_unwind_ip and .rela.orc_unwind_ip sections: */ + sec = find_section_by_name(file->elf, ".orc_unwind"); + if (sec) { + WARN("file already has .orc_unwind section, skipping"); + return -1; + } + orc_sec = elf_create_section(file->elf, ".orc_unwind", 0, + sizeof(struct orc_entry), nr); + if (!orc_sec) + return -1; - /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */ - sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx); + sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr); if (!sec) return -1; - - ip_relocsec = elf_create_reloc_section(file->elf, sec, SHT_RELA); - if (!ip_relocsec) + ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA); + if (!ip_rsec) return -1; - /* create .orc_unwind section */ - u_sec = elf_create_section(file->elf, ".orc_unwind", 0, - sizeof(struct orc_entry), idx); - - /* populate sections */ - idx = 0; - for_each_sec(file, sec) { - if (!sec->text) - continue; - - prev_insn = NULL; - sec_for_each_insn(file, sec, insn) { - if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc, - sizeof(struct orc_entry))) { - - if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx, - insn->sec, insn->offset, - &insn->orc)) - return -1; - - idx++; - } - prev_insn = insn; - } - - /* section terminator */ - if (prev_insn) { - if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx, - prev_insn->sec, - prev_insn->offset + prev_insn->len, - &empty)) - return -1; - - idx++; - } + /* Write ORC entries to sections: */ + list_for_each_entry(entry, &orc_list, list) { + if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++, + entry->insn_sec, entry->insn_off, + &entry->orc)) + return -1; } - if (elf_rebuild_reloc_section(file->elf, ip_relocsec)) + if (elf_rebuild_reloc_section(file->elf, ip_rsec)) return -1; return 0; diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c index f2716827cc30..8314e824db4a 100644 --- a/tools/objtool/weak.c +++ b/tools/objtool/weak.c @@ -25,12 +25,7 @@ int __weak orc_dump(const char *_objname) UNSUPPORTED("orc"); } -int __weak create_orc(struct objtool_file *file) -{ - UNSUPPORTED("orc"); -} - -int __weak create_orc_sections(struct objtool_file *file) +int __weak orc_create(struct objtool_file *file) { UNSUPPORTED("orc"); } -- cgit v1.2.3 From b23cc71c62747f2e4c3e56138872cf47e1294f8a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 18 Dec 2020 14:19:32 -0600 Subject: objtool: Add 'alt_group' struct Create a new struct associated with each group of alternatives instructions. This will help with the removal of fake jumps, and more importantly with adding support for stack layout changes in alternatives. Signed-off-by: Josh Poimboeuf --- tools/objtool/check.c | 29 +++++++++++++++++++++++------ tools/objtool/include/objtool/check.h | 13 ++++++++++++- 2 files changed, 35 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8976047cb648..9aa324b14d5d 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -992,20 +992,28 @@ static int handle_group_alt(struct objtool_file *file, struct instruction *orig_insn, struct instruction **new_insn) { - static unsigned int alt_group_next_index = 1; struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; - unsigned int alt_group = alt_group_next_index++; + struct alt_group *orig_alt_group, *new_alt_group; unsigned long dest_off; + + orig_alt_group = malloc(sizeof(*orig_alt_group)); + if (!orig_alt_group) { + WARN("malloc failed"); + return -1; + } last_orig_insn = NULL; insn = orig_insn; sec_for_each_insn_from(file, insn) { if (insn->offset >= special_alt->orig_off + special_alt->orig_len) break; - insn->alt_group = alt_group; + insn->alt_group = orig_alt_group; last_orig_insn = insn; } + orig_alt_group->orig_group = NULL; + orig_alt_group->first_insn = orig_insn; + orig_alt_group->last_insn = last_orig_insn; if (next_insn_same_sec(file, last_orig_insn)) { fake_jump = malloc(sizeof(*fake_jump)); @@ -1036,8 +1044,13 @@ static int handle_group_alt(struct objtool_file *file, return 0; } + new_alt_group = malloc(sizeof(*new_alt_group)); + if (!new_alt_group) { + WARN("malloc failed"); + return -1; + } + last_new_insn = NULL; - alt_group = alt_group_next_index++; insn = *new_insn; sec_for_each_insn_from(file, insn) { struct reloc *alt_reloc; @@ -1049,7 +1062,7 @@ static int handle_group_alt(struct objtool_file *file, insn->ignore = orig_insn->ignore_alts; insn->func = orig_insn->func; - insn->alt_group = alt_group; + insn->alt_group = new_alt_group; /* * Since alternative replacement code is copy/pasted by the @@ -1098,6 +1111,10 @@ static int handle_group_alt(struct objtool_file *file, return -1; } + new_alt_group->orig_group = orig_alt_group; + new_alt_group->first_insn = *new_insn; + new_alt_group->last_insn = last_new_insn; + if (fake_jump) list_add(&fake_jump->list, &last_new_insn->list); @@ -2451,7 +2468,7 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn) { struct instruction *first_insn = insn; - int alt_group = insn->alt_group; + struct alt_group *alt_group = insn->alt_group; sec_for_each_insn_continue(file, insn) { if (insn->alt_group != alt_group) diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index df1e3e8ed204..7893e9783084 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -19,6 +19,17 @@ struct insn_state { s8 instr; }; +struct alt_group { + /* + * Pointer from a replacement group to the original group. NULL if it + * *is* the original group. + */ + struct alt_group *orig_group; + + /* First and last instructions in the group */ + struct instruction *first_insn, *last_insn; +}; + struct instruction { struct list_head list; struct hlist_node hash; @@ -34,7 +45,7 @@ struct instruction { s8 instr; u8 visited; u8 ret_offset; - int alt_group; + struct alt_group *alt_group; struct symbol *call_dest; struct instruction *jump_dest; struct instruction *first_jump_src; -- cgit v1.2.3 From c9c324dc22aab1687da37001b321b6dfa93a0699 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 18 Dec 2020 14:26:21 -0600 Subject: objtool: Support stack layout changes in alternatives The ORC unwinder showed a warning [1] which revealed the stack layout didn't match what was expected. The problem was that paravirt patching had replaced "CALL *pv_ops.irq.save_fl" with "PUSHF;POP". That changed the stack layout between the PUSHF and the POP, so unwinding from an interrupt which occurred between those two instructions would fail. Part of the agreed upon solution was to rework the custom paravirt patching code to use alternatives instead, since objtool already knows how to read alternatives (and converging runtime patching infrastructure is always a good thing anyway). But the main problem still remains, which is that runtime patching can change the stack layout. Making stack layout changes in alternatives was disallowed with commit 7117f16bf460 ("objtool: Fix ORC vs alternatives"), but now that paravirt is going to be doing it, it needs to be supported. One way to do so would be to modify the ORC table when the code gets patched. But ORC is simple -- a good thing! -- and it's best to leave it alone. Instead, support stack layout changes by "flattening" all possible stack states (CFI) from parallel alternative code streams into a single set of linear states. The only necessary limitation is that CFI conflicts are disallowed at all possible instruction boundaries. For example, this scenario is allowed: Alt1 Alt2 Alt3 0x00 CALL *pv_ops.save_fl CALL xen_save_fl PUSHF 0x01 POP %RAX 0x02 NOP ... 0x05 NOP ... 0x07 The unwind information for offset-0x00 is identical for all 3 alternatives. Similarly offset-0x05 and higher also are identical (and the same as 0x00). However offset-0x01 has deviating CFI, but that is only relevant for Alt3, neither of the other alternative instruction streams will ever hit that offset. This scenario is NOT allowed: Alt1 Alt2 0x00 CALL *pv_ops.save_fl PUSHF 0x01 NOP6 ... 0x07 NOP POP %RAX The problem here is that offset-0x7, which is an instruction boundary in both possible instruction patch streams, has two conflicting stack layouts. [ The above examples were stolen from Peter Zijlstra. ] The new flattened CFI array is used both for the detection of conflicts (like the second example above) and the generation of linear ORC entries. BTW, another benefit of these changes is that, thanks to some related cleanups (new fake nops and alt_group struct) objtool can finally be rid of fake jumps, which were a constant source of headaches. [1] https://lkml.kernel.org/r/20201111170536.arx2zbn4ngvjoov7@treble Cc: Shinichiro Kawasaki Signed-off-by: Josh Poimboeuf --- tools/objtool/Documentation/stack-validation.txt | 16 +- tools/objtool/check.c | 192 +++++++++++------------ tools/objtool/include/objtool/check.h | 6 + tools/objtool/orc_gen.c | 56 ++++++- 4 files changed, 159 insertions(+), 111 deletions(-) (limited to 'tools') diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt index 0542e46c7552..30f38fdc0d56 100644 --- a/tools/objtool/Documentation/stack-validation.txt +++ b/tools/objtool/Documentation/stack-validation.txt @@ -315,13 +315,15 @@ they mean, and suggestions for how to fix them. function tracing inserts additional calls, which is not obvious from the sources). -10. file.o: warning: func()+0x5c: alternative modifies stack - - This means that an alternative includes instructions that modify the - stack. The problem is that there is only one ORC unwind table, this means - that the ORC unwind entries must be valid for each of the alternatives. - The easiest way to enforce this is to ensure alternatives do not contain - any ORC entries, which in turn implies the above constraint. +10. file.o: warning: func()+0x5c: stack layout conflict in alternatives + + This means that in the use of the alternative() or ALTERNATIVE() + macro, the code paths have conflicting modifications to the stack. + The problem is that there is only one ORC unwind table, which means + that the ORC unwind entries must be consistent for all possible + instruction boundaries regardless of which code has been patched. + This limitation can be overcome by massaging the alternatives with + NOPs to shift the stack changes around so they no longer conflict. 11. file.o: warning: unannotated intra-function call diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 9aa324b14d5d..270b507e7098 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -20,8 +20,6 @@ #include #include -#define FAKE_JUMP_OFFSET -1 - struct alternative { struct list_head list; struct instruction *insn; @@ -775,9 +773,6 @@ static int add_jump_destinations(struct objtool_file *file) if (!is_static_jump(insn)) continue; - if (insn->offset == FAKE_JUMP_OFFSET) - continue; - reloc = find_reloc_by_dest_range(file->elf, insn->sec, insn->offset, insn->len); if (!reloc) { @@ -971,28 +966,15 @@ static int add_call_destinations(struct objtool_file *file) } /* - * The .alternatives section requires some extra special care, over and above - * what other special sections require: - * - * 1. Because alternatives are patched in-place, we need to insert a fake jump - * instruction at the end so that validate_branch() skips all the original - * replaced instructions when validating the new instruction path. - * - * 2. An added wrinkle is that the new instruction length might be zero. In - * that case the old instructions are replaced with noops. We simulate that - * by creating a fake jump as the only new instruction. - * - * 3. In some cases, the alternative section includes an instruction which - * conditionally jumps to the _end_ of the entry. We have to modify these - * jumps' destinations to point back to .text rather than the end of the - * entry in .altinstr_replacement. + * The .alternatives section requires some extra special care over and above + * other special sections because alternatives are patched in place. */ static int handle_group_alt(struct objtool_file *file, struct special_alt *special_alt, struct instruction *orig_insn, struct instruction **new_insn) { - struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL; + struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL; struct alt_group *orig_alt_group, *new_alt_group; unsigned long dest_off; @@ -1002,6 +984,13 @@ static int handle_group_alt(struct objtool_file *file, WARN("malloc failed"); return -1; } + orig_alt_group->cfi = calloc(special_alt->orig_len, + sizeof(struct cfi_state *)); + if (!orig_alt_group->cfi) { + WARN("calloc failed"); + return -1; + } + last_orig_insn = NULL; insn = orig_insn; sec_for_each_insn_from(file, insn) { @@ -1015,42 +1004,45 @@ static int handle_group_alt(struct objtool_file *file, orig_alt_group->first_insn = orig_insn; orig_alt_group->last_insn = last_orig_insn; - if (next_insn_same_sec(file, last_orig_insn)) { - fake_jump = malloc(sizeof(*fake_jump)); - if (!fake_jump) { - WARN("malloc failed"); - return -1; - } - memset(fake_jump, 0, sizeof(*fake_jump)); - INIT_LIST_HEAD(&fake_jump->alts); - INIT_LIST_HEAD(&fake_jump->stack_ops); - init_cfi_state(&fake_jump->cfi); - fake_jump->sec = special_alt->new_sec; - fake_jump->offset = FAKE_JUMP_OFFSET; - fake_jump->type = INSN_JUMP_UNCONDITIONAL; - fake_jump->jump_dest = list_next_entry(last_orig_insn, list); - fake_jump->func = orig_insn->func; + new_alt_group = malloc(sizeof(*new_alt_group)); + if (!new_alt_group) { + WARN("malloc failed"); + return -1; } - if (!special_alt->new_len) { - if (!fake_jump) { - WARN("%s: empty alternative at end of section", - special_alt->orig_sec->name); + if (special_alt->new_len < special_alt->orig_len) { + /* + * Insert a fake nop at the end to make the replacement + * alt_group the same size as the original. This is needed to + * allow propagate_alt_cfi() to do its magic. When the last + * instruction affects the stack, the instruction after it (the + * nop) will propagate the new state to the shared CFI array. + */ + nop = malloc(sizeof(*nop)); + if (!nop) { + WARN("malloc failed"); return -1; } + memset(nop, 0, sizeof(*nop)); + INIT_LIST_HEAD(&nop->alts); + INIT_LIST_HEAD(&nop->stack_ops); + init_cfi_state(&nop->cfi); - *new_insn = fake_jump; - return 0; + nop->sec = special_alt->new_sec; + nop->offset = special_alt->new_off + special_alt->new_len; + nop->len = special_alt->orig_len - special_alt->new_len; + nop->type = INSN_NOP; + nop->func = orig_insn->func; + nop->alt_group = new_alt_group; + nop->ignore = orig_insn->ignore_alts; } - new_alt_group = malloc(sizeof(*new_alt_group)); - if (!new_alt_group) { - WARN("malloc failed"); - return -1; + if (!special_alt->new_len) { + *new_insn = nop; + goto end; } - last_new_insn = NULL; insn = *new_insn; sec_for_each_insn_from(file, insn) { struct reloc *alt_reloc; @@ -1089,14 +1081,8 @@ static int handle_group_alt(struct objtool_file *file, continue; dest_off = arch_jump_destination(insn); - if (dest_off == special_alt->new_off + special_alt->new_len) { - if (!fake_jump) { - WARN("%s: alternative jump to end of section", - special_alt->orig_sec->name); - return -1; - } - insn->jump_dest = fake_jump; - } + if (dest_off == special_alt->new_off + special_alt->new_len) + insn->jump_dest = next_insn_same_sec(file, last_orig_insn); if (!insn->jump_dest) { WARN_FUNC("can't find alternative jump destination", @@ -1111,13 +1097,13 @@ static int handle_group_alt(struct objtool_file *file, return -1; } + if (nop) + list_add(&nop->list, &last_new_insn->list); +end: new_alt_group->orig_group = orig_alt_group; new_alt_group->first_insn = *new_insn; - new_alt_group->last_insn = last_new_insn; - - if (fake_jump) - list_add(&fake_jump->list, &last_new_insn->list); - + new_alt_group->last_insn = nop ? : last_new_insn; + new_alt_group->cfi = orig_alt_group->cfi; return 0; } @@ -2248,22 +2234,47 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, return 0; } -static int handle_insn_ops(struct instruction *insn, struct insn_state *state) +/* + * The stack layouts of alternatives instructions can sometimes diverge when + * they have stack modifications. That's fine as long as the potential stack + * layouts don't conflict at any given potential instruction boundary. + * + * Flatten the CFIs of the different alternative code streams (both original + * and replacement) into a single shared CFI array which can be used to detect + * conflicts and nicely feed a linear array of ORC entries to the unwinder. + */ +static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn) { - struct stack_op *op; + struct cfi_state **alt_cfi; + int group_off; - list_for_each_entry(op, &insn->stack_ops, list) { - struct cfi_state old_cfi = state->cfi; - int res; + if (!insn->alt_group) + return 0; - res = update_cfi_state(insn, &state->cfi, op); - if (res) - return res; + alt_cfi = insn->alt_group->cfi; + group_off = insn->offset - insn->alt_group->first_insn->offset; - if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) { - WARN_FUNC("alternative modifies stack", insn->sec, insn->offset); + if (!alt_cfi[group_off]) { + alt_cfi[group_off] = &insn->cfi; + } else { + if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) { + WARN_FUNC("stack layout conflict in alternatives", + insn->sec, insn->offset); return -1; } + } + + return 0; +} + +static int handle_insn_ops(struct instruction *insn, struct insn_state *state) +{ + struct stack_op *op; + + list_for_each_entry(op, &insn->stack_ops, list) { + + if (update_cfi_state(insn, &state->cfi, op)) + return 1; if (op->dest.type == OP_DEST_PUSHF) { if (!state->uaccess_stack) { @@ -2453,28 +2464,20 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct return 0; } -/* - * Alternatives should not contain any ORC entries, this in turn means they - * should not contain any CFI ops, which implies all instructions should have - * the same same CFI state. - * - * It is possible to constuct alternatives that have unreachable holes that go - * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED - * states which then results in ORC entries, which we just said we didn't want. - * - * Avoid them by copying the CFI entry of the first instruction into the whole - * alternative. - */ -static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn) +static struct instruction *next_insn_to_validate(struct objtool_file *file, + struct instruction *insn) { - struct instruction *first_insn = insn; struct alt_group *alt_group = insn->alt_group; - sec_for_each_insn_continue(file, insn) { - if (insn->alt_group != alt_group) - break; - insn->cfi = first_insn->cfi; - } + /* + * Simulate the fact that alternatives are patched in-place. When the + * end of a replacement alt_group is reached, redirect objtool flow to + * the end of the original alt_group. + */ + if (alt_group && insn == alt_group->last_insn && alt_group->orig_group) + return next_insn_same_sec(file, alt_group->orig_group->last_insn); + + return next_insn_same_sec(file, insn); } /* @@ -2495,7 +2498,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, sec = insn->sec; while (1) { - next_insn = next_insn_same_sec(file, insn); + next_insn = next_insn_to_validate(file, insn); if (file->c_file && func && insn->func && func != insn->func->pfunc) { WARN("%s() falls through to next function %s()", @@ -2528,6 +2531,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, insn->visited |= visited; + if (propagate_alt_cfi(file, insn)) + return 1; + if (!insn->ignore_alts && !list_empty(&insn->alts)) { bool skip_orig = false; @@ -2543,9 +2549,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, } } - if (insn->alt_group) - fill_alternative_cfi(file, insn); - if (skip_orig) return 0; } @@ -2779,9 +2782,6 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio !strcmp(insn->sec->name, ".altinstr_aux")) return true; - if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET) - return true; - if (!insn->func) return false; diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index 7893e9783084..f4e041fbdab2 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -28,6 +28,12 @@ struct alt_group { /* First and last instructions in the group */ struct instruction *first_insn, *last_insn; + + /* + * Byte-offset-addressed len-sized array of pointers to CFI structs. + * This is shared with the other alt_groups in the same alternative. + */ + struct cfi_state **cfi; }; struct instruction { diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index 38e1a8dbfff0..738aa5021bc4 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -144,6 +144,13 @@ static int orc_list_add(struct list_head *orc_list, struct orc_entry *orc, return 0; } +static unsigned long alt_group_len(struct alt_group *alt_group) +{ + return alt_group->last_insn->offset + + alt_group->last_insn->len - + alt_group->first_insn->offset; +} + int orc_create(struct objtool_file *file) { struct section *sec, *ip_rsec, *orc_sec; @@ -168,15 +175,48 @@ int orc_create(struct objtool_file *file) continue; sec_for_each_insn(file, sec, insn) { - if (init_orc_entry(&orc, &insn->cfi)) - return -1; - if (!memcmp(&prev_orc, &orc, sizeof(orc))) + struct alt_group *alt_group = insn->alt_group; + int i; + + if (!alt_group) { + if (init_orc_entry(&orc, &insn->cfi)) + return -1; + if (!memcmp(&prev_orc, &orc, sizeof(orc))) + continue; + if (orc_list_add(&orc_list, &orc, sec, + insn->offset)) + return -1; + nr++; + prev_orc = orc; + empty = false; continue; - if (orc_list_add(&orc_list, &orc, sec, insn->offset)) - return -1; - nr++; - prev_orc = orc; - empty = false; + } + + /* + * Alternatives can have different stack layout + * possibilities (but they shouldn't conflict). + * Instead of traversing the instructions, use the + * alt_group's flattened byte-offset-addressed CFI + * array. + */ + for (i = 0; i < alt_group_len(alt_group); i++) { + struct cfi_state *cfi = alt_group->cfi[i]; + if (!cfi) + continue; + if (init_orc_entry(&orc, cfi)) + return -1; + if (!memcmp(&prev_orc, &orc, sizeof(orc))) + continue; + if (orc_list_add(&orc_list, &orc, insn->sec, + insn->offset + i)) + return -1; + nr++; + prev_orc = orc; + empty = false; + } + + /* Skip to the end of the alt_group */ + insn = alt_group->last_insn; } /* Add a section terminator */ -- cgit v1.2.3 From 2a1867219c7b27f928e2545782b86daaf9ad50bd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 21 Jan 2021 14:19:53 +0100 Subject: fs: add mount_setattr() This implements the missing mount_setattr() syscall. While the new mount api allows to change the properties of a superblock there is currently no way to change the properties of a mount or a mount tree using file descriptors which the new mount api is based on. In addition the old mount api has the restriction that mount options cannot be applied recursively. This hasn't changed since changing mount options on a per-mount basis was implemented in [1] and has been a frequent request not just for convenience but also for security reasons. The legacy mount syscall is unable to accommodate this behavior without introducing a whole new set of flags because MS_REC | MS_REMOUNT | MS_BIND | MS_RDONLY | MS_NOEXEC | [...] only apply the mount option to the topmost mount. Changing MS_REC to apply to the whole mount tree would mean introducing a significant uapi change and would likely cause significant regressions. The new mount_setattr() syscall allows to recursively clear and set mount options in one shot. Multiple calls to change mount options requesting the same changes are idempotent: int mount_setattr(int dfd, const char *path, unsigned flags, struct mount_attr *uattr, size_t usize); Flags to modify path resolution behavior are specified in the @flags argument. Currently, AT_EMPTY_PATH, AT_RECURSIVE, AT_SYMLINK_NOFOLLOW, and AT_NO_AUTOMOUNT are supported. If useful, additional lookup flags to restrict path resolution as introduced with openat2() might be supported in the future. The mount_setattr() syscall can be expected to grow over time and is designed with extensibility in mind. It follows the extensible syscall pattern we have used with other syscalls such as openat2(), clone3(), sched_{set,get}attr(), and others. The set of mount options is passed in the uapi struct mount_attr which currently has the following layout: struct mount_attr { __u64 attr_set; __u64 attr_clr; __u64 propagation; __u64 userns_fd; }; The @attr_set and @attr_clr members are used to clear and set mount options. This way a user can e.g. request that a set of flags is to be raised such as turning mounts readonly by raising MOUNT_ATTR_RDONLY in @attr_set while at the same time requesting that another set of flags is to be lowered such as removing noexec from a mount tree by specifying MOUNT_ATTR_NOEXEC in @attr_clr. Note, since the MOUNT_ATTR_ values are an enum starting from 0, not a bitmap, users wanting to transition to a different atime setting cannot simply specify the atime setting in @attr_set, but must also specify MOUNT_ATTR__ATIME in the @attr_clr field. So we ensure that MOUNT_ATTR__ATIME can't be partially set in @attr_clr and that @attr_set can't have any atime bits set if MOUNT_ATTR__ATIME isn't set in @attr_clr. The @propagation field lets callers specify the propagation type of a mount tree. Propagation is a single property that has four different settings and as such is not really a flag argument but an enum. Specifically, it would be unclear what setting and clearing propagation settings in combination would amount to. The legacy mount() syscall thus forbids the combination of multiple propagation settings too. The goal is to keep the semantics of mount propagation somewhat simple as they are overly complex as it is. The @userns_fd field lets user specify a user namespace whose idmapping becomes the idmapping of the mount. This is implemented and explained in detail in the next patch. [1]: commit 2e4b7fcd9260 ("[PATCH] r/o bind mounts: honor mount writer counts at remount") Link: https://lore.kernel.org/r/20210121131959.646623-35-christian.brauner@ubuntu.com Cc: David Howells Cc: Aleksa Sarai Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Cc: linux-api@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Christian Brauner --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 + arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + fs/namespace.c | 263 ++++++++++++++++++++++++++++ include/linux/syscalls.h | 4 + include/uapi/asm-generic/unistd.h | 4 +- include/uapi/linux/mount.h | 15 ++ tools/include/uapi/asm-generic/unistd.h | 4 +- 23 files changed, 307 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index a6617067dbe6..02f0244e005c 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -481,3 +481,4 @@ 549 common faccessat2 sys_faccessat2 550 common process_madvise sys_process_madvise 551 common epoll_pwait2 sys_epoll_pwait2 +552 common mount_setattr sys_mount_setattr diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 20e1170e2e0a..dcc1191291a2 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -455,3 +455,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 86a9d7b3eabe..949788f5ba40 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 442 +#define __NR_compat_syscalls 443 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index cccfbbefbf95..3d874f624056 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -891,6 +891,8 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2) __SYSCALL(__NR_process_madvise, sys_process_madvise) #define __NR_epoll_pwait2 441 __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2) +#define __NR_mount_setattr 442 +__SYSCALL(__NR_mount_setattr, sys_mount_setattr) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index bfc00f2bd437..d89231166e19 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -362,3 +362,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 7fe4e45c864c..72bde6707dd3 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -441,3 +441,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a522adf194ab..d603a5ec9338 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -447,3 +447,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 0f03ad223f33..8fd8c1790941 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -380,3 +380,4 @@ 439 n32 faccessat2 sys_faccessat2 440 n32 process_madvise sys_process_madvise 441 n32 epoll_pwait2 compat_sys_epoll_pwait2 +442 n32 mount_setattr sys_mount_setattr diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 91649690b52f..169f21438065 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -356,3 +356,4 @@ 439 n64 faccessat2 sys_faccessat2 440 n64 process_madvise sys_process_madvise 441 n64 epoll_pwait2 sys_epoll_pwait2 +442 n64 mount_setattr sys_mount_setattr diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 4bad0c40aed6..090d29ca80ff 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -429,3 +429,4 @@ 439 o32 faccessat2 sys_faccessat2 440 o32 process_madvise sys_process_madvise 441 o32 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 o32 mount_setattr sys_mount_setattr diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 6bcc31966b44..271a92519683 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -439,3 +439,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index f744eb5cba88..72e5aa67ab8a 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -531,3 +531,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index d443423495e5..3abef2144dac 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -444,3 +444,4 @@ 439 common faccessat2 sys_faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr sys_mount_setattr diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 9df40ac0ebc0..d08eebad6b7f 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -444,3 +444,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 40d8c7cd8298..84403a99039c 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -487,3 +487,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 874aeacde2dd..a1c9f496fca6 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -446,3 +446,4 @@ 439 i386 faccessat2 sys_faccessat2 440 i386 process_madvise sys_process_madvise 441 i386 epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 i386 mount_setattr sys_mount_setattr diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 78672124d28b..7bf01cbe582f 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -363,6 +363,7 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 46116a28eeed..365a9b849224 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -412,3 +412,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/fs/namespace.c b/fs/namespace.c index 00ed0d6cb2ee..726a51c8c46e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -73,6 +73,14 @@ static DECLARE_RWSEM(namespace_sem); static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ +struct mount_kattr { + unsigned int attr_set; + unsigned int attr_clr; + unsigned int propagation; + unsigned int lookup_flags; + bool recurse; +}; + /* /sys/fs */ struct kobject *fs_kobj; EXPORT_SYMBOL_GPL(fs_kobj); @@ -3469,6 +3477,11 @@ out_type: (MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \ MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME) +#define MOUNT_SETATTR_VALID_FLAGS FSMOUNT_VALID_FLAGS + +#define MOUNT_SETATTR_PROPAGATION_FLAGS \ + (MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED) + static unsigned int attr_flags_to_mnt_flags(u64 attr_flags) { unsigned int mnt_flags = 0; @@ -3820,6 +3833,256 @@ out0: return error; } +static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt) +{ + unsigned int flags = mnt->mnt.mnt_flags; + + /* flags to clear */ + flags &= ~kattr->attr_clr; + /* flags to raise */ + flags |= kattr->attr_set; + + return flags; +} + +static struct mount *mount_setattr_prepare(struct mount_kattr *kattr, + struct mount *mnt, int *err) +{ + struct mount *m = mnt, *last = NULL; + + if (!is_mounted(&m->mnt)) { + *err = -EINVAL; + goto out; + } + + if (!(mnt_has_parent(m) ? check_mnt(m) : is_anon_ns(m->mnt_ns))) { + *err = -EINVAL; + goto out; + } + + do { + unsigned int flags; + + flags = recalc_flags(kattr, m); + if (!can_change_locked_flags(m, flags)) { + *err = -EPERM; + goto out; + } + + last = m; + + if ((kattr->attr_set & MNT_READONLY) && + !(m->mnt.mnt_flags & MNT_READONLY)) { + *err = mnt_hold_writers(m); + if (*err) + goto out; + } + } while (kattr->recurse && (m = next_mnt(m, mnt))); + +out: + return last; +} + +static void mount_setattr_commit(struct mount_kattr *kattr, + struct mount *mnt, struct mount *last, + int err) +{ + struct mount *m = mnt; + + do { + if (!err) { + unsigned int flags; + + flags = recalc_flags(kattr, m); + WRITE_ONCE(m->mnt.mnt_flags, flags); + } + + /* + * We either set MNT_READONLY above so make it visible + * before ~MNT_WRITE_HOLD or we failed to recursively + * apply mount options. + */ + if ((kattr->attr_set & MNT_READONLY) && + (m->mnt.mnt_flags & MNT_WRITE_HOLD)) + mnt_unhold_writers(m); + + if (!err && kattr->propagation) + change_mnt_propagation(m, kattr->propagation); + + /* + * On failure, only cleanup until we found the first mount + * we failed to handle. + */ + if (err && m == last) + break; + } while (kattr->recurse && (m = next_mnt(m, mnt))); + + if (!err) + touch_mnt_namespace(mnt->mnt_ns); +} + +static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) +{ + struct mount *mnt = real_mount(path->mnt), *last = NULL; + int err = 0; + + if (path->dentry != mnt->mnt.mnt_root) + return -EINVAL; + + if (kattr->propagation) { + /* + * Only take namespace_lock() if we're actually changing + * propagation. + */ + namespace_lock(); + if (kattr->propagation == MS_SHARED) { + err = invent_group_ids(mnt, kattr->recurse); + if (err) { + namespace_unlock(); + return err; + } + } + } + + lock_mount_hash(); + + /* + * Get the mount tree in a shape where we can change mount + * properties without failure. + */ + last = mount_setattr_prepare(kattr, mnt, &err); + if (last) /* Commit all changes or revert to the old state. */ + mount_setattr_commit(kattr, mnt, last, err); + + unlock_mount_hash(); + + if (kattr->propagation) { + namespace_unlock(); + if (err) + cleanup_group_ids(mnt, NULL); + } + + return err; +} + +static int build_mount_kattr(const struct mount_attr *attr, + struct mount_kattr *kattr, unsigned int flags) +{ + unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; + + if (flags & AT_NO_AUTOMOUNT) + lookup_flags &= ~LOOKUP_AUTOMOUNT; + if (flags & AT_SYMLINK_NOFOLLOW) + lookup_flags &= ~LOOKUP_FOLLOW; + if (flags & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; + + *kattr = (struct mount_kattr) { + .lookup_flags = lookup_flags, + .recurse = !!(flags & AT_RECURSIVE), + }; + + if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS) + return -EINVAL; + if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1) + return -EINVAL; + kattr->propagation = attr->propagation; + + if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS) + return -EINVAL; + + if (attr->userns_fd) + return -EINVAL; + + kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set); + kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr); + + /* + * Since the MOUNT_ATTR_ values are an enum, not a bitmap, + * users wanting to transition to a different atime setting cannot + * simply specify the atime setting in @attr_set, but must also + * specify MOUNT_ATTR__ATIME in the @attr_clr field. + * So ensure that MOUNT_ATTR__ATIME can't be partially set in + * @attr_clr and that @attr_set can't have any atime bits set if + * MOUNT_ATTR__ATIME isn't set in @attr_clr. + */ + if (attr->attr_clr & MOUNT_ATTR__ATIME) { + if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME) + return -EINVAL; + + /* + * Clear all previous time settings as they are mutually + * exclusive. + */ + kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME; + switch (attr->attr_set & MOUNT_ATTR__ATIME) { + case MOUNT_ATTR_RELATIME: + kattr->attr_set |= MNT_RELATIME; + break; + case MOUNT_ATTR_NOATIME: + kattr->attr_set |= MNT_NOATIME; + break; + case MOUNT_ATTR_STRICTATIME: + break; + default: + return -EINVAL; + } + } else { + if (attr->attr_set & MOUNT_ATTR__ATIME) + return -EINVAL; + } + + return 0; +} + +SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, + unsigned int, flags, struct mount_attr __user *, uattr, + size_t, usize) +{ + int err; + struct path target; + struct mount_attr attr; + struct mount_kattr kattr; + + BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0); + + if (flags & ~(AT_EMPTY_PATH | + AT_RECURSIVE | + AT_SYMLINK_NOFOLLOW | + AT_NO_AUTOMOUNT)) + return -EINVAL; + + if (unlikely(usize > PAGE_SIZE)) + return -E2BIG; + if (unlikely(usize < MOUNT_ATTR_SIZE_VER0)) + return -EINVAL; + + if (!may_mount()) + return -EPERM; + + err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize); + if (err) + return err; + + /* Don't bother walking through the mounts if this is a nop. */ + if (attr.attr_set == 0 && + attr.attr_clr == 0 && + attr.propagation == 0) + return 0; + + err = build_mount_kattr(&attr, &kattr, flags); + if (err) + return err; + + err = user_path_at(dfd, path, kattr.lookup_flags, &target); + if (err) + return err; + + err = do_mount_setattr(&target, &kattr); + path_put(&target); + return err; +} + static void __init init_mount_tree(void) { struct vfsmount *mnt; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 7688bc983de5..cd7b5c817ba2 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -68,6 +68,7 @@ union bpf_attr; struct io_uring_params; struct clone_args; struct open_how; +struct mount_attr; #include #include @@ -1028,6 +1029,9 @@ asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path, int to_dfd, const char __user *to_path, unsigned int ms_flags); +asmlinkage long sys_mount_setattr(int dfd, const char __user *path, + unsigned int flags, + struct mount_attr __user *uattr, size_t usize); asmlinkage long sys_fsopen(const char __user *fs_name, unsigned int flags); asmlinkage long sys_fsconfig(int fs_fd, unsigned int cmd, const char __user *key, const void __user *value, int aux); diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 728752917785..ce58cff99b66 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -861,9 +861,11 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2) __SYSCALL(__NR_process_madvise, sys_process_madvise) #define __NR_epoll_pwait2 441 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) +#define __NR_mount_setattr 442 +__SYSCALL(__NR_mount_setattr, sys_mount_setattr) #undef __NR_syscalls -#define __NR_syscalls 442 +#define __NR_syscalls 443 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index dd8306ea336c..d3bcefdfa73d 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -1,6 +1,8 @@ #ifndef _UAPI_LINUX_MOUNT_H #define _UAPI_LINUX_MOUNT_H +#include + /* * These are the fs-independent mount-flags: up to 32 flags are supported * @@ -118,4 +120,17 @@ enum fsconfig_command { #define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ #define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. */ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 728752917785..ce58cff99b66 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -861,9 +861,11 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2) __SYSCALL(__NR_process_madvise, sys_process_madvise) #define __NR_epoll_pwait2 441 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2) +#define __NR_mount_setattr 442 +__SYSCALL(__NR_mount_setattr, sys_mount_setattr) #undef __NR_syscalls -#define __NR_syscalls 442 +#define __NR_syscalls 443 /* * 32 bit systems traditionally used different -- cgit v1.2.3 From 01eadc8dd96d7215dda71a1fe74b07b0e3b48af5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 21 Jan 2021 14:19:55 +0100 Subject: tests: add mount_setattr() selftests Add a range of selftests for the new mount_setattr() syscall to verify that it works as expected. This tests that: - no invalid flags can be specified - changing properties of a single mount works and leaves other mounts in the mount tree unchanged - changing a mount tre to read-only when one of the mounts has writers fails and leaves the whole mount tree unchanged - changing mount properties from multiple threads works - changing atime settings works - changing mount propagation works - changing the mount options of a mount tree where the individual mounts in the tree have different mount options only changes the flags that were requested to change - changing mount options from another mount namespace fails - changing mount options from another user namespace fails - idmapped mounts Note, the main test-suite for idmapped mounts is part of xfstests and is pretty huge. These tests here just make sure that the syscalls bits work correctly. TAP version 13 1..20 # Starting 20 tests from 3 test cases. # RUN mount_setattr.invalid_attributes ... # OK mount_setattr.invalid_attributes ok 1 mount_setattr.invalid_attributes # RUN mount_setattr.extensibility ... # OK mount_setattr.extensibility ok 2 mount_setattr.extensibility # RUN mount_setattr.basic ... # OK mount_setattr.basic ok 3 mount_setattr.basic # RUN mount_setattr.basic_recursive ... # OK mount_setattr.basic_recursive ok 4 mount_setattr.basic_recursive # RUN mount_setattr.mount_has_writers ... # OK mount_setattr.mount_has_writers ok 5 mount_setattr.mount_has_writers # RUN mount_setattr.mixed_mount_options ... # OK mount_setattr.mixed_mount_options ok 6 mount_setattr.mixed_mount_options # RUN mount_setattr.time_changes ... # OK mount_setattr.time_changes ok 7 mount_setattr.time_changes # RUN mount_setattr.multi_threaded ... # OK mount_setattr.multi_threaded ok 8 mount_setattr.multi_threaded # RUN mount_setattr.wrong_user_namespace ... # OK mount_setattr.wrong_user_namespace ok 9 mount_setattr.wrong_user_namespace # RUN mount_setattr.wrong_mount_namespace ... # OK mount_setattr.wrong_mount_namespace ok 10 mount_setattr.wrong_mount_namespace # RUN mount_setattr_idmapped.invalid_fd_negative ... # OK mount_setattr_idmapped.invalid_fd_negative ok 11 mount_setattr_idmapped.invalid_fd_negative # RUN mount_setattr_idmapped.invalid_fd_large ... # OK mount_setattr_idmapped.invalid_fd_large ok 12 mount_setattr_idmapped.invalid_fd_large # RUN mount_setattr_idmapped.invalid_fd_closed ... # OK mount_setattr_idmapped.invalid_fd_closed ok 13 mount_setattr_idmapped.invalid_fd_closed # RUN mount_setattr_idmapped.invalid_fd_initial_userns ... # OK mount_setattr_idmapped.invalid_fd_initial_userns ok 14 mount_setattr_idmapped.invalid_fd_initial_userns # RUN mount_setattr_idmapped.attached_mount_inside_current_mount_namespace ... # OK mount_setattr_idmapped.attached_mount_inside_current_mount_namespace ok 15 mount_setattr_idmapped.attached_mount_inside_current_mount_namespace # RUN mount_setattr_idmapped.attached_mount_outside_current_mount_namespace ... # OK mount_setattr_idmapped.attached_mount_outside_current_mount_namespace ok 16 mount_setattr_idmapped.attached_mount_outside_current_mount_namespace # RUN mount_setattr_idmapped.detached_mount_inside_current_mount_namespace ... # OK mount_setattr_idmapped.detached_mount_inside_current_mount_namespace ok 17 mount_setattr_idmapped.detached_mount_inside_current_mount_namespace # RUN mount_setattr_idmapped.detached_mount_outside_current_mount_namespace ... # OK mount_setattr_idmapped.detached_mount_outside_current_mount_namespace ok 18 mount_setattr_idmapped.detached_mount_outside_current_mount_namespace # RUN mount_setattr_idmapped.change_idmapping ... # OK mount_setattr_idmapped.change_idmapping ok 19 mount_setattr_idmapped.change_idmapping # RUN mount_setattr_idmapped.idmap_mount_tree_invalid ... # OK mount_setattr_idmapped.idmap_mount_tree_invalid ok 20 mount_setattr_idmapped.idmap_mount_tree_invalid # PASSED: 20 / 20 tests passed. # Totals: pass:20 fail:0 xfail:0 xpass:0 skip:0 error:0 Link: https://lore.kernel.org/r/20210121131959.646623-37-christian.brauner@ubuntu.com Cc: Christoph Hellwig Cc: David Howells Cc: Al Viro Cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/mount_setattr/.gitignore | 1 + tools/testing/selftests/mount_setattr/Makefile | 7 + tools/testing/selftests/mount_setattr/config | 1 + .../selftests/mount_setattr/mount_setattr_test.c | 1424 ++++++++++++++++++++ 5 files changed, 1434 insertions(+) create mode 100644 tools/testing/selftests/mount_setattr/.gitignore create mode 100644 tools/testing/selftests/mount_setattr/Makefile create mode 100644 tools/testing/selftests/mount_setattr/config create mode 100644 tools/testing/selftests/mount_setattr/mount_setattr_test.c (limited to 'tools') diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 8a917cb4426a..157179afd5b7 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -33,6 +33,7 @@ TARGETS += memfd TARGETS += memory-hotplug TARGETS += mincore TARGETS += mount +TARGETS += mount_setattr TARGETS += mqueue TARGETS += net TARGETS += net/forwarding diff --git a/tools/testing/selftests/mount_setattr/.gitignore b/tools/testing/selftests/mount_setattr/.gitignore new file mode 100644 index 000000000000..5f74d8488472 --- /dev/null +++ b/tools/testing/selftests/mount_setattr/.gitignore @@ -0,0 +1 @@ +mount_setattr_test diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile new file mode 100644 index 000000000000..2250f7dcb81e --- /dev/null +++ b/tools/testing/selftests/mount_setattr/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for mount selftests. +CFLAGS = -g -I../../../../usr/include/ -Wall -O2 -pthread + +TEST_GEN_FILES += mount_setattr_test + +include ../lib.mk diff --git a/tools/testing/selftests/mount_setattr/config b/tools/testing/selftests/mount_setattr/config new file mode 100644 index 000000000000..416bd53ce982 --- /dev/null +++ b/tools/testing/selftests/mount_setattr/config @@ -0,0 +1 @@ +CONFIG_USER_NS=y diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c new file mode 100644 index 000000000000..4e94e566e040 --- /dev/null +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -0,0 +1,1424 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" + +#ifndef CLONE_NEWNS +#define CLONE_NEWNS 0x00020000 +#endif + +#ifndef CLONE_NEWUSER +#define CLONE_NEWUSER 0x10000000 +#endif + +#ifndef MS_REC +#define MS_REC 16384 +#endif + +#ifndef MS_RELATIME +#define MS_RELATIME (1 << 21) +#endif + +#ifndef MS_STRICTATIME +#define MS_STRICTATIME (1 << 24) +#endif + +#ifndef MOUNT_ATTR_RDONLY +#define MOUNT_ATTR_RDONLY 0x00000001 +#endif + +#ifndef MOUNT_ATTR_NOSUID +#define MOUNT_ATTR_NOSUID 0x00000002 +#endif + +#ifndef MOUNT_ATTR_NOEXEC +#define MOUNT_ATTR_NOEXEC 0x00000008 +#endif + +#ifndef MOUNT_ATTR_NODIRATIME +#define MOUNT_ATTR_NODIRATIME 0x00000080 +#endif + +#ifndef MOUNT_ATTR__ATIME +#define MOUNT_ATTR__ATIME 0x00000070 +#endif + +#ifndef MOUNT_ATTR_RELATIME +#define MOUNT_ATTR_RELATIME 0x00000000 +#endif + +#ifndef MOUNT_ATTR_NOATIME +#define MOUNT_ATTR_NOATIME 0x00000010 +#endif + +#ifndef MOUNT_ATTR_STRICTATIME +#define MOUNT_ATTR_STRICTATIME 0x00000020 +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 +#endif + +#ifndef MS_SHARED +#define MS_SHARED (1 << 20) +#endif + +#define DEFAULT_THREADS 4 +#define ptr_to_int(p) ((int)((intptr_t)(p))) +#define int_to_ptr(u) ((void *)((intptr_t)(u))) + +#ifndef __NR_mount_setattr + #if defined __alpha__ + #define __NR_mount_setattr 552 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_mount_setattr (442 + 4000) + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_mount_setattr (442 + 6000) + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_mount_setattr (442 + 5000) + #endif + #elif defined __ia64__ + #define __NR_mount_setattr (442 + 1024) + #else + #define __NR_mount_setattr 442 + #endif + +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; +#endif + +#ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #elif defined __ia64__ + #define __NR_open_tree (428 + 1024) + #else + #define __NR_open_tree 428 + #endif +#endif + +#ifndef MOUNT_ATTR_IDMAP +#define MOUNT_ATTR_IDMAP 0x00100000 +#endif + +static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags, + struct mount_attr *attr, size_t size) +{ + return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); +} + +#ifndef OPEN_TREE_CLONE +#define OPEN_TREE_CLONE 1 +#endif + +#ifndef OPEN_TREE_CLOEXEC +#define OPEN_TREE_CLOEXEC O_CLOEXEC +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +#endif + +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) +{ + return syscall(__NR_open_tree, dfd, filename, flags); +} + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static int write_file(const char *path, const void *buf, size_t count) +{ + int fd; + ssize_t ret; + + fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); + if (fd < 0) + return -1; + + ret = write_nointr(fd, buf, count); + close(fd); + if (ret < 0 || (size_t)ret != count) + return -1; + + return 0; +} + +static int create_and_enter_userns(void) +{ + uid_t uid; + gid_t gid; + char map[100]; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER)) + return -1; + + if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && + errno != ENOENT) + return -1; + + snprintf(map, sizeof(map), "0 %d 1", uid); + if (write_file("/proc/self/uid_map", map, strlen(map))) + return -1; + + + snprintf(map, sizeof(map), "0 %d 1", gid); + if (write_file("/proc/self/gid_map", map, strlen(map))) + return -1; + + if (setgid(0)) + return -1; + + if (setuid(0)) + return -1; + + return 0; +} + +static int prepare_unpriv_mountns(void) +{ + if (create_and_enter_userns()) + return -1; + + if (unshare(CLONE_NEWNS)) + return -1; + + if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0)) + return -1; + + return 0; +} + +static int read_mnt_flags(const char *path) +{ + int ret; + struct statvfs stat; + unsigned int mnt_flags; + + ret = statvfs(path, &stat); + if (ret != 0) + return -EINVAL; + + if (stat.f_flag & + ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | ST_NOATIME | + ST_NODIRATIME | ST_RELATIME | ST_SYNCHRONOUS | ST_MANDLOCK)) + return -EINVAL; + + mnt_flags = 0; + if (stat.f_flag & ST_RDONLY) + mnt_flags |= MS_RDONLY; + if (stat.f_flag & ST_NOSUID) + mnt_flags |= MS_NOSUID; + if (stat.f_flag & ST_NODEV) + mnt_flags |= MS_NODEV; + if (stat.f_flag & ST_NOEXEC) + mnt_flags |= MS_NOEXEC; + if (stat.f_flag & ST_NOATIME) + mnt_flags |= MS_NOATIME; + if (stat.f_flag & ST_NODIRATIME) + mnt_flags |= MS_NODIRATIME; + if (stat.f_flag & ST_RELATIME) + mnt_flags |= MS_RELATIME; + if (stat.f_flag & ST_SYNCHRONOUS) + mnt_flags |= MS_SYNCHRONOUS; + if (stat.f_flag & ST_MANDLOCK) + mnt_flags |= ST_MANDLOCK; + + return mnt_flags; +} + +static char *get_field(char *src, int nfields) +{ + int i; + char *p = src; + + for (i = 0; i < nfields; i++) { + while (*p && *p != ' ' && *p != '\t') + p++; + + if (!*p) + break; + + p++; + } + + return p; +} + +static void null_endofword(char *word) +{ + while (*word && *word != ' ' && *word != '\t') + word++; + *word = '\0'; +} + +static bool is_shared_mount(const char *path) +{ + size_t len = 0; + char *line = NULL; + FILE *f = NULL; + + f = fopen("/proc/self/mountinfo", "re"); + if (!f) + return false; + + while (getline(&line, &len, f) != -1) { + char *opts, *target; + + target = get_field(line, 4); + if (!target) + continue; + + opts = get_field(target, 2); + if (!opts) + continue; + + null_endofword(target); + + if (strcmp(target, path) != 0) + continue; + + null_endofword(opts); + if (strstr(opts, "shared:")) + return true; + } + + free(line); + fclose(f); + + return false; +} + +static void *mount_setattr_thread(void *data) +{ + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID, + .attr_clr = 0, + .propagation = MS_SHARED, + }; + + if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr))) + pthread_exit(int_to_ptr(-1)); + + pthread_exit(int_to_ptr(0)); +} + +/* Attempt to de-conflict with the selftests tree. */ +#ifndef SKIP +#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) +#endif + +static bool mount_setattr_supported(void) +{ + int ret; + + ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0); + if (ret < 0 && errno == ENOSYS) + return false; + + return true; +} + +FIXTURE(mount_setattr) { +}; + +FIXTURE_SETUP(mount_setattr) +{ + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_EQ(prepare_unpriv_mountns(), 0); + + (void)umount2("/mnt", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); + + ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); + + ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); + + ASSERT_EQ(mkdir("/mnt/B", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/B", "ramfs", + MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0); + + ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts", + MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0); +} + +FIXTURE_TEARDOWN(mount_setattr) +{ + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + (void)umount2("/mnt/A", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); +} + +TEST_F(mount_setattr, invalid_attributes) +{ + struct mount_attr invalid_attr = { + .attr_set = (1U << 31), + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + invalid_attr.attr_set = 0; + invalid_attr.attr_clr = (1U << 31); + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + invalid_attr.attr_clr = 0; + invalid_attr.propagation = (1U << 31); + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + invalid_attr.attr_set = (1U << 31); + invalid_attr.attr_clr = (1U << 31); + invalid_attr.propagation = (1U << 31); + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); + + ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr)), 0); +} + +TEST_F(mount_setattr, extensibility) +{ + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + char *s = "dummy"; + struct mount_attr invalid_attr = {}; + struct mount_attr_large { + struct mount_attr attr1; + struct mount_attr attr2; + struct mount_attr attr3; + } large_attr = {}; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL, + sizeof(invalid_attr)), 0); + ASSERT_EQ(errno, EFAULT); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s, + sizeof(invalid_attr)), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr) / 2), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, + sizeof(invalid_attr) / 2), 0); + ASSERT_EQ(errno, EINVAL); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, + (void *)&large_attr, sizeof(large_attr)), 0); + + large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, + (void *)&large_attr, sizeof(large_attr)), 0); + + large_attr.attr3.attr_set = 0; + large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, + (void *)&large_attr, sizeof(large_attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); +} + +TEST_F(mount_setattr, basic) +{ + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, + .attr_clr = MOUNT_ATTR__ATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + expected_flags |= MS_NOEXEC; + expected_flags &= ~MS_NOATIME; + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, old_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, old_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, old_flags); +} + +TEST_F(mount_setattr, basic_recursive) +{ + int fd; + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, + .attr_clr = MOUNT_ATTR__ATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + expected_flags |= MS_NOEXEC; + expected_flags &= ~MS_NOATIME; + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_clr = MOUNT_ATTR_RDONLY; + attr.propagation = MS_SHARED; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_RDONLY; + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), true); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); + + fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777); + ASSERT_GE(fd, 0); + + /* + * We're holding a fd open for writing so this needs to fail somewhere + * in the middle and the mount options need to be unchanged. + */ + attr.attr_set = MOUNT_ATTR_RDONLY; + ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), true); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); + + EXPECT_EQ(close(fd), 0); +} + +TEST_F(mount_setattr, mount_has_writers) +{ + int fd, dfd; + unsigned int old_flags = 0, new_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME, + .attr_clr = MOUNT_ATTR__ATIME, + .propagation = MS_SHARED, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777); + ASSERT_GE(fd, 0); + + /* + * We're holding a fd open to a mount somwhere in the middle so this + * needs to fail somewhere in the middle. After this the mount options + * need to be unchanged. + */ + ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), false); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, old_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false); + + dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC); + ASSERT_GE(dfd, 0); + EXPECT_EQ(fsync(dfd), 0); + EXPECT_EQ(close(dfd), 0); + + EXPECT_EQ(fsync(fd), 0); + EXPECT_EQ(close(fd), 0); + + /* All writers are gone so this should succeed. */ + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); +} + +TEST_F(mount_setattr, mixed_mount_options) +{ + unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME, + .attr_set = MOUNT_ATTR_RELATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags1 = read_mnt_flags("/mnt/B"); + ASSERT_GT(old_flags1, 0); + + old_flags2 = read_mnt_flags("/mnt/B/BB"); + ASSERT_GT(old_flags2, 0); + + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags = old_flags2; + expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID); + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/B"); + ASSERT_EQ(new_flags, expected_flags); + + expected_flags = old_flags2; + expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID); + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/B/BB"); + ASSERT_EQ(new_flags, expected_flags); +} + +TEST_F(mount_setattr, time_changes) +{ + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = MOUNT_ATTR_STRICTATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME; + attr.attr_clr = MOUNT_ATTR__ATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_set = 0; + attr.attr_clr = MOUNT_ATTR_STRICTATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + attr.attr_clr = MOUNT_ATTR_NOATIME; + ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME; + attr.attr_clr = MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags = old_flags; + expected_flags |= MS_NOATIME; + expected_flags |= MS_NODIRATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_set &= ~MOUNT_ATTR_NOATIME; + attr.attr_set |= MOUNT_ATTR_RELATIME; + attr.attr_clr |= MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_NOATIME; + expected_flags |= MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_set &= ~MOUNT_ATTR_RELATIME; + attr.attr_set |= MOUNT_ATTR_STRICTATIME; + attr.attr_clr |= MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_RELATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_set &= ~MOUNT_ATTR_STRICTATIME; + attr.attr_set |= MOUNT_ATTR_NOATIME; + attr.attr_clr |= MOUNT_ATTR__ATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags |= MS_NOATIME; + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + memset(&attr, 0, sizeof(attr)); + attr.attr_clr = MOUNT_ATTR_NODIRATIME; + ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0); + + expected_flags &= ~MS_NODIRATIME; + + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); +} + +TEST_F(mount_setattr, multi_threaded) +{ + int i, j, nthreads, ret = 0; + unsigned int old_flags = 0, new_flags = 0, expected_flags = 0; + pthread_attr_t pattr; + pthread_t threads[DEFAULT_THREADS]; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + old_flags = read_mnt_flags("/mnt/A"); + ASSERT_GT(old_flags, 0); + + /* Try to change mount options from multiple threads. */ + nthreads = get_nprocs_conf(); + if (nthreads > DEFAULT_THREADS) + nthreads = DEFAULT_THREADS; + + pthread_attr_init(&pattr); + for (i = 0; i < nthreads; i++) + ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0); + + for (j = 0; j < i; j++) { + void *retptr = NULL; + + EXPECT_EQ(pthread_join(threads[j], &retptr), 0); + + ret += ptr_to_int(retptr); + EXPECT_EQ(ret, 0); + } + pthread_attr_destroy(&pattr); + + ASSERT_EQ(ret, 0); + + expected_flags = old_flags; + expected_flags |= MS_RDONLY; + expected_flags |= MS_NOSUID; + new_flags = read_mnt_flags("/mnt/A"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A"), true); + + new_flags = read_mnt_flags("/mnt/A/AA"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true); + + new_flags = read_mnt_flags("/mnt/A/AA/B/BB"); + ASSERT_EQ(new_flags, expected_flags); + + ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true); +} + +TEST_F(mount_setattr, wrong_user_namespace) +{ + int ret; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + EXPECT_EQ(create_and_enter_userns(), 0); + ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, EPERM); +} + +TEST_F(mount_setattr, wrong_mount_namespace) +{ + int fd, ret; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_RDONLY, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC); + ASSERT_GE(fd, 0); + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr)); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, EINVAL); +} + +FIXTURE(mount_setattr_idmapped) { +}; + +FIXTURE_SETUP(mount_setattr_idmapped) +{ + int img_fd = -EBADF; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0); + + (void)umount2("/mnt", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); + + ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B", 0777), 0); + ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0); + ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0); + + ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0); + ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0); + ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); + + ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); + + ASSERT_EQ(mkdir("/mnt/B", 0777), 0); + + ASSERT_EQ(mount("testing", "/mnt/B", "ramfs", + MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0); + + ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0); + + ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts", + MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0); + + ASSERT_EQ(mkdir("/mnt/C", 0777), 0); + ASSERT_EQ(mkdir("/mnt/D", 0777), 0); + img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600); + ASSERT_GE(img_fd, 0); + ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0); + ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0); + ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0); + ASSERT_EQ(close(img_fd), 0); +} + +FIXTURE_TEARDOWN(mount_setattr_idmapped) +{ + (void)umount2("/mnt/A", MNT_DETACH); + (void)umount2("/tmp", MNT_DETACH); +} + +/** + * Validate that negative fd values are rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_negative) +{ + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + .userns_fd = -EBADF, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { + TH_LOG("failure: created idmapped mount with negative fd"); + } +} + +/** + * Validate that excessively large fd values are rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_large) +{ + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + .userns_fd = INT64_MAX, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { + TH_LOG("failure: created idmapped mount with too large fd value"); + } +} + +/** + * Validate that closed fd values are rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_closed) +{ + int fd; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ASSERT_GE(close(fd), 0); + + attr.userns_fd = fd; + ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) { + TH_LOG("failure: created idmapped mount with closed fd"); + } +} + +/** + * Validate that the initial user namespace is rejected. + */ +TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(errno, EPERM); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid, + unsigned long range) +{ + char map[100], procfile[256]; + + snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid); + snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range); + if (write_file(procfile, map, strlen(map))) + return -1; + + + snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid); + snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range); + if (write_file(procfile, map, strlen(map))) + return -1; + + return 0; +} + +#define __STACK_SIZE (8 * 1024 * 1024) +static pid_t do_clone(int (*fn)(void *), void *arg, int flags) +{ + void *stack; + + stack = malloc(__STACK_SIZE); + if (!stack) + return -ENOMEM; + +#ifdef __ia64__ + return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL); +#else + return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL); +#endif +} + +static int get_userns_fd_cb(void *data) +{ + return kill(getpid(), SIGSTOP); +} + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range) +{ + int ret; + pid_t pid; + char path[256]; + + pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER); + if (pid < 0) + return -errno; + + ret = map_ids(pid, nsid, hostid, range); + if (ret < 0) + return ret; + + snprintf(path, sizeof(path), "/proc/%d/ns/user", pid); + ret = open(path, O_RDONLY | O_CLOEXEC); + kill(pid, SIGKILL); + wait_for_pid(pid); + return ret; +} + +/** + * Validate that an attached mount in our mount namespace can be idmapped. + * (The kernel enforces that the mount's mount namespace and the caller's mount + * namespace match.) + */ +TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that idmapping a mount is rejected if the mount's mount namespace + * and our mount namespace don't match. + * (The kernel enforces that the mount's mount namespace and the caller's mount + * namespace match.) + */ +TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC); + ASSERT_GE(open_tree_fd, 0); + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, + sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that an attached mount in our mount namespace can be idmapped. + */ +TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + /* Changing mount properties on a detached mount. */ + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", + AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that a detached mount not in our mount namespace can be idmapped. + */ +TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + /* Changing mount properties on a detached mount. */ + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", + AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +/** + * Validate that currently changing the idmapping of an idmapped mount fails. + */ +TEST_F(mount_setattr_idmapped, change_idmapping) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/D", + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", + AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + + /* Change idmapping on a detached mount that is already idmapped. */ + attr.userns_fd = get_userns_fd(0, 20000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); +} + +static bool expected_uid_gid(int dfd, const char *path, int flags, + uid_t expected_uid, gid_t expected_gid) +{ + int ret; + struct stat st; + + ret = fstatat(dfd, path, &st, flags); + if (ret < 0) + return false; + + return st.st_uid == expected_uid && st.st_gid == expected_gid; +} + +TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) +{ + int open_tree_fd = -EBADF; + struct mount_attr attr = { + .attr_set = MOUNT_ATTR_IDMAP, + }; + + if (!mount_setattr_supported()) + SKIP(return, "mount_setattr syscall not supported"); + + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); + + open_tree_fd = sys_open_tree(-EBADF, "/mnt/A", + AT_RECURSIVE | + AT_EMPTY_PATH | + AT_NO_AUTOMOUNT | + AT_SYMLINK_NOFOLLOW | + OPEN_TREE_CLOEXEC | + OPEN_TREE_CLONE); + ASSERT_GE(open_tree_fd, 0); + + attr.userns_fd = get_userns_fd(0, 10000, 10000); + ASSERT_GE(attr.userns_fd, 0); + ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); + ASSERT_EQ(close(open_tree_fd), 0); + + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0); + ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From 6f567c9300a5ebd7b18c26dda1c8d6ffbdd0debd Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 21 Jan 2021 15:29:17 -0600 Subject: objtool: Fix error handling for STD/CLD warnings Actually return an error (and display a backtrace, if requested) for directional bit warnings. Fixes: 2f0f9e9ad7b3 ("objtool: Add Direction Flag validation") Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/dc70f2adbc72f09526f7cab5b6feb8bf7f6c5ad4.1611263461.git.jpoimboe@redhat.com --- tools/objtool/check.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 270b507e7098..3bdd946c2027 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -2651,15 +2651,19 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; case INSN_STD: - if (state.df) + if (state.df) { WARN_FUNC("recursive STD", sec, insn->offset); + return 1; + } state.df = true; break; case INSN_CLD: - if (!state.df && func) + if (!state.df && func) { WARN_FUNC("redundant CLD", sec, insn->offset); + return 1; + } state.df = false; break; -- cgit v1.2.3 From 1f9a1b74942485a0a29e7c4a9a9f2fe8aea17766 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 21 Jan 2021 15:29:18 -0600 Subject: objtool: Fix retpoline detection in asm code The JMP_NOSPEC macro branches to __x86_retpoline_*() rather than the __x86_indirect_thunk_*() wrappers used by C code. Detect jumps to __x86_retpoline_*() as retpoline dynamic jumps. Presumably this doesn't trigger a user-visible bug. I only found it when testing vmlinux.o validation. Fixes: 39b735332cb8 ("objtool: Detect jumps to retpoline thunks") Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/31f5833e2e4f01e3d755889ac77e3661e906c09f.1611263461.git.jpoimboe@redhat.com --- tools/objtool/arch/x86/special.c | 2 +- tools/objtool/check.c | 3 ++- tools/objtool/include/objtool/check.h | 11 +++++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index b4bd3505fc94..e707d9bcd161 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -48,7 +48,7 @@ bool arch_support_alt_relocation(struct special_alt *special_alt, * replacement group. */ return insn->offset == special_alt->new_off && - (insn->type == INSN_CALL || is_static_jump(insn)); + (insn->type == INSN_CALL || is_jump(insn)); } /* diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 3bdd946c2027..081572170f6b 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -785,7 +785,8 @@ static int add_jump_destinations(struct objtool_file *file) dest_sec = reloc->sym->sec; dest_off = reloc->sym->sym.st_value + arch_dest_reloc_offset(reloc->addend); - } else if (strstr(reloc->sym->name, "_indirect_thunk_")) { + } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) || + !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) { /* * Retpoline jumps are really dynamic jumps in * disguise, so convert them accordingly. diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index f4e041fbdab2..b408636c0201 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -68,6 +68,17 @@ static inline bool is_static_jump(struct instruction *insn) insn->type == INSN_JUMP_UNCONDITIONAL; } +static inline bool is_dynamic_jump(struct instruction *insn) +{ + return insn->type == INSN_JUMP_DYNAMIC || + insn->type == INSN_JUMP_DYNAMIC_CONDITIONAL; +} + +static inline bool is_jump(struct instruction *insn) +{ + return is_static_jump(insn) || is_dynamic_jump(insn); +} + struct instruction *find_insn(struct objtool_file *file, struct section *sec, unsigned long offset); -- cgit v1.2.3 From 34ca59e109bdf69704c33b8eeffaa4c9f71076e5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 21 Jan 2021 15:29:19 -0600 Subject: objtool: Fix ".cold" section suffix check for newer versions of GCC With my version of GCC 9.3.1 the ".cold" subfunctions no longer have a numbered suffix, so the trailing period is no longer there. Presumably this doesn't yet trigger a user-visible bug since most of the subfunction detection logic is duplicated. I only found it when testing vmlinux.o validation. Fixes: 54262aa28301 ("objtool: Fix sibling call detection") Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/ca0b5a57f08a2fbb48538dd915cc253b5edabb40.1611263461.git.jpoimboe@redhat.com --- tools/objtool/check.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 081572170f6b..c964cd56b557 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -846,8 +846,8 @@ static int add_jump_destinations(struct objtool_file *file) * case where the parent function's only reference to a * subfunction is through a jump table. */ - if (!strstr(insn->func->name, ".cold.") && - strstr(insn->jump_dest->func->name, ".cold.")) { + if (!strstr(insn->func->name, ".cold") && + strstr(insn->jump_dest->func->name, ".cold")) { insn->func->cfunc = insn->jump_dest->func; insn->jump_dest->func->pfunc = insn->func; -- cgit v1.2.3 From 31a7424bc58063a8e0466c3c10f31a52ec2be4f6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 21 Jan 2021 15:29:20 -0600 Subject: objtool: Support retpoline jump detection for vmlinux.o Objtool converts direct retpoline jumps to type INSN_JUMP_DYNAMIC, since that's what they are semantically. That conversion doesn't work in vmlinux.o validation because the indirect thunk function is present in the object, so the intra-object jump check succeeds before the retpoline jump check gets a chance. Rearrange the checks: check for a retpoline jump before checking for an intra-object jump. Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/4302893513770dde68ddc22a9d6a2a04aca491dd.1611263461.git.jpoimboe@redhat.com --- tools/objtool/check.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index c964cd56b557..5f5949951ca7 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -781,10 +781,6 @@ static int add_jump_destinations(struct objtool_file *file) } else if (reloc->sym->type == STT_SECTION) { dest_sec = reloc->sym->sec; dest_off = arch_dest_reloc_offset(reloc->addend); - } else if (reloc->sym->sec->idx) { - dest_sec = reloc->sym->sec; - dest_off = reloc->sym->sym.st_value + - arch_dest_reloc_offset(reloc->addend); } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) || !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) { /* @@ -798,6 +794,10 @@ static int add_jump_destinations(struct objtool_file *file) insn->retpoline_safe = true; continue; + } else if (reloc->sym->sec->idx) { + dest_sec = reloc->sym->sec; + dest_off = reloc->sym->sym.st_value + + arch_dest_reloc_offset(reloc->addend); } else { /* external sibling call */ insn->call_dest = reloc->sym; -- cgit v1.2.3 From ecf11ba4d066fe527586c6edd6ca68457ca55cf4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 21 Jan 2021 15:29:22 -0600 Subject: objtool: Assume only ELF functions do sibling calls There's an inconsistency in how sibling calls are detected in non-function asm code, depending on the scope of the object. If the target code is external to the object, objtool considers it a sibling call. If the target code is internal but not a function, objtool *doesn't* consider it a sibling call. This can cause some inconsistencies between per-object and vmlinux.o validation. Instead, assume only ELF functions can do sibling calls. This generally matches existing reality, and makes sibling call validation consistent between vmlinux.o and per-object. Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/0e9ab6f3628cc7bf3bde7aa6762d54d7df19ad78.1611263461.git.jpoimboe@redhat.com --- tools/objtool/check.c | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5f5949951ca7..b4e1655017de 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -110,15 +110,20 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file, static bool is_sibling_call(struct instruction *insn) { + /* + * Assume only ELF functions can make sibling calls. This ensures + * sibling call detection consistency between vmlinux.o and individual + * objects. + */ + if (!insn->func) + return false; + /* An indirect jump is either a sibling call or a jump to a table. */ if (insn->type == INSN_JUMP_DYNAMIC) return list_empty(&insn->alts); - if (!is_static_jump(insn)) - return false; - /* add_jump_destinations() sets insn->call_dest for sibling calls. */ - return !!insn->call_dest; + return (is_static_jump(insn) && insn->call_dest); } /* @@ -774,7 +779,7 @@ static int add_jump_destinations(struct objtool_file *file) continue; reloc = find_reloc_by_dest_range(file->elf, insn->sec, - insn->offset, insn->len); + insn->offset, insn->len); if (!reloc) { dest_sec = insn->sec; dest_off = arch_jump_destination(insn); @@ -794,18 +799,21 @@ static int add_jump_destinations(struct objtool_file *file) insn->retpoline_safe = true; continue; - } else if (reloc->sym->sec->idx) { - dest_sec = reloc->sym->sec; - dest_off = reloc->sym->sym.st_value + - arch_dest_reloc_offset(reloc->addend); - } else { - /* external sibling call */ + } else if (insn->func) { + /* internal or external sibling call (with reloc) */ insn->call_dest = reloc->sym; if (insn->call_dest->static_call_tramp) { list_add_tail(&insn->static_call_node, &file->static_call_list); } continue; + } else if (reloc->sym->sec->idx) { + dest_sec = reloc->sym->sec; + dest_off = reloc->sym->sym.st_value + + arch_dest_reloc_offset(reloc->addend); + } else { + /* non-func asm code jumping to another file */ + continue; } insn->jump_dest = find_insn(file, dest_sec, dest_off); @@ -854,7 +862,7 @@ static int add_jump_destinations(struct objtool_file *file) } else if (insn->jump_dest->func->pfunc != insn->func->pfunc && insn->jump_dest->offset == insn->jump_dest->func->offset) { - /* internal sibling call */ + /* internal sibling call (without reloc) */ insn->call_dest = insn->jump_dest->func; if (insn->call_dest->static_call_tramp) { list_add_tail(&insn->static_call_node, @@ -2587,7 +2595,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, case INSN_JUMP_CONDITIONAL: case INSN_JUMP_UNCONDITIONAL: - if (func && is_sibling_call(insn)) { + if (is_sibling_call(insn)) { ret = validate_sibling_call(insn, &state); if (ret) return ret; @@ -2609,7 +2617,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, case INSN_JUMP_DYNAMIC: case INSN_JUMP_DYNAMIC_CONDITIONAL: - if (func && is_sibling_call(insn)) { + if (is_sibling_call(insn)) { ret = validate_sibling_call(insn, &state); if (ret) return ret; -- cgit v1.2.3 From 081df94301e317e84c3413686043987da2c3e39d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 21 Jan 2021 15:29:23 -0600 Subject: objtool: Add asm version of STACK_FRAME_NON_STANDARD To be used for adding asm functions to the ignore list. The "aw" is needed to help the ELF section metadata match GCC-created sections. Otherwise the linker creates duplicate sections instead of combining them. Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/8faa476f9a5ac89af27944ec184c89f95f3c6c49.1611263462.git.jpoimboe@redhat.com --- include/linux/objtool.h | 8 ++++++++ tools/include/linux/objtool.h | 8 ++++++++ 2 files changed, 16 insertions(+) (limited to 'tools') diff --git a/include/linux/objtool.h b/include/linux/objtool.h index 577f51436cf9..add1c6eb157e 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -109,6 +109,12 @@ struct unwind_hint { .popsection .endm +.macro STACK_FRAME_NON_STANDARD func:req + .pushsection .discard.func_stack_frame_non_standard, "aw" + .long \func - . + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -122,6 +128,8 @@ struct unwind_hint { #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 .endm +.macro STACK_FRAME_NON_STANDARD func:req +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index 577f51436cf9..add1c6eb157e 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -109,6 +109,12 @@ struct unwind_hint { .popsection .endm +.macro STACK_FRAME_NON_STANDARD func:req + .pushsection .discard.func_stack_frame_non_standard, "aw" + .long \func - . + .popsection +.endm + #endif /* __ASSEMBLY__ */ #else /* !CONFIG_STACK_VALIDATION */ @@ -122,6 +128,8 @@ struct unwind_hint { #define ANNOTATE_INTRA_FUNCTION_CALL .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0 .endm +.macro STACK_FRAME_NON_STANDARD func:req +.endm #endif #endif /* CONFIG_STACK_VALIDATION */ -- cgit v1.2.3 From b735bd3e68824316655252a931a3353a6ebc036f Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 21 Jan 2021 15:29:24 -0600 Subject: objtool: Combine UNWIND_HINT_RET_OFFSET and UNWIND_HINT_FUNC The ORC metadata generated for UNWIND_HINT_FUNC isn't actually very func-like. With certain usages it can cause stack state mismatches because it doesn't set the return address (CFI_RA). Also, users of UNWIND_HINT_RET_OFFSET no longer need to set a custom return stack offset. Instead they just need to specify a func-like situation, so the current ret_offset code is hacky for no good reason. Solve both problems by simplifying the RET_OFFSET handling and converting it into a more useful UNWIND_HINT_FUNC. If we end up needing the old 'ret_offset' functionality again in the future, we should be able to support it pretty easily with the addition of a custom 'sp_offset' in UNWIND_HINT_FUNC. Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/db9d1f5d79dddfbb3725ef6d8ec3477ad199948d.1611263462.git.jpoimboe@redhat.com --- arch/x86/include/asm/unwind_hints.h | 13 ++---------- arch/x86/kernel/ftrace_64.S | 2 +- arch/x86/lib/retpoline.S | 2 +- include/linux/objtool.h | 5 ++++- tools/include/linux/objtool.h | 5 ++++- tools/objtool/arch/x86/decode.c | 4 ++-- tools/objtool/check.c | 37 ++++++++++++++--------------------- tools/objtool/include/objtool/check.h | 1 - 8 files changed, 29 insertions(+), 40 deletions(-) (limited to 'tools') diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index 664d4610d700..8e574c0afef8 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -48,17 +48,8 @@ UNWIND_HINT_REGS base=\base offset=\offset partial=1 .endm -.macro UNWIND_HINT_FUNC sp_offset=8 - UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=\sp_offset type=UNWIND_HINT_TYPE_CALL -.endm - -/* - * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN - * and sibling calls. On these, sp_offset denotes the expected offset from - * initial_func_cfi. - */ -.macro UNWIND_HINT_RET_OFFSET sp_offset=8 - UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset +.macro UNWIND_HINT_FUNC + UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC .endm #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 58d125ed9d1a..1bf568d901b1 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -277,7 +277,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) restore_mcount_regs 8 /* Restore flags */ popfq - UNWIND_HINT_RET_OFFSET + UNWIND_HINT_FUNC jmp ftrace_epilogue SYM_FUNC_END(ftrace_regs_caller) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index b4c43a9b1483..f6fb1d218dcc 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -28,7 +28,7 @@ SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg) jmp .Lspec_trap_\@ .Ldo_rop_\@: mov %\reg, (%_ASM_SP) - UNWIND_HINT_RET_OFFSET + UNWIND_HINT_FUNC ret SYM_FUNC_END(__x86_retpoline_\reg) diff --git a/include/linux/objtool.h b/include/linux/objtool.h index add1c6eb157e..7e72d975cb76 100644 --- a/include/linux/objtool.h +++ b/include/linux/objtool.h @@ -29,11 +29,14 @@ struct unwind_hint { * * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that * sp_reg+sp_offset points to the iret return frame. + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 -#define UNWIND_HINT_TYPE_RET_OFFSET 3 +#define UNWIND_HINT_TYPE_FUNC 3 #ifdef CONFIG_STACK_VALIDATION diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index add1c6eb157e..7e72d975cb76 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -29,11 +29,14 @@ struct unwind_hint { * * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that * sp_reg+sp_offset points to the iret return frame. + * + * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function. + * Useful for code which doesn't have an ELF function annotation. */ #define UNWIND_HINT_TYPE_CALL 0 #define UNWIND_HINT_TYPE_REGS 1 #define UNWIND_HINT_TYPE_REGS_PARTIAL 2 -#define UNWIND_HINT_TYPE_RET_OFFSET 3 +#define UNWIND_HINT_TYPE_FUNC 3 #ifdef CONFIG_STACK_VALIDATION diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 6baa22732ca6..9637e3bf5ab8 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -563,8 +563,8 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state) state->cfa.offset = 8; /* initial RA (return address) */ - state->regs[16].base = CFI_CFA; - state->regs[16].offset = -8; + state->regs[CFI_RA].base = CFI_CFA; + state->regs[CFI_RA].offset = -8; } const char *arch_nop_insn(int len) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index b4e1655017de..f88f20327bf2 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1404,13 +1404,20 @@ static int add_jump_table_alts(struct objtool_file *file) return 0; } +static void set_func_state(struct cfi_state *state) +{ + state->cfa = initial_func_cfi.cfa; + memcpy(&state->regs, &initial_func_cfi.regs, + CFI_NUM_REGS * sizeof(struct cfi_reg)); + state->stack_size = initial_func_cfi.cfa.offset; +} + static int read_unwind_hints(struct objtool_file *file) { struct section *sec, *relocsec; struct reloc *reloc; struct unwind_hint *hint; struct instruction *insn; - struct cfi_reg *cfa; int i; sec = find_section_by_name(file->elf, ".discard.unwind_hints"); @@ -1445,22 +1452,20 @@ static int read_unwind_hints(struct objtool_file *file) return -1; } - cfa = &insn->cfi.cfa; + insn->hint = true; - if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) { - insn->ret_offset = bswap_if_needed(hint->sp_offset); + if (hint->type == UNWIND_HINT_TYPE_FUNC) { + set_func_state(&insn->cfi); continue; } - insn->hint = true; - if (arch_decode_hint_reg(insn, hint->sp_reg)) { WARN_FUNC("unsupported unwind_hint sp base reg %d", insn->sec, insn->offset, hint->sp_reg); return -1; } - cfa->offset = bswap_if_needed(hint->sp_offset); + insn->cfi.cfa.offset = bswap_if_needed(hint->sp_offset); insn->cfi.type = hint->type; insn->cfi.end = hint->end; } @@ -1716,27 +1721,18 @@ static bool is_fentry_call(struct instruction *insn) static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state) { - u8 ret_offset = insn->ret_offset; struct cfi_state *cfi = &state->cfi; int i; if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap) return true; - if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset) + if (cfi->cfa.offset != initial_func_cfi.cfa.offset) return true; - if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset) + if (cfi->stack_size != initial_func_cfi.cfa.offset) return true; - /* - * If there is a ret offset hint then don't check registers - * because a callee-saved register might have been pushed on - * the stack. - */ - if (ret_offset) - return false; - for (i = 0; i < CFI_NUM_REGS; i++) { if (cfi->regs[i].base != initial_func_cfi.regs[i].base || cfi->regs[i].offset != initial_func_cfi.regs[i].offset) @@ -2880,10 +2876,7 @@ static int validate_section(struct objtool_file *file, struct section *sec) continue; init_insn_state(&state, sec); - state.cfi.cfa = initial_func_cfi.cfa; - memcpy(&state.cfi.regs, &initial_func_cfi.regs, - CFI_NUM_REGS * sizeof(struct cfi_reg)); - state.cfi.stack_size = initial_func_cfi.cfa.offset; + set_func_state(&state.cfi); warnings += validate_symbol(file, sec, func, &state); } diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h index b408636c0201..4891ead0e85f 100644 --- a/tools/objtool/include/objtool/check.h +++ b/tools/objtool/include/objtool/check.h @@ -50,7 +50,6 @@ struct instruction { bool retpoline_safe; s8 instr; u8 visited; - u8 ret_offset; struct alt_group *alt_group; struct symbol *call_dest; struct instruction *jump_dest; -- cgit v1.2.3 From c26acfbbfbc2ae4167e33825793e85e1a53058d8 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 21 Jan 2021 15:29:25 -0600 Subject: objtool: Add xen_start_kernel() to noreturn list xen_start_kernel() doesn't return. Annotate it as such so objtool can follow the code flow. Signed-off-by: Josh Poimboeuf Link: https://lore.kernel.org/r/930deafa89256c60b180442df59a1bbae48f30ab.1611263462.git.jpoimboe@redhat.com --- tools/objtool/check.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f88f20327bf2..5f056ddab4fa 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -160,6 +160,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, "machine_real_restart", "rewind_stack_do_exit", "kunit_try_catch_throw", + "xen_start_kernel", }; if (!func) -- cgit v1.2.3 From 9dc00b25eadf2908ae76ac0607b55a9f4e0e0cdc Mon Sep 17 00:00:00 2001 From: Barry Song Date: Sat, 6 Feb 2021 00:33:25 +1300 Subject: dma-mapping: benchmark: pretend DMA is transmitting In a real dma mapping user case, after dma_map is done, data will be transmit. Thus, in multi-threaded user scenario, IOMMU contention should not be that severe. For example, if users enable multiple threads to send network packets through 1G/10G/100Gbps NIC, usually the steps will be: map -> transmission -> unmap. Transmission delay reduces the contention of IOMMU. Here a delay is added to simulate the transmission between map and unmap so that the tested result could be more accurate for TX and simple RX. A typical TX transmission for NIC would be like: map -> TX -> unmap since the socket buffers come from OS. Simple RX model eg. disk driver, is also map -> RX -> unmap, but real RX model in a NIC could be more complicated considering packets can come spontaneously and many drivers are using pre-mapped buffers pool. This is in the TBD list. Signed-off-by: Barry Song Signed-off-by: Christoph Hellwig --- kernel/dma/map_benchmark.c | 12 +++++++++++- tools/testing/selftests/dma/dma_map_benchmark.c | 21 ++++++++++++++++++--- 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c index da95df381483..e0e64f8b0739 100644 --- a/kernel/dma/map_benchmark.c +++ b/kernel/dma/map_benchmark.c @@ -21,6 +21,7 @@ #define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) #define DMA_MAP_MAX_THREADS 1024 #define DMA_MAP_MAX_SECONDS 300 +#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) #define DMA_MAP_BIDIRECTIONAL 0 #define DMA_MAP_TO_DEVICE 1 @@ -36,7 +37,8 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u8 expansion[84]; /* For future use */ + __u32 dma_trans_ns; /* time for DMA transmission in ns */ + __u8 expansion[80]; /* For future use */ }; struct map_benchmark_data { @@ -87,6 +89,9 @@ static int map_benchmark_thread(void *data) map_etime = ktime_get(); map_delta = ktime_sub(map_etime, map_stime); + /* Pretend DMA is transmitting */ + ndelay(map->bparam.dma_trans_ns); + unmap_stime = ktime_get(); dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir); unmap_etime = ktime_get(); @@ -218,6 +223,11 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd, return -EINVAL; } + if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) { + pr_err("invalid transmission delay\n"); + return -EINVAL; + } + if (map->bparam.node != NUMA_NO_NODE && !node_possible(map->bparam.node)) { pr_err("invalid numa node\n"); diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c index 537d65968c48..fb23ce9617ea 100644 --- a/tools/testing/selftests/dma/dma_map_benchmark.c +++ b/tools/testing/selftests/dma/dma_map_benchmark.c @@ -12,9 +12,12 @@ #include #include +#define NSEC_PER_MSEC 1000000L + #define DMA_MAP_BENCHMARK _IOWR('d', 1, struct map_benchmark) #define DMA_MAP_MAX_THREADS 1024 #define DMA_MAP_MAX_SECONDS 300 +#define DMA_MAP_MAX_TRANS_DELAY (10 * NSEC_PER_MSEC) #define DMA_MAP_BIDIRECTIONAL 0 #define DMA_MAP_TO_DEVICE 1 @@ -36,7 +39,8 @@ struct map_benchmark { __s32 node; /* which numa node this benchmark will run on */ __u32 dma_bits; /* DMA addressing capability */ __u32 dma_dir; /* DMA data direction */ - __u8 expansion[84]; /* For future use */ + __u32 dma_trans_ns; /* time for DMA transmission in ns */ + __u8 expansion[80]; /* For future use */ }; int main(int argc, char **argv) @@ -46,12 +50,12 @@ int main(int argc, char **argv) /* default single thread, run 20 seconds on NUMA_NO_NODE */ int threads = 1, seconds = 20, node = -1; /* default dma mask 32bit, bidirectional DMA */ - int bits = 32, dir = DMA_MAP_BIDIRECTIONAL; + int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL; int cmd = DMA_MAP_BENCHMARK; char *p; - while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) { + while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) { switch (opt) { case 't': threads = atoi(optarg); @@ -68,6 +72,9 @@ int main(int argc, char **argv) case 'd': dir = atoi(optarg); break; + case 'x': + xdelay = atoi(optarg); + break; default: return -1; } @@ -85,6 +92,12 @@ int main(int argc, char **argv) exit(1); } + if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) { + fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n", + DMA_MAP_MAX_TRANS_DELAY); + exit(1); + } + /* suppose the mininum DMA zone is 1MB in the world */ if (bits < 20 || bits > 64) { fprintf(stderr, "invalid dma mask bit, must be in 20-64\n"); @@ -109,6 +122,8 @@ int main(int argc, char **argv) map.node = node; map.dma_bits = bits; map.dma_dir = dir; + map.dma_trans_ns = xdelay; + if (ioctl(fd, cmd, &map)) { perror("ioctl"); exit(1); -- cgit v1.2.3 From f1c3d73e973cfad85ff5d3d86086503e742d8c62 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Feb 2021 13:13:33 +0100 Subject: module: remove EXPORT_SYMBOL_GPL_FUTURE As far as I can tell this has never been used at all, and certainly not any time recently. Reviewed-by: Miroslav Benes Reviewed-by: Emil Velikov Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- arch/x86/tools/relocs.c | 4 ++-- include/asm-generic/vmlinux.lds.h | 14 -------------- include/linux/export.h | 1 - include/linux/module.h | 5 ----- kernel/module.c | 29 ++--------------------------- scripts/mod/modpost.c | 13 +------------ scripts/mod/modpost.h | 1 - scripts/module.lds.S | 2 -- tools/include/linux/export.h | 1 - 9 files changed, 5 insertions(+), 65 deletions(-) (limited to 'tools') diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index ce7188cbdae5..0d210d0e83e2 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -61,8 +61,8 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|" "__(start|end)_pci_.*|" "__(start|end)_builtin_fw|" - "__(start|stop)___ksymtab(|_gpl|_unused|_unused_gpl|_gpl_future)|" - "__(start|stop)___kcrctab(|_gpl|_unused|_unused_gpl|_gpl_future)|" + "__(start|stop)___ksymtab(|_gpl|_unused|_unused_gpl)|" + "__(start|stop)___kcrctab(|_gpl|_unused|_unused_gpl)|" "__(start|stop)___param|" "__(start|stop)___modver|" "__(start|stop)___bug_table|" diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index b2b3d81b1535..83243506e68b 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -495,13 +495,6 @@ __stop___ksymtab_unused_gpl = .; \ } \ \ - /* Kernel symbol table: GPL-future-only symbols */ \ - __ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \ - __start___ksymtab_gpl_future = .; \ - KEEP(*(SORT(___ksymtab_gpl_future+*))) \ - __stop___ksymtab_gpl_future = .; \ - } \ - \ /* Kernel symbol table: Normal symbols */ \ __kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \ __start___kcrctab = .; \ @@ -530,13 +523,6 @@ __stop___kcrctab_unused_gpl = .; \ } \ \ - /* Kernel symbol table: GPL-future-only symbols */ \ - __kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \ - __start___kcrctab_gpl_future = .; \ - KEEP(*(SORT(___kcrctab_gpl_future+*))) \ - __stop___kcrctab_gpl_future = .; \ - } \ - \ /* Kernel symbol table: strings */ \ __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \ *(__ksymtab_strings) \ diff --git a/include/linux/export.h b/include/linux/export.h index fceb5e855717..362b64f8d4a7 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -157,7 +157,6 @@ struct kernel_symbol { #define EXPORT_SYMBOL(sym) _EXPORT_SYMBOL(sym, "") #define EXPORT_SYMBOL_GPL(sym) _EXPORT_SYMBOL(sym, "_gpl") -#define EXPORT_SYMBOL_GPL_FUTURE(sym) _EXPORT_SYMBOL(sym, "_gpl_future") #define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", #ns) #define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "_gpl", #ns) diff --git a/include/linux/module.h b/include/linux/module.h index da0f5966ee80..e6e50ee30412 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -411,11 +411,6 @@ struct module { bool async_probe_requested; - /* symbols that will be GPL-only in the near future. */ - const struct kernel_symbol *gpl_future_syms; - const s32 *gpl_future_crcs; - unsigned int num_gpl_future_syms; - /* Exception table */ unsigned int num_exentries; struct exception_table_entry *extable; diff --git a/kernel/module.c b/kernel/module.c index 9cd6814405b9..95186c9d81ea 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -408,11 +408,8 @@ extern const struct kernel_symbol __start___ksymtab[]; extern const struct kernel_symbol __stop___ksymtab[]; extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; -extern const struct kernel_symbol __start___ksymtab_gpl_future[]; -extern const struct kernel_symbol __stop___ksymtab_gpl_future[]; extern const s32 __start___kcrctab[]; extern const s32 __start___kcrctab_gpl[]; -extern const s32 __start___kcrctab_gpl_future[]; #ifdef CONFIG_UNUSED_SYMBOLS extern const struct kernel_symbol __start___ksymtab_unused[]; extern const struct kernel_symbol __stop___ksymtab_unused[]; @@ -434,7 +431,6 @@ struct symsearch { enum mod_license { NOT_GPL_ONLY, GPL_ONLY, - WILL_BE_GPL_ONLY, } license; bool unused; }; @@ -458,15 +454,8 @@ static bool check_exported_symbol(const struct symsearch *syms, { struct find_symbol_arg *fsa = data; - if (!fsa->gplok) { - if (syms->license == GPL_ONLY) - return false; - if (syms->license == WILL_BE_GPL_ONLY && fsa->warn) { - pr_warn("Symbol %s is being used by a non-GPL module, " - "which will not be allowed in the future\n", - fsa->name); - } - } + if (!fsa->gplok && syms->license == GPL_ONLY) + return false; #ifdef CONFIG_UNUSED_SYMBOLS if (syms->unused && fsa->warn) { @@ -550,9 +539,6 @@ static bool find_symbol(struct find_symbol_arg *fsa) { __start___ksymtab_gpl, __stop___ksymtab_gpl, __start___kcrctab_gpl, GPL_ONLY, false }, - { __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future, - __start___kcrctab_gpl_future, - WILL_BE_GPL_ONLY, false }, #ifdef CONFIG_UNUSED_SYMBOLS { __start___ksymtab_unused, __stop___ksymtab_unused, __start___kcrctab_unused, @@ -579,10 +565,6 @@ static bool find_symbol(struct find_symbol_arg *fsa) { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms, mod->gpl_crcs, GPL_ONLY, false }, - { mod->gpl_future_syms, - mod->gpl_future_syms + mod->num_gpl_future_syms, - mod->gpl_future_crcs, - WILL_BE_GPL_ONLY, false }, #ifdef CONFIG_UNUSED_SYMBOLS { mod->unused_syms, mod->unused_syms + mod->num_unused_syms, @@ -2292,7 +2274,6 @@ static int verify_exported_symbols(struct module *mod) } arr[] = { { mod->syms, mod->num_syms }, { mod->gpl_syms, mod->num_gpl_syms }, - { mod->gpl_future_syms, mod->num_gpl_future_syms }, #ifdef CONFIG_UNUSED_SYMBOLS { mod->unused_syms, mod->num_unused_syms }, { mod->unused_gpl_syms, mod->num_unused_gpl_syms }, @@ -3308,11 +3289,6 @@ static int find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->gpl_syms), &mod->num_gpl_syms); mod->gpl_crcs = section_addr(info, "__kcrctab_gpl"); - mod->gpl_future_syms = section_objs(info, - "__ksymtab_gpl_future", - sizeof(*mod->gpl_future_syms), - &mod->num_gpl_future_syms); - mod->gpl_future_crcs = section_addr(info, "__kcrctab_gpl_future"); #ifdef CONFIG_UNUSED_SYMBOLS mod->unused_syms = section_objs(info, "__ksymtab_unused", @@ -3506,7 +3482,6 @@ static int check_module_license_and_versions(struct module *mod) #ifdef CONFIG_MODVERSIONS if ((mod->num_syms && !mod->crcs) || (mod->num_gpl_syms && !mod->gpl_crcs) - || (mod->num_gpl_future_syms && !mod->gpl_future_crcs) #ifdef CONFIG_UNUSED_SYMBOLS || (mod->num_unused_syms && !mod->unused_crcs) || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs) diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index d6c81657d695..25c1446055d1 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -44,7 +44,7 @@ static bool error_occurred; enum export { export_plain, export_unused, export_gpl, - export_unused_gpl, export_gpl_future, export_unknown + export_unused_gpl, export_unknown }; /* In kernel, this size is defined in linux/module.h; @@ -304,7 +304,6 @@ static const struct { { .str = "EXPORT_UNUSED_SYMBOL", .export = export_unused }, { .str = "EXPORT_SYMBOL_GPL", .export = export_gpl }, { .str = "EXPORT_UNUSED_SYMBOL_GPL", .export = export_unused_gpl }, - { .str = "EXPORT_SYMBOL_GPL_FUTURE", .export = export_gpl_future }, { .str = "(unknown)", .export = export_unknown }, }; @@ -369,8 +368,6 @@ static enum export export_from_secname(struct elf_info *elf, unsigned int sec) return export_gpl; else if (strstarts(secname, "___ksymtab_unused_gpl+")) return export_unused_gpl; - else if (strstarts(secname, "___ksymtab_gpl_future+")) - return export_gpl_future; else return export_unknown; } @@ -385,8 +382,6 @@ static enum export export_from_sec(struct elf_info *elf, unsigned int sec) return export_gpl; else if (sec == elf->export_unused_gpl_sec) return export_unused_gpl; - else if (sec == elf->export_gpl_future_sec) - return export_gpl_future; else return export_unknown; } @@ -596,8 +591,6 @@ static int parse_elf(struct elf_info *info, const char *filename) info->export_gpl_sec = i; else if (strcmp(secname, "__ksymtab_unused_gpl") == 0) info->export_unused_gpl_sec = i; - else if (strcmp(secname, "__ksymtab_gpl_future") == 0) - info->export_gpl_future_sec = i; if (sechdrs[i].sh_type == SHT_SYMTAB) { unsigned int sh_link_idx; @@ -2152,10 +2145,6 @@ static void check_for_gpl_usage(enum export exp, const char *m, const char *s) error("GPL-incompatible module %s.ko uses GPL-only symbol marked UNUSED '%s'\n", m, s); break; - case export_gpl_future: - warn("GPL-incompatible module %s.ko uses future GPL-only symbol '%s'\n", - m, s); - break; case export_plain: case export_unused: case export_unknown: diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index e6f46eee0af0..834220de002b 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -142,7 +142,6 @@ struct elf_info { Elf_Section export_unused_sec; Elf_Section export_gpl_sec; Elf_Section export_unused_gpl_sec; - Elf_Section export_gpl_future_sec; char *strtab; char *modinfo; unsigned int modinfo_len; diff --git a/scripts/module.lds.S b/scripts/module.lds.S index 69b9b71a6a47..d82b452e8a71 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -13,12 +13,10 @@ SECTIONS { __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) } __ksymtab_unused 0 : { *(SORT(___ksymtab_unused+*)) } __ksymtab_unused_gpl 0 : { *(SORT(___ksymtab_unused_gpl+*)) } - __ksymtab_gpl_future 0 : { *(SORT(___ksymtab_gpl_future+*)) } __kcrctab 0 : { *(SORT(___kcrctab+*)) } __kcrctab_gpl 0 : { *(SORT(___kcrctab_gpl+*)) } __kcrctab_unused 0 : { *(SORT(___kcrctab_unused+*)) } __kcrctab_unused_gpl 0 : { *(SORT(___kcrctab_unused_gpl+*)) } - __kcrctab_gpl_future 0 : { *(SORT(___kcrctab_gpl_future+*)) } .init_array 0 : ALIGN(8) { *(SORT(.init_array.*)) *(.init_array) } diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h index d07e586b9ba0..9f61349a8944 100644 --- a/tools/include/linux/export.h +++ b/tools/include/linux/export.h @@ -3,7 +3,6 @@ #define EXPORT_SYMBOL(sym) #define EXPORT_SYMBOL_GPL(sym) -#define EXPORT_SYMBOL_GPL_FUTURE(sym) #define EXPORT_UNUSED_SYMBOL(sym) #define EXPORT_UNUSED_SYMBOL_GPL(sym) -- cgit v1.2.3 From 367948220fcefcad1bf0d3d595a06efe0694acae Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 2 Feb 2021 13:13:34 +0100 Subject: module: remove EXPORT_UNUSED_SYMBOL* EXPORT_UNUSED_SYMBOL* is not actually used anywhere. Remove the unused functionality as we generally just remove unused code anyway. Reviewed-by: Miroslav Benes Reviewed-by: Emil Velikov Signed-off-by: Christoph Hellwig Signed-off-by: Jessica Yu --- arch/arm/configs/bcm2835_defconfig | 1 - arch/arm/configs/mxs_defconfig | 1 - arch/mips/configs/nlm_xlp_defconfig | 1 - arch/mips/configs/nlm_xlr_defconfig | 1 - arch/parisc/configs/generic-32bit_defconfig | 1 - arch/parisc/configs/generic-64bit_defconfig | 1 - arch/powerpc/configs/ppc6xx_defconfig | 1 - arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - arch/sh/configs/edosk7760_defconfig | 1 - arch/sh/configs/sdk7780_defconfig | 1 - arch/x86/configs/i386_defconfig | 1 - arch/x86/configs/x86_64_defconfig | 1 - arch/x86/tools/relocs.c | 4 +- include/asm-generic/vmlinux.lds.h | 28 ------------ include/linux/export.h | 8 ---- include/linux/module.h | 12 ----- init/Kconfig | 17 ------- kernel/module.c | 71 +++-------------------------- scripts/checkpatch.pl | 6 +-- scripts/mod/modpost.c | 39 ++-------------- scripts/mod/modpost.h | 2 - scripts/module.lds.S | 4 -- tools/include/linux/export.h | 2 - 24 files changed, 13 insertions(+), 193 deletions(-) (limited to 'tools') diff --git a/arch/arm/configs/bcm2835_defconfig b/arch/arm/configs/bcm2835_defconfig index 44ff9cd88d81..d6c6c2e031c4 100644 --- a/arch/arm/configs/bcm2835_defconfig +++ b/arch/arm/configs/bcm2835_defconfig @@ -177,7 +177,6 @@ CONFIG_BOOT_PRINTK_DELAY=y CONFIG_DYNAMIC_DEBUG=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_LOCKUP_DETECTOR=y CONFIG_SCHED_TRACER=y diff --git a/arch/arm/configs/mxs_defconfig b/arch/arm/configs/mxs_defconfig index a9c6f32a9b1c..ca32446b187f 100644 --- a/arch/arm/configs/mxs_defconfig +++ b/arch/arm/configs/mxs_defconfig @@ -164,7 +164,6 @@ CONFIG_FONTS=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_FRAME_WARN=2048 -CONFIG_UNUSED_SYMBOLS=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y CONFIG_SOFTLOCKUP_DETECTOR=y diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig index 72a211d2d556..32c290611723 100644 --- a/arch/mips/configs/nlm_xlp_defconfig +++ b/arch/mips/configs/nlm_xlp_defconfig @@ -549,7 +549,6 @@ CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_FRAME_WARN=1024 -CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DETECT_HUNG_TASK=y CONFIG_SCHEDSTATS=y diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig index 4ecb157e56d4..bf9b9244929e 100644 --- a/arch/mips/configs/nlm_xlr_defconfig +++ b/arch/mips/configs/nlm_xlr_defconfig @@ -500,7 +500,6 @@ CONFIG_CRC7=m CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y # CONFIG_ENABLE_MUST_CHECK is not set -CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DETECT_HUNG_TASK=y CONFIG_SCHEDSTATS=y diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 3cbcfad5f724..7611d48c599e 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -22,7 +22,6 @@ CONFIG_PCI_LBA=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_UNUSED_SYMBOLS=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_BINFMT_MISC=m diff --git a/arch/parisc/configs/generic-64bit_defconfig b/arch/parisc/configs/generic-64bit_defconfig index 8f81fcbf04c4..53054b81461a 100644 --- a/arch/parisc/configs/generic-64bit_defconfig +++ b/arch/parisc/configs/generic-64bit_defconfig @@ -31,7 +31,6 @@ CONFIG_MODULE_FORCE_LOAD=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y -CONFIG_UNUSED_SYMBOLS=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BINFMT_MISC=m # CONFIG_COMPACTION is not set diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index ef09f3cce1fa..34c3859040f9 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -1072,7 +1072,6 @@ CONFIG_NLS_ISO8859_15=m CONFIG_NLS_KOI8_R=m CONFIG_NLS_KOI8_U=m CONFIG_DEBUG_INFO=y -CONFIG_UNUSED_SYMBOLS=y CONFIG_HEADERS_INSTALL=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index c4f6ff98a612..58e54d17e315 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -71,7 +71,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_MODULE_SIG_SHA256=y -CONFIG_UNUSED_SYMBOLS=y CONFIG_BLK_DEV_INTEGRITY=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 51135893cffe..b5e62c0d3e23 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -66,7 +66,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y CONFIG_MODVERSIONS=y CONFIG_MODULE_SRCVERSION_ALL=y CONFIG_MODULE_SIG_SHA256=y -CONFIG_UNUSED_SYMBOLS=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_BLK_WBT=y CONFIG_BLK_CGROUP_IOLATENCY=y diff --git a/arch/sh/configs/edosk7760_defconfig b/arch/sh/configs/edosk7760_defconfig index 02ba62298576..d77f54e906fd 100644 --- a/arch/sh/configs/edosk7760_defconfig +++ b/arch/sh/configs/edosk7760_defconfig @@ -102,7 +102,6 @@ CONFIG_NLS_UTF8=y CONFIG_PRINTK_TIME=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y -CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_SHIRQ=y CONFIG_DETECT_HUNG_TASK=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index d10a0414123a..d53c4595fb2e 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -130,7 +130,6 @@ CONFIG_NLS_ISO8859_15=y CONFIG_NLS_UTF8=y # CONFIG_ENABLE_MUST_CHECK is not set CONFIG_MAGIC_SYSRQ=y -CONFIG_UNUSED_SYMBOLS=y CONFIG_DEBUG_KERNEL=y CONFIG_DETECT_HUNG_TASK=y # CONFIG_SCHED_DEBUG is not set diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 78210793d357..9c9c4a888b1d 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -50,7 +50,6 @@ CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_UNUSED_SYMBOLS is not set CONFIG_BINFMT_MISC=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 9936528e1939..b60bd2d86034 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -48,7 +48,6 @@ CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_FORCE_UNLOAD=y -# CONFIG_UNUSED_SYMBOLS is not set CONFIG_BINFMT_MISC=y CONFIG_NET=y CONFIG_PACKET=y diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index 0d210d0e83e2..b9c577a3cacc 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c @@ -61,8 +61,8 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|" "__(start|end)_pci_.*|" "__(start|end)_builtin_fw|" - "__(start|stop)___ksymtab(|_gpl|_unused|_unused_gpl)|" - "__(start|stop)___kcrctab(|_gpl|_unused|_unused_gpl)|" + "__(start|stop)___ksymtab(|_gpl)|" + "__(start|stop)___kcrctab(|_gpl)|" "__(start|stop)___param|" "__(start|stop)___modver|" "__(start|stop)___bug_table|" diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 83243506e68b..1fa338ac6a54 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -481,20 +481,6 @@ __stop___ksymtab_gpl = .; \ } \ \ - /* Kernel symbol table: Normal unused symbols */ \ - __ksymtab_unused : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) { \ - __start___ksymtab_unused = .; \ - KEEP(*(SORT(___ksymtab_unused+*))) \ - __stop___ksymtab_unused = .; \ - } \ - \ - /* Kernel symbol table: GPL-only unused symbols */ \ - __ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \ - __start___ksymtab_unused_gpl = .; \ - KEEP(*(SORT(___ksymtab_unused_gpl+*))) \ - __stop___ksymtab_unused_gpl = .; \ - } \ - \ /* Kernel symbol table: Normal symbols */ \ __kcrctab : AT(ADDR(__kcrctab) - LOAD_OFFSET) { \ __start___kcrctab = .; \ @@ -509,20 +495,6 @@ __stop___kcrctab_gpl = .; \ } \ \ - /* Kernel symbol table: Normal unused symbols */ \ - __kcrctab_unused : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) { \ - __start___kcrctab_unused = .; \ - KEEP(*(SORT(___kcrctab_unused+*))) \ - __stop___kcrctab_unused = .; \ - } \ - \ - /* Kernel symbol table: GPL-only unused symbols */ \ - __kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \ - __start___kcrctab_unused_gpl = .; \ - KEEP(*(SORT(___kcrctab_unused_gpl+*))) \ - __stop___kcrctab_unused_gpl = .; \ - } \ - \ /* Kernel symbol table: strings */ \ __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) { \ *(__ksymtab_strings) \ diff --git a/include/linux/export.h b/include/linux/export.h index 362b64f8d4a7..6271a5d9c988 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -160,14 +160,6 @@ struct kernel_symbol { #define EXPORT_SYMBOL_NS(sym, ns) __EXPORT_SYMBOL(sym, "", #ns) #define EXPORT_SYMBOL_NS_GPL(sym, ns) __EXPORT_SYMBOL(sym, "_gpl", #ns) -#ifdef CONFIG_UNUSED_SYMBOLS -#define EXPORT_UNUSED_SYMBOL(sym) _EXPORT_SYMBOL(sym, "_unused") -#define EXPORT_UNUSED_SYMBOL_GPL(sym) _EXPORT_SYMBOL(sym, "_unused_gpl") -#else -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) -#endif - #endif /* !__ASSEMBLY__ */ #endif /* _LINUX_EXPORT_H */ diff --git a/include/linux/module.h b/include/linux/module.h index e6e50ee30412..59f094fa6f74 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -392,18 +392,6 @@ struct module { const s32 *gpl_crcs; bool using_gplonly_symbols; -#ifdef CONFIG_UNUSED_SYMBOLS - /* unused exported symbols. */ - const struct kernel_symbol *unused_syms; - const s32 *unused_crcs; - unsigned int num_unused_syms; - - /* GPL-only, unused exported symbols. */ - unsigned int num_unused_gpl_syms; - const struct kernel_symbol *unused_gpl_syms; - const s32 *unused_gpl_crcs; -#endif - #ifdef CONFIG_MODULE_SIG /* Signature was verified. */ bool sig_ok; diff --git a/init/Kconfig b/init/Kconfig index b77c60f8b963..11b803b45c19 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -2262,25 +2262,8 @@ config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS If unsure, say N. -config UNUSED_SYMBOLS - bool "Enable unused/obsolete exported symbols" - default y if X86 - help - Unused but exported symbols make the kernel needlessly bigger. For - that reason most of these unused exports will soon be removed. This - option is provided temporarily to provide a transition period in case - some external kernel module needs one of these symbols anyway. If you - encounter such a case in your module, consider if you are actually - using the right API. (rationale: since nobody in the kernel is using - this in a module, there is a pretty good chance it's actually the - wrong interface to use). If you really need the symbol, please send a - mail to the linux kernel mailing list mentioning the symbol and why - you really need it, and what the merge plan to the mainline kernel for - your module is. - config TRIM_UNUSED_KSYMS bool "Trim unused exported kernel symbols" - depends on !UNUSED_SYMBOLS help The kernel and some modules make many symbols available for other modules to use via EXPORT_SYMBOL() and variants. Depending diff --git a/kernel/module.c b/kernel/module.c index 95186c9d81ea..93f360250bcb 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -410,14 +410,6 @@ extern const struct kernel_symbol __start___ksymtab_gpl[]; extern const struct kernel_symbol __stop___ksymtab_gpl[]; extern const s32 __start___kcrctab[]; extern const s32 __start___kcrctab_gpl[]; -#ifdef CONFIG_UNUSED_SYMBOLS -extern const struct kernel_symbol __start___ksymtab_unused[]; -extern const struct kernel_symbol __stop___ksymtab_unused[]; -extern const struct kernel_symbol __start___ksymtab_unused_gpl[]; -extern const struct kernel_symbol __stop___ksymtab_unused_gpl[]; -extern const s32 __start___kcrctab_unused[]; -extern const s32 __start___kcrctab_unused_gpl[]; -#endif #ifndef CONFIG_MODVERSIONS #define symversion(base, idx) NULL @@ -432,7 +424,6 @@ struct symsearch { NOT_GPL_ONLY, GPL_ONLY, } license; - bool unused; }; struct find_symbol_arg { @@ -456,19 +447,6 @@ static bool check_exported_symbol(const struct symsearch *syms, if (!fsa->gplok && syms->license == GPL_ONLY) return false; - -#ifdef CONFIG_UNUSED_SYMBOLS - if (syms->unused && fsa->warn) { - pr_warn("Symbol %s is marked as UNUSED, however this module is " - "using it.\n", fsa->name); - pr_warn("This symbol will go away in the future.\n"); - pr_warn("Please evaluate if this is the right api to use and " - "if it really is, submit a report to the linux kernel " - "mailing list together with submitting your code for " - "inclusion.\n"); - } -#endif - fsa->owner = owner; fsa->crc = symversion(syms->crcs, symnum); fsa->sym = &syms->start[symnum]; @@ -535,18 +513,10 @@ static bool find_symbol(struct find_symbol_arg *fsa) { static const struct symsearch arr[] = { { __start___ksymtab, __stop___ksymtab, __start___kcrctab, - NOT_GPL_ONLY, false }, + NOT_GPL_ONLY }, { __start___ksymtab_gpl, __stop___ksymtab_gpl, __start___kcrctab_gpl, - GPL_ONLY, false }, -#ifdef CONFIG_UNUSED_SYMBOLS - { __start___ksymtab_unused, __stop___ksymtab_unused, - __start___kcrctab_unused, - NOT_GPL_ONLY, true }, - { __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl, - __start___kcrctab_unused_gpl, - GPL_ONLY, true }, -#endif + GPL_ONLY }, }; struct module *mod; unsigned int i; @@ -561,20 +531,10 @@ static bool find_symbol(struct find_symbol_arg *fsa) lockdep_is_held(&module_mutex)) { struct symsearch arr[] = { { mod->syms, mod->syms + mod->num_syms, mod->crcs, - NOT_GPL_ONLY, false }, + NOT_GPL_ONLY }, { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms, mod->gpl_crcs, - GPL_ONLY, false }, -#ifdef CONFIG_UNUSED_SYMBOLS - { mod->unused_syms, - mod->unused_syms + mod->num_unused_syms, - mod->unused_crcs, - NOT_GPL_ONLY, true }, - { mod->unused_gpl_syms, - mod->unused_gpl_syms + mod->num_unused_gpl_syms, - mod->unused_gpl_crcs, - GPL_ONLY, true }, -#endif + GPL_ONLY }, }; if (mod->state == MODULE_STATE_UNFORMED) @@ -2274,10 +2234,6 @@ static int verify_exported_symbols(struct module *mod) } arr[] = { { mod->syms, mod->num_syms }, { mod->gpl_syms, mod->num_gpl_syms }, -#ifdef CONFIG_UNUSED_SYMBOLS - { mod->unused_syms, mod->num_unused_syms }, - { mod->unused_gpl_syms, mod->num_unused_gpl_syms }, -#endif }; for (i = 0; i < ARRAY_SIZE(arr); i++) { @@ -3290,16 +3246,6 @@ static int find_module_sections(struct module *mod, struct load_info *info) &mod->num_gpl_syms); mod->gpl_crcs = section_addr(info, "__kcrctab_gpl"); -#ifdef CONFIG_UNUSED_SYMBOLS - mod->unused_syms = section_objs(info, "__ksymtab_unused", - sizeof(*mod->unused_syms), - &mod->num_unused_syms); - mod->unused_crcs = section_addr(info, "__kcrctab_unused"); - mod->unused_gpl_syms = section_objs(info, "__ksymtab_unused_gpl", - sizeof(*mod->unused_gpl_syms), - &mod->num_unused_gpl_syms); - mod->unused_gpl_crcs = section_addr(info, "__kcrctab_unused_gpl"); -#endif #ifdef CONFIG_CONSTRUCTORS mod->ctors = section_objs(info, ".ctors", sizeof(*mod->ctors), &mod->num_ctors); @@ -3480,13 +3426,8 @@ static int check_module_license_and_versions(struct module *mod) pr_warn("%s: module license taints kernel.\n", mod->name); #ifdef CONFIG_MODVERSIONS - if ((mod->num_syms && !mod->crcs) - || (mod->num_gpl_syms && !mod->gpl_crcs) -#ifdef CONFIG_UNUSED_SYMBOLS - || (mod->num_unused_syms && !mod->unused_crcs) - || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs) -#endif - ) { + if ((mod->num_syms && !mod->crcs) || + (mod->num_gpl_syms && !mod->gpl_crcs)) { return try_to_force_load(mod, "no versions for exported symbols"); } diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 92e888ed939f..eabd2d5467b1 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -4290,8 +4290,7 @@ sub process { if (defined $realline_next && exists $lines[$realline_next - 1] && !defined $suppress_export{$realline_next} && - ($lines[$realline_next - 1] =~ /EXPORT_SYMBOL.*\((.*)\)/ || - $lines[$realline_next - 1] =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) { + ($lines[$realline_next - 1] =~ /EXPORT_SYMBOL.*\((.*)\)/)) { # Handle definitions which produce identifiers with # a prefix: # XXX(foo); @@ -4318,8 +4317,7 @@ sub process { } if (!defined $suppress_export{$linenr} && $prevline =~ /^.\s*$/ && - ($line =~ /EXPORT_SYMBOL.*\((.*)\)/ || - $line =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) { + ($line =~ /EXPORT_SYMBOL.*\((.*)\)/)) { #print "FOO B <$lines[$linenr - 1]>\n"; $suppress_export{$linenr} = 2; } diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 25c1446055d1..20fc57837881 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -43,8 +43,9 @@ static int allow_missing_ns_imports; static bool error_occurred; enum export { - export_plain, export_unused, export_gpl, - export_unused_gpl, export_unknown + export_plain, + export_gpl, + export_unknown }; /* In kernel, this size is defined in linux/module.h; @@ -301,9 +302,7 @@ static const struct { enum export export; } export_list[] = { { .str = "EXPORT_SYMBOL", .export = export_plain }, - { .str = "EXPORT_UNUSED_SYMBOL", .export = export_unused }, { .str = "EXPORT_SYMBOL_GPL", .export = export_gpl }, - { .str = "EXPORT_UNUSED_SYMBOL_GPL", .export = export_unused_gpl }, { .str = "(unknown)", .export = export_unknown }, }; @@ -362,12 +361,8 @@ static enum export export_from_secname(struct elf_info *elf, unsigned int sec) if (strstarts(secname, "___ksymtab+")) return export_plain; - else if (strstarts(secname, "___ksymtab_unused+")) - return export_unused; else if (strstarts(secname, "___ksymtab_gpl+")) return export_gpl; - else if (strstarts(secname, "___ksymtab_unused_gpl+")) - return export_unused_gpl; else return export_unknown; } @@ -376,12 +371,8 @@ static enum export export_from_sec(struct elf_info *elf, unsigned int sec) { if (sec == elf->export_sec) return export_plain; - else if (sec == elf->export_unused_sec) - return export_unused; else if (sec == elf->export_gpl_sec) return export_gpl; - else if (sec == elf->export_unused_gpl_sec) - return export_unused_gpl; else return export_unknown; } @@ -585,12 +576,8 @@ static int parse_elf(struct elf_info *info, const char *filename) info->modinfo_len = sechdrs[i].sh_size; } else if (strcmp(secname, "__ksymtab") == 0) info->export_sec = i; - else if (strcmp(secname, "__ksymtab_unused") == 0) - info->export_unused_sec = i; else if (strcmp(secname, "__ksymtab_gpl") == 0) info->export_gpl_sec = i; - else if (strcmp(secname, "__ksymtab_unused_gpl") == 0) - info->export_unused_gpl_sec = i; if (sechdrs[i].sh_type == SHT_SYMTAB) { unsigned int sh_link_idx; @@ -2141,32 +2128,13 @@ static void check_for_gpl_usage(enum export exp, const char *m, const char *s) error("GPL-incompatible module %s.ko uses GPL-only symbol '%s'\n", m, s); break; - case export_unused_gpl: - error("GPL-incompatible module %s.ko uses GPL-only symbol marked UNUSED '%s'\n", - m, s); - break; case export_plain: - case export_unused: case export_unknown: /* ignore */ break; } } -static void check_for_unused(enum export exp, const char *m, const char *s) -{ - switch (exp) { - case export_unused: - case export_unused_gpl: - warn("module %s.ko uses symbol '%s' marked UNUSED\n", - m, s); - break; - default: - /* ignore */ - break; - } -} - static void check_exports(struct module *mod) { struct symbol *s, *exp; @@ -2197,7 +2165,6 @@ static void check_exports(struct module *mod) if (!mod->gpl_compatible) check_for_gpl_usage(exp->export, basename, exp->name); - check_for_unused(exp->export, basename, exp->name); } } diff --git a/scripts/mod/modpost.h b/scripts/mod/modpost.h index 834220de002b..0c47ff95c0e2 100644 --- a/scripts/mod/modpost.h +++ b/scripts/mod/modpost.h @@ -139,9 +139,7 @@ struct elf_info { Elf_Sym *symtab_start; Elf_Sym *symtab_stop; Elf_Section export_sec; - Elf_Section export_unused_sec; Elf_Section export_gpl_sec; - Elf_Section export_unused_gpl_sec; char *strtab; char *modinfo; unsigned int modinfo_len; diff --git a/scripts/module.lds.S b/scripts/module.lds.S index d82b452e8a71..24e8af579ce3 100644 --- a/scripts/module.lds.S +++ b/scripts/module.lds.S @@ -11,12 +11,8 @@ SECTIONS { __ksymtab 0 : { *(SORT(___ksymtab+*)) } __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) } - __ksymtab_unused 0 : { *(SORT(___ksymtab_unused+*)) } - __ksymtab_unused_gpl 0 : { *(SORT(___ksymtab_unused_gpl+*)) } __kcrctab 0 : { *(SORT(___kcrctab+*)) } __kcrctab_gpl 0 : { *(SORT(___kcrctab_gpl+*)) } - __kcrctab_unused 0 : { *(SORT(___kcrctab_unused+*)) } - __kcrctab_unused_gpl 0 : { *(SORT(___kcrctab_unused_gpl+*)) } .init_array 0 : ALIGN(8) { *(SORT(.init_array.*)) *(.init_array) } diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h index 9f61349a8944..acb6f4daa2f0 100644 --- a/tools/include/linux/export.h +++ b/tools/include/linux/export.h @@ -3,7 +3,5 @@ #define EXPORT_SYMBOL(sym) #define EXPORT_SYMBOL_GPL(sym) -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) #endif -- cgit v1.2.3 From 87ccc826bf1c9e5ab4c2f649b404e02c63e47622 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Feb 2021 12:02:21 +0100 Subject: x86/unwind/orc: Change REG_SP_INDIRECT Currently REG_SP_INDIRECT is unused but means (%rsp + offset), change it to mean (%rsp) + offset. The reason is that we're going to swizzle stack in the middle of a C function with non-trivial stack footprint. This means that when the unwinder finds the ToS, it needs to dereference it (%rsp) and then add the offset to the next frame, resulting in: (%rsp) + offset This is somewhat unfortunate, since REG_BP_INDIRECT is used (by DRAP) and thus needs to retain the current (%rbp + offset). Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf --- arch/x86/kernel/unwind_orc.c | 5 ++++- tools/objtool/orc_dump.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index 73f800100066..2a1d47f47eee 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -471,7 +471,7 @@ bool unwind_next_frame(struct unwind_state *state) break; case ORC_REG_SP_INDIRECT: - sp = state->sp + orc->sp_offset; + sp = state->sp; indirect = true; break; @@ -521,6 +521,9 @@ bool unwind_next_frame(struct unwind_state *state) if (indirect) { if (!deref_stack_reg(state, sp, &sp)) goto err; + + if (orc->sp_reg == ORC_REG_SP_INDIRECT) + sp += orc->sp_offset; } /* Find IP, SP and possibly regs: */ diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c index c53fae9dbe93..f5a8508c42d6 100644 --- a/tools/objtool/orc_dump.c +++ b/tools/objtool/orc_dump.c @@ -55,7 +55,7 @@ static void print_reg(unsigned int reg, int offset) if (reg == ORC_REG_BP_INDIRECT) printf("(bp%+d)", offset); else if (reg == ORC_REG_SP_INDIRECT) - printf("(sp%+d)", offset); + printf("(sp)%+d", offset); else if (reg == ORC_REG_UNDEFINED) printf("(und)"); else -- cgit v1.2.3 From 2a512829840eb97a8b52eca7058e56d484468f2d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Feb 2021 12:02:18 +0100 Subject: objtool,x86: Additionally decode: mov %rsp, (%reg) Where we already decode: mov %rsp, %reg, also decode mov %rsp, (%reg). Nothing should match for this new stack-op. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf --- tools/objtool/arch/x86/decode.c | 42 +++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 9637e3bf5ab8..549813cff8ab 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -222,15 +222,38 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, break; case 0x89: - if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) { + if (rex_w && !rex_r && modrm_reg == 4) { - /* mov %rsp, reg */ - ADD_OP(op) { - op->src.type = OP_SRC_REG; - op->src.reg = CFI_SP; - op->dest.type = OP_DEST_REG; - op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + if (modrm_mod == 3) { + /* mov %rsp, reg */ + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = CFI_SP; + op->dest.type = OP_DEST_REG; + op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + } + break; + + } else { + /* skip nontrivial SIB */ + if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x)) + break; + + /* skip RIP relative displacement */ + if (modrm_rm == 5 && modrm_mod == 0) + break; + + /* mov %rsp, disp(%reg) */ + ADD_OP(op) { + op->src.type = OP_SRC_REG; + op->src.reg = CFI_SP; + op->dest.type = OP_DEST_REG_INDIRECT; + op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b]; + op->dest.offset = insn.displacement.value; + } + break; } + break; } @@ -259,8 +282,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->dest.reg = CFI_BP; op->dest.offset = insn.displacement.value; } + break; + } - } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) { + if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) { /* mov reg, disp(%rsp) */ ADD_OP(op) { @@ -270,6 +295,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec, op->dest.reg = CFI_SP; op->dest.offset = insn.displacement.value; } + break; } break; -- cgit v1.2.3 From aafeb14e9da29e323b0605f8f1bae0d45d5f3acf Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Feb 2021 12:02:17 +0100 Subject: objtool: Support stack-swizzle Natively support the stack swizzle pattern: mov %rsp, (%[tos]) mov %[tos], %rsp ... pop %rsp It uses the vals[] array to link the first two stack-ops, and detect the SP to SP_INDIRECT swizzle. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Miroslav Benes Acked-by: Josh Poimboeuf --- tools/objtool/check.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5f056ddab4fa..62cd211ec45c 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1945,6 +1945,38 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, cfa->offset = -cfi->vals[op->src.reg].offset; cfi->stack_size = cfa->offset; + } else if (cfa->base == CFI_SP && + cfi->vals[op->src.reg].base == CFI_SP_INDIRECT && + cfi->vals[op->src.reg].offset == cfa->offset) { + + /* + * Stack swizzle: + * + * 1: mov %rsp, (%[tos]) + * 2: mov %[tos], %rsp + * ... + * 3: pop %rsp + * + * Where: + * + * 1 - places a pointer to the previous + * stack at the Top-of-Stack of the + * new stack. + * + * 2 - switches to the new stack. + * + * 3 - pops the Top-of-Stack to restore + * the original stack. + * + * Note: we set base to SP_INDIRECT + * here and preserve offset. Therefore + * when the unwinder reaches ToS it + * will dereference SP and then add the + * offset to find the next frame, IOW: + * (%rsp) + offset. + */ + cfa->base = CFI_SP_INDIRECT; + } else { cfa->base = CFI_UNDEFINED; cfa->offset = 0; @@ -2047,6 +2079,13 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, case OP_SRC_POP: case OP_SRC_POPF: + if (op->dest.reg == CFI_SP && cfa->base == CFI_SP_INDIRECT) { + + /* pop %rsp; # restore from a stack swizzle */ + cfa->base = CFI_SP; + break; + } + if (!cfi->drap && op->dest.reg == cfa->base) { /* pop %rbp */ @@ -2193,6 +2232,12 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, /* mov reg, disp(%rsp) */ save_reg(cfi, op->src.reg, CFI_CFA, op->dest.offset - cfi->stack_size); + + } else if (op->src.reg == CFI_SP && op->dest.offset == 0) { + + /* mov %rsp, (%reg); # setup a stack swizzle. */ + cfi->vals[op->dest.reg].base = CFI_SP_INDIRECT; + cfi->vals[op->dest.reg].offset = cfa->offset; } break; -- cgit v1.2.3 From 1838b06bf01ac2b1b9ea808aa5962d5324b4da8f Mon Sep 17 00:00:00 2001 From: Ignacio Alvarado Date: Sat, 13 Feb 2021 00:14:52 +0000 Subject: selftests: kvm: add hardware_disable test This test launches 512 VMs in serial and kills them after a random amount of time. The test was original written to exercise KVM user notifiers in the context of1650b4ebc99d: - KVM: Disable irq while unregistering user notifier - https://lore.kernel.org/kvm/CACXrx53vkO=HKfwWwk+fVpvxcNjPrYmtDZ10qWxFvVX_PTGp3g@mail.gmail.com/ Recently, this test piqued my interest because it proved useful to for AMD SNP in exercising the "in-use" pages, described in APM section 15.36.12, "Running SNP-Active Virtual Machines". Signed-off-by: Ignacio Alvarado Signed-off-by: Marc Orr Message-Id: <20210213001452.1719001-1-marcorr@google.com> Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/.gitignore | 1 + tools/testing/selftests/kvm/Makefile | 1 + .../testing/selftests/kvm/hardware_disable_test.c | 165 +++++++++++++++++++++ 3 files changed, 167 insertions(+) create mode 100644 tools/testing/selftests/kvm/hardware_disable_test.c (limited to 'tools') diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 3a84394829ea..32b87cc77c8e 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -33,6 +33,7 @@ /demand_paging_test /dirty_log_test /dirty_log_perf_test +/hardware_disable_test /kvm_create_max_vcpus /memslot_modification_stress_test /set_memory_region_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 8c8eda429576..a6d61f451f88 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -67,6 +67,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test +TEST_GEN_PROGS_x86_64 += hardware_disable_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test TEST_GEN_PROGS_x86_64 += set_memory_region_test diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c new file mode 100644 index 000000000000..2f2eeb8a1d86 --- /dev/null +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -0,0 +1,165 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This test is intended to reproduce a crash that happens when + * kvm_arch_hardware_disable is called and it attempts to unregister the user + * return notifiers. + */ + +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "kvm_util.h" + +#define VCPU_NUM 4 +#define SLEEPING_THREAD_NUM (1 << 4) +#define FORK_NUM (1ULL << 9) +#define DELAY_US_MAX 2000 +#define GUEST_CODE_PIO_PORT 4 + +sem_t *sem; + +/* Arguments for the pthreads */ +struct payload { + struct kvm_vm *vm; + uint32_t index; +}; + +static void guest_code(void) +{ + for (;;) + ; /* Some busy work */ + printf("Should not be reached.\n"); +} + +static void *run_vcpu(void *arg) +{ + struct payload *payload = (struct payload *)arg; + struct kvm_run *state = vcpu_state(payload->vm, payload->index); + + vcpu_run(payload->vm, payload->index); + + TEST_ASSERT(false, "%s: exited with reason %d: %s\n", + __func__, state->exit_reason, + exit_reason_str(state->exit_reason)); + pthread_exit(NULL); +} + +static void *sleeping_thread(void *arg) +{ + int fd; + + while (true) { + fd = open("/dev/null", O_RDWR); + close(fd); + } + TEST_ASSERT(false, "%s: exited\n", __func__); + pthread_exit(NULL); +} + +static inline void check_create_thread(pthread_t *thread, pthread_attr_t *attr, + void *(*f)(void *), void *arg) +{ + int r; + + r = pthread_create(thread, attr, f, arg); + TEST_ASSERT(r == 0, "%s: failed to create thread", __func__); +} + +static inline void check_set_affinity(pthread_t thread, cpu_set_t *cpu_set) +{ + int r; + + r = pthread_setaffinity_np(thread, sizeof(cpu_set_t), cpu_set); + TEST_ASSERT(r == 0, "%s: failed set affinity", __func__); +} + +static inline void check_join(pthread_t thread, void **retval) +{ + int r; + + r = pthread_join(thread, retval); + TEST_ASSERT(r == 0, "%s: failed to join thread", __func__); +} + +static void run_test(uint32_t run) +{ + struct kvm_vm *vm; + cpu_set_t cpu_set; + pthread_t threads[VCPU_NUM]; + pthread_t throw_away; + struct payload payloads[VCPU_NUM]; + void *b; + uint32_t i, j; + + CPU_ZERO(&cpu_set); + for (i = 0; i < VCPU_NUM; i++) + CPU_SET(i, &cpu_set); + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); + vm_create_irqchip(vm); + + fprintf(stderr, "%s: [%d] start vcpus\n", __func__, run); + for (i = 0; i < VCPU_NUM; ++i) { + vm_vcpu_add_default(vm, i, guest_code); + payloads[i].vm = vm; + payloads[i].index = i; + + check_create_thread(&threads[i], NULL, run_vcpu, + (void *)&payloads[i]); + check_set_affinity(threads[i], &cpu_set); + + for (j = 0; j < SLEEPING_THREAD_NUM; ++j) { + check_create_thread(&throw_away, NULL, sleeping_thread, + (void *)NULL); + check_set_affinity(throw_away, &cpu_set); + } + } + fprintf(stderr, "%s: [%d] all threads launched\n", __func__, run); + sem_post(sem); + for (i = 0; i < VCPU_NUM; ++i) + check_join(threads[i], &b); + /* Should not be reached */ + TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run); +} + +int main(int argc, char **argv) +{ + uint32_t i; + int s, r; + pid_t pid; + + sem = sem_open("vm_sem", O_CREAT | O_EXCL, 0644, 0); + sem_unlink("vm_sem"); + + for (i = 0; i < FORK_NUM; ++i) { + pid = fork(); + TEST_ASSERT(pid >= 0, "%s: unable to fork", __func__); + if (pid == 0) + run_test(i); /* This function always exits */ + + fprintf(stderr, "%s: [%d] waiting semaphore\n", __func__, i); + sem_wait(sem); + r = (rand() % DELAY_US_MAX) + 1; + fprintf(stderr, "%s: [%d] waiting %dus\n", __func__, i, r); + usleep(r); + r = waitpid(pid, &s, WNOHANG); + TEST_ASSERT(r != pid, + "%s: [%d] child exited unexpectedly status: [%d]", + __func__, i, s); + fprintf(stderr, "%s: [%d] killing child\n", __func__, i); + kill(pid, SIGKILL); + } + + sem_destroy(sem); + exit(0); +} -- cgit v1.2.3 From bcd22e145b9a65dd603c7b3d8079e948922787e4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 15 Feb 2021 11:42:01 -0500 Subject: selftests: kvm: avoid uninitialized variable warning The variable in practice will never be uninitialized, because the loop will always go through at least one iteration. In case it would not, make vcpu_get_cpuid report an assertion failure. Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/x86_64/processor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index de0c76177d02..a8906e60a108 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -720,7 +720,8 @@ struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid) { struct vcpu *vcpu = vcpu_find(vm, vcpuid); struct kvm_cpuid2 *cpuid; - int rc, max_ent; + int max_ent; + int rc = -1; TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid); -- cgit v1.2.3 From 724c8a23d589d8a002d2e39633c2f9a5a429616f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 18 Feb 2021 17:14:10 +0100 Subject: objtool: Fix stack-swizzle for FRAME_POINTER=y When objtool encounters the stack-swizzle: mov %rsp, (%[tos]) mov %[tos], %rsp ... pop %rsp Inside a FRAME_POINTER=y build, things go a little screwy because clearly we're not adjusting the cfa->base. This then results in the pop %rsp not being detected as a restore of cfa->base so it will turn into a regular POP and offset the stack, resulting in: kernel/softirq.o: warning: objtool: do_softirq()+0xdb: return with modified stack frame Therefore, have "mov %[tos], %rsp" act like a PUSH (it sorta is anyway) to balance the things out. We're not too concerned with the actual stack_size for frame-pointer builds, since we don't generate ORC data for them anyway. Fixes: aafeb14e9da2 ("objtool: Support stack-swizzle") Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Acked-by: Josh Poimboeuf Link: https://lkml.kernel.org/r/YC6UC+rc9KKmQrkd@hirez.programming.kicks-ass.net --- tools/objtool/check.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 8e74210f4235..2087974c70d3 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1983,6 +1983,20 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi, } } + else if (op->dest.reg == CFI_SP && + cfi->vals[op->src.reg].base == CFI_SP_INDIRECT && + cfi->vals[op->src.reg].offset == cfa->offset) { + + /* + * The same stack swizzle case 2) as above. But + * because we can't change cfa->base, case 3) + * will become a regular POP. Pretend we're a + * PUSH so things don't go unbalanced. + */ + cfi->stack_size += 8; + } + + break; case OP_SRC_ADD: -- cgit v1.2.3 From 99d0021569c71c325f41a7dd0a08a380010ce95c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Aug 2020 15:14:09 -0700 Subject: objtool: Add a pass for generating __mcount_loc Add the --mcount option for generating __mcount_loc sections needed for dynamic ftrace. Using this pass requires the kernel to be compiled with -mfentry and CC_USING_NOP_MCOUNT to be defined in Makefile. Link: https://lore.kernel.org/lkml/20200625200235.GQ4781@hirez.programming.kicks-ass.net/ Signed-off-by: Peter Zijlstra [Sami: rebased, dropped config changes, fixed to actually use --mcount, and wrote a commit message.] Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook --- tools/objtool/builtin-check.c | 3 +- tools/objtool/builtin.h | 2 +- tools/objtool/check.c | 82 +++++++++++++++++++++++++++++++++++++++++++ tools/objtool/check.h | 1 + tools/objtool/objtool.c | 1 + tools/objtool/objtool.h | 1 + 6 files changed, 88 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index c6d199bfd0ae..e92e76f69176 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -18,7 +18,7 @@ #include "builtin.h" #include "objtool.h" -bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux; +bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount; static const char * const check_usage[] = { "objtool check [] file.o", @@ -35,6 +35,7 @@ const struct option check_options[] = { OPT_BOOLEAN('s', "stats", &stats, "print statistics"), OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"), OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), + OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"), OPT_END(), }; diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index 85c979caa367..94565a72b701 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -8,7 +8,7 @@ #include extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux; +extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5f8d3eed78a1..44e3ee1425cd 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -530,6 +530,65 @@ static int create_static_call_sections(struct objtool_file *file) return 0; } +static int create_mcount_loc_sections(struct objtool_file *file) +{ + struct section *sec, *reloc_sec; + struct reloc *reloc; + unsigned long *loc; + struct instruction *insn; + int idx; + + sec = find_section_by_name(file->elf, "__mcount_loc"); + if (sec) { + INIT_LIST_HEAD(&file->mcount_loc_list); + WARN("file already has __mcount_loc section, skipping"); + return 0; + } + + if (list_empty(&file->mcount_loc_list)) + return 0; + + idx = 0; + list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) + idx++; + + sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx); + if (!sec) + return -1; + + reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA); + if (!reloc_sec) + return -1; + + idx = 0; + list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) { + + loc = (unsigned long *)sec->data->d_buf + idx; + memset(loc, 0, sizeof(unsigned long)); + + reloc = malloc(sizeof(*reloc)); + if (!reloc) { + perror("malloc"); + return -1; + } + memset(reloc, 0, sizeof(*reloc)); + + reloc->sym = insn->sec->sym; + reloc->addend = insn->offset; + reloc->type = R_X86_64_64; + reloc->offset = idx * sizeof(unsigned long); + reloc->sec = reloc_sec; + elf_add_reloc(file->elf, reloc); + + idx++; + } + + if (elf_rebuild_reloc_section(file->elf, reloc_sec)) + return -1; + + return 0; +} + /* * Warnings shouldn't be reported for ignored functions. */ @@ -956,6 +1015,22 @@ static int add_call_destinations(struct objtool_file *file) insn->type = INSN_NOP; } + if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) { + if (reloc) { + reloc->type = R_NONE; + elf_write_reloc(file->elf, reloc); + } + + elf_write_insn(file->elf, insn->sec, + insn->offset, insn->len, + arch_nop_insn(insn->len)); + + insn->type = INSN_NOP; + + list_add_tail(&insn->mcount_loc_node, + &file->mcount_loc_list); + } + /* * Whatever stack impact regular CALLs have, should be undone * by the RETURN of the called function. @@ -2927,6 +3002,13 @@ int check(struct objtool_file *file) goto out; warnings += ret; + if (mcount) { + ret = create_mcount_loc_sections(file); + if (ret < 0) + goto out; + warnings += ret; + } + out: if (ret < 0) { /* diff --git a/tools/objtool/check.h b/tools/objtool/check.h index 5ec00a4b891b..070baec2050a 100644 --- a/tools/objtool/check.h +++ b/tools/objtool/check.h @@ -23,6 +23,7 @@ struct instruction { struct list_head list; struct hlist_node hash; struct list_head static_call_node; + struct list_head mcount_loc_node; struct section *sec; unsigned long offset; unsigned int len; diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c index 9df0cd86d310..c1819a6f2a18 100644 --- a/tools/objtool/objtool.c +++ b/tools/objtool/objtool.c @@ -62,6 +62,7 @@ struct objtool_file *objtool_open_read(const char *_objname) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); INIT_LIST_HEAD(&file.static_call_list); + INIT_LIST_HEAD(&file.mcount_loc_list); file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment"); file.ignore_unreachables = no_unreachable; file.hints = false; diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h index 4125d4578b23..cf004dd60c2b 100644 --- a/tools/objtool/objtool.h +++ b/tools/objtool/objtool.h @@ -19,6 +19,7 @@ struct objtool_file { struct list_head insn_list; DECLARE_HASHTABLE(insn_hash, 20); struct list_head static_call_list; + struct list_head mcount_loc_list; bool ignore_unreachables, c_file, hints, rodata; }; -- cgit v1.2.3 From 18a14575ae31c5a97a5e87e961932a5016d369be Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 28 Oct 2020 10:16:26 -0700 Subject: objtool: Fix __mcount_loc generation with Clang's assembler When objtool generates relocations for the __mcount_loc section, it tries to reference __fentry__ calls by their section symbol offset. However, this fails with Clang's integrated assembler as it may not generate section symbols for every section. This patch looks up a function symbol instead if the section symbol is missing, similarly to commit e81e07244325 ("objtool: Support Clang non-section symbols in ORC generation"). Signed-off-by: Sami Tolvanen --- tools/objtool/check.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 44e3ee1425cd..ab87fb92793c 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -573,8 +573,21 @@ static int create_mcount_loc_sections(struct objtool_file *file) } memset(reloc, 0, sizeof(*reloc)); - reloc->sym = insn->sec->sym; - reloc->addend = insn->offset; + if (insn->sec->sym) { + reloc->sym = insn->sec->sym; + reloc->addend = insn->offset; + } else { + reloc->sym = find_symbol_containing(insn->sec, insn->offset); + + if (!reloc->sym) { + WARN("missing symbol for insn at offset 0x%lx\n", + insn->offset); + return -1; + } + + reloc->addend = insn->offset - reloc->sym->offset; + } + reloc->type = R_X86_64_64; reloc->offset = idx * sizeof(unsigned long); reloc->sec = reloc_sec; -- cgit v1.2.3 From 0e731dbc18241d68318e0a7d2c2c0087c9073fb9 Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Fri, 17 Jul 2020 12:04:27 -0700 Subject: objtool: Don't autodetect vmlinux.o With LTO, we run objtool on vmlinux.o, but don't want noinstr validation. This change requires --vmlinux to be passed to objtool explicitly. Suggested-by: Peter Zijlstra Signed-off-by: Sami Tolvanen Reviewed-by: Kees Cook --- scripts/link-vmlinux.sh | 2 +- tools/objtool/builtin-check.c | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index c5919d5a0b4f..423a4106f0dd 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -106,7 +106,7 @@ objtool_link() local objtoolopt; if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then - objtoolopt="check" + objtoolopt="check --vmlinux" if [ -z "${CONFIG_FRAME_POINTER}" ]; then objtoolopt="${objtoolopt} --no-fp" fi diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index e92e76f69176..facfc10bc5dc 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -41,7 +41,7 @@ const struct option check_options[] = { int cmd_check(int argc, const char **argv) { - const char *objname, *s; + const char *objname; struct objtool_file *file; int ret; @@ -52,10 +52,6 @@ int cmd_check(int argc, const char **argv) objname = argv[0]; - s = strstr(objname, "vmlinux.o"); - if (s && !s[9]) - vmlinux = true; - file = objtool_open_read(objname); if (!file) return 1; -- cgit v1.2.3 From 41425ebe20245c99b44d6ba0f017be9bfc28414f Mon Sep 17 00:00:00 2001 From: Sami Tolvanen Date: Wed, 30 Sep 2020 14:36:59 -0700 Subject: objtool: Split noinstr validation from --vmlinux This change adds a --noinstr flag to objtool to allow us to specify that we're processing vmlinux.o without also enabling noinstr validation. This is needed to avoid false positives with LTO when we run objtool on vmlinux.o without CONFIG_DEBUG_ENTRY. Signed-off-by: Sami Tolvanen --- scripts/link-vmlinux.sh | 2 +- tools/objtool/builtin-check.c | 3 ++- tools/objtool/builtin.h | 2 +- tools/objtool/check.c | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 423a4106f0dd..18aed761f9c0 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -106,7 +106,7 @@ objtool_link() local objtoolopt; if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then - objtoolopt="check --vmlinux" + objtoolopt="check --vmlinux --noinstr" if [ -z "${CONFIG_FRAME_POINTER}" ]; then objtoolopt="${objtoolopt} --no-fp" fi diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index facfc10bc5dc..b84cdc72b51f 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -18,7 +18,7 @@ #include "builtin.h" #include "objtool.h" -bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount; +bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; static const char * const check_usage[] = { "objtool check [] file.o", @@ -34,6 +34,7 @@ const struct option check_options[] = { OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"), OPT_BOOLEAN('s', "stats", &stats, "print statistics"), OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"), + OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"), OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"), OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"), OPT_END(), diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h index 94565a72b701..2502bb27de17 100644 --- a/tools/objtool/builtin.h +++ b/tools/objtool/builtin.h @@ -8,7 +8,7 @@ #include extern const struct option check_options[]; -extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount; +extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr; extern int cmd_check(int argc, const char **argv); extern int cmd_orc(int argc, const char **argv); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ab87fb92793c..85993e606782 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -244,7 +244,7 @@ static void init_insn_state(struct insn_state *state, struct section *sec) * not correctly determine insn->call_dest->sec (external symbols do * not have a section). */ - if (vmlinux && sec) + if (vmlinux && noinstr && sec) state->noinstr = sec->noinstr; } -- cgit v1.2.3 From d5a49aa6c3e264a93a7d08485d66e346be0969dd Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Mon, 22 Feb 2021 17:25:45 +0100 Subject: wireguard: selftests: test multiple parallel streams In order to test ndo_start_xmit being called in parallel, explicitly add separate tests, which should all run on different cores. This should help tease out bugs associated with queueing up packets from different cores in parallel. Currently, it hasn't found those types of bugs, but given future planned work, this is a useful regression to avoid. Fixes: e7096c131e51 ("net: WireGuard secure network tunnel") Signed-off-by: Jason A. Donenfeld Signed-off-by: Jakub Kicinski --- tools/testing/selftests/wireguard/netns.sh | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 74c69b75f6f5..7ed7cd95e58f 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -39,7 +39,7 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; } ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; } ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; } sleep() { read -t "$1" -N 1 || true; } -waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } +waitiperf() { pretty "${1//*-}" "wait for iperf:${3:-5201} pid $2"; while [[ $(ss -N "$1" -tlpH "sport = ${3:-5201}") != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; } waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; } waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; } @@ -141,6 +141,19 @@ tests() { n2 iperf3 -s -1 -B fd00::2 & waitiperf $netns2 $! n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2 + + # TCP over IPv4, in parallel + for max in 4 5 50; do + local pids=( ) + for ((i=0; i < max; ++i)) do + n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 & + pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i )) + done + for ((i=0; i < max; ++i)) do + n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 & + done + wait "${pids[@]}" + done } [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}" -- cgit v1.2.3 From f00748bfa0246c428bf93f45267b8f1aa1816098 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Wed, 24 Feb 2021 12:05:05 -0800 Subject: kasan: prefix global functions with kasan_ Patch series "kasan: HW_TAGS tests support and fixes", v4. This patchset adds support for running KASAN-KUnit tests with the hardware tag-based mode and also contains a few fixes. This patch (of 15): There's a number of internal KASAN functions that are used across multiple source code files and therefore aren't marked as static inline. To avoid littering the kernel function names list with generic function names, prefix all such KASAN functions with kasan_. As a part of this change: - Rename internal (un)poison_range() to kasan_(un)poison() (no _range) to avoid name collision with a public kasan_unpoison_range(). - Rename check_memory_region() to kasan_check_range(), as it's a more fitting name. Link: https://lkml.kernel.org/r/cover.1610733117.git.andreyknvl@google.com Link: https://linux-review.googlesource.com/id/I719cc93483d4ba288a634dba80ee6b7f2809cd26 Link: https://lkml.kernel.org/r/13777aedf8d3ebbf35891136e1f2287e2f34aaba.1610733117.git.andreyknvl@google.com Signed-off-by: Andrey Konovalov Suggested-by: Marco Elver Reviewed-by: Marco Elver Reviewed-by: Alexander Potapenko Cc: Catalin Marinas Cc: Vincenzo Frascino Cc: Dmitry Vyukov Cc: Will Deacon Cc: Andrey Ryabinin Cc: Peter Collingbourne Cc: Evgenii Stepanov Cc: Branislav Rankov Cc: Kevin Brodsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/kasan/common.c | 47 +++++++++++++++++++++++----------------------- mm/kasan/generic.c | 36 +++++++++++++++++------------------ mm/kasan/kasan.h | 48 +++++++++++++++++++++++------------------------ mm/kasan/quarantine.c | 22 +++++++++++----------- mm/kasan/report.c | 13 +++++++------ mm/kasan/report_generic.c | 8 ++++---- mm/kasan/report_hw_tags.c | 8 ++++---- mm/kasan/report_sw_tags.c | 8 ++++---- mm/kasan/shadow.c | 26 ++++++++++++------------- mm/kasan/sw_tags.c | 16 ++++++++-------- tools/objtool/check.c | 2 +- 11 files changed, 117 insertions(+), 117 deletions(-) (limited to 'tools') diff --git a/mm/kasan/common.c b/mm/kasan/common.c index b25167664ead..eedc3e0fe365 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -60,7 +60,7 @@ void kasan_disable_current(void) void __kasan_unpoison_range(const void *address, size_t size) { - unpoison_range(address, size); + kasan_unpoison(address, size); } #if CONFIG_KASAN_STACK @@ -69,7 +69,7 @@ void kasan_unpoison_task_stack(struct task_struct *task) { void *base = task_stack_page(task); - unpoison_range(base, THREAD_SIZE); + kasan_unpoison(base, THREAD_SIZE); } /* Unpoison the stack for the current task beyond a watermark sp value. */ @@ -82,7 +82,7 @@ asmlinkage void kasan_unpoison_task_stack_below(const void *watermark) */ void *base = (void *)((unsigned long)watermark & ~(THREAD_SIZE - 1)); - unpoison_range(base, watermark - base); + kasan_unpoison(base, watermark - base); } #endif /* CONFIG_KASAN_STACK */ @@ -105,18 +105,17 @@ void __kasan_alloc_pages(struct page *page, unsigned int order) if (unlikely(PageHighMem(page))) return; - tag = random_tag(); + tag = kasan_random_tag(); for (i = 0; i < (1 << order); i++) page_kasan_tag_set(page + i, tag); - unpoison_range(page_address(page), PAGE_SIZE << order); + kasan_unpoison(page_address(page), PAGE_SIZE << order); } void __kasan_free_pages(struct page *page, unsigned int order) { if (likely(!PageHighMem(page))) - poison_range(page_address(page), - PAGE_SIZE << order, - KASAN_FREE_PAGE); + kasan_poison(page_address(page), PAGE_SIZE << order, + KASAN_FREE_PAGE); } /* @@ -246,18 +245,18 @@ void __kasan_poison_slab(struct page *page) for (i = 0; i < compound_nr(page); i++) page_kasan_tag_reset(page + i); - poison_range(page_address(page), page_size(page), + kasan_poison(page_address(page), page_size(page), KASAN_KMALLOC_REDZONE); } void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object) { - unpoison_range(object, cache->object_size); + kasan_unpoison(object, cache->object_size); } void __kasan_poison_object_data(struct kmem_cache *cache, void *object) { - poison_range(object, cache->object_size, KASAN_KMALLOC_REDZONE); + kasan_poison(object, cache->object_size, KASAN_KMALLOC_REDZONE); } /* @@ -294,7 +293,7 @@ static u8 assign_tag(struct kmem_cache *cache, const void *object, * set, assign a tag when the object is being allocated (init == false). */ if (!cache->ctor && !(cache->flags & SLAB_TYPESAFE_BY_RCU)) - return init ? KASAN_TAG_KERNEL : random_tag(); + return init ? KASAN_TAG_KERNEL : kasan_random_tag(); /* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */ #ifdef CONFIG_SLAB @@ -305,7 +304,7 @@ static u8 assign_tag(struct kmem_cache *cache, const void *object, * For SLUB assign a random tag during slab creation, otherwise reuse * the already assigned tag. */ - return init ? random_tag() : get_tag(object); + return init ? kasan_random_tag() : get_tag(object); #endif } @@ -346,12 +345,12 @@ static bool ____kasan_slab_free(struct kmem_cache *cache, void *object, if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) return false; - if (check_invalid_free(tagged_object)) { + if (kasan_check_invalid_free(tagged_object)) { kasan_report_invalid_free(tagged_object, ip); return true; } - poison_range(object, cache->object_size, KASAN_KMALLOC_FREE); + kasan_poison(object, cache->object_size, KASAN_KMALLOC_FREE); if (!kasan_stack_collection_enabled()) return false; @@ -361,7 +360,7 @@ static bool ____kasan_slab_free(struct kmem_cache *cache, void *object, kasan_set_free_info(cache, object, tag); - return quarantine_put(cache, object); + return kasan_quarantine_put(cache, object); } bool __kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip) @@ -386,7 +385,7 @@ void __kasan_slab_free_mempool(void *ptr, unsigned long ip) kasan_report_invalid_free(ptr, ip); return; } - poison_range(ptr, page_size(page), KASAN_FREE_PAGE); + kasan_poison(ptr, page_size(page), KASAN_FREE_PAGE); } else { ____kasan_slab_free(page->slab_cache, ptr, ip, false); } @@ -409,7 +408,7 @@ static void *____kasan_kmalloc(struct kmem_cache *cache, const void *object, u8 tag; if (gfpflags_allow_blocking(flags)) - quarantine_reduce(); + kasan_quarantine_reduce(); if (unlikely(object == NULL)) return NULL; @@ -421,9 +420,9 @@ static void *____kasan_kmalloc(struct kmem_cache *cache, const void *object, tag = assign_tag(cache, object, false, keep_tag); /* Tag is ignored in set_tag without CONFIG_KASAN_SW/HW_TAGS */ - unpoison_range(set_tag(object, tag), size); - poison_range((void *)redzone_start, redzone_end - redzone_start, - KASAN_KMALLOC_REDZONE); + kasan_unpoison(set_tag(object, tag), size); + kasan_poison((void *)redzone_start, redzone_end - redzone_start, + KASAN_KMALLOC_REDZONE); if (kasan_stack_collection_enabled()) set_alloc_info(cache, (void *)object, flags); @@ -452,7 +451,7 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size, unsigned long redzone_end; if (gfpflags_allow_blocking(flags)) - quarantine_reduce(); + kasan_quarantine_reduce(); if (unlikely(ptr == NULL)) return NULL; @@ -462,8 +461,8 @@ void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size, KASAN_GRANULE_SIZE); redzone_end = (unsigned long)ptr + page_size(page); - unpoison_range(ptr, size); - poison_range((void *)redzone_start, redzone_end - redzone_start, + kasan_unpoison(ptr, size); + kasan_poison((void *)redzone_start, redzone_end - redzone_start, KASAN_PAGE_REDZONE); return (void *)ptr; diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c index 5106b84b07d4..acab8862dc67 100644 --- a/mm/kasan/generic.c +++ b/mm/kasan/generic.c @@ -158,7 +158,7 @@ static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size) return memory_is_poisoned_n(addr, size); } -static __always_inline bool check_memory_region_inline(unsigned long addr, +static __always_inline bool check_region_inline(unsigned long addr, size_t size, bool write, unsigned long ret_ip) { @@ -179,13 +179,13 @@ static __always_inline bool check_memory_region_inline(unsigned long addr, return !kasan_report(addr, size, write, ret_ip); } -bool check_memory_region(unsigned long addr, size_t size, bool write, - unsigned long ret_ip) +bool kasan_check_range(unsigned long addr, size_t size, bool write, + unsigned long ret_ip) { - return check_memory_region_inline(addr, size, write, ret_ip); + return check_region_inline(addr, size, write, ret_ip); } -bool check_invalid_free(void *addr) +bool kasan_check_invalid_free(void *addr) { s8 shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr)); @@ -194,22 +194,22 @@ bool check_invalid_free(void *addr) void kasan_cache_shrink(struct kmem_cache *cache) { - quarantine_remove_cache(cache); + kasan_quarantine_remove_cache(cache); } void kasan_cache_shutdown(struct kmem_cache *cache) { if (!__kmem_cache_empty(cache)) - quarantine_remove_cache(cache); + kasan_quarantine_remove_cache(cache); } static void register_global(struct kasan_global *global) { size_t aligned_size = round_up(global->size, KASAN_GRANULE_SIZE); - unpoison_range(global->beg, global->size); + kasan_unpoison(global->beg, global->size); - poison_range(global->beg + aligned_size, + kasan_poison(global->beg + aligned_size, global->size_with_redzone - aligned_size, KASAN_GLOBAL_REDZONE); } @@ -231,7 +231,7 @@ EXPORT_SYMBOL(__asan_unregister_globals); #define DEFINE_ASAN_LOAD_STORE(size) \ void __asan_load##size(unsigned long addr) \ { \ - check_memory_region_inline(addr, size, false, _RET_IP_);\ + check_region_inline(addr, size, false, _RET_IP_); \ } \ EXPORT_SYMBOL(__asan_load##size); \ __alias(__asan_load##size) \ @@ -239,7 +239,7 @@ EXPORT_SYMBOL(__asan_unregister_globals); EXPORT_SYMBOL(__asan_load##size##_noabort); \ void __asan_store##size(unsigned long addr) \ { \ - check_memory_region_inline(addr, size, true, _RET_IP_); \ + check_region_inline(addr, size, true, _RET_IP_); \ } \ EXPORT_SYMBOL(__asan_store##size); \ __alias(__asan_store##size) \ @@ -254,7 +254,7 @@ DEFINE_ASAN_LOAD_STORE(16); void __asan_loadN(unsigned long addr, size_t size) { - check_memory_region(addr, size, false, _RET_IP_); + kasan_check_range(addr, size, false, _RET_IP_); } EXPORT_SYMBOL(__asan_loadN); @@ -264,7 +264,7 @@ EXPORT_SYMBOL(__asan_loadN_noabort); void __asan_storeN(unsigned long addr, size_t size) { - check_memory_region(addr, size, true, _RET_IP_); + kasan_check_range(addr, size, true, _RET_IP_); } EXPORT_SYMBOL(__asan_storeN); @@ -290,11 +290,11 @@ void __asan_alloca_poison(unsigned long addr, size_t size) WARN_ON(!IS_ALIGNED(addr, KASAN_ALLOCA_REDZONE_SIZE)); - unpoison_range((const void *)(addr + rounded_down_size), - size - rounded_down_size); - poison_range(left_redzone, KASAN_ALLOCA_REDZONE_SIZE, + kasan_unpoison((const void *)(addr + rounded_down_size), + size - rounded_down_size); + kasan_poison(left_redzone, KASAN_ALLOCA_REDZONE_SIZE, KASAN_ALLOCA_LEFT); - poison_range(right_redzone, padding_size + KASAN_ALLOCA_REDZONE_SIZE, + kasan_poison(right_redzone, padding_size + KASAN_ALLOCA_REDZONE_SIZE, KASAN_ALLOCA_RIGHT); } EXPORT_SYMBOL(__asan_alloca_poison); @@ -305,7 +305,7 @@ void __asan_allocas_unpoison(const void *stack_top, const void *stack_bottom) if (unlikely(!stack_top || stack_top > stack_bottom)) return; - unpoison_range(stack_top, stack_bottom - stack_top); + kasan_unpoison(stack_top, stack_bottom - stack_top); } EXPORT_SYMBOL(__asan_allocas_unpoison); diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 8c706e7652f2..3810e75b8eea 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -195,14 +195,14 @@ static inline bool addr_has_metadata(const void *addr) } /** - * check_memory_region - Check memory region, and report if invalid access. + * kasan_check_range - Check memory region, and report if invalid access. * @addr: the accessed address * @size: the accessed size * @write: true if access is a write access * @ret_ip: return address * @return: true if access was valid, false if invalid */ -bool check_memory_region(unsigned long addr, size_t size, bool write, +bool kasan_check_range(unsigned long addr, size_t size, bool write, unsigned long ret_ip); #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ @@ -215,19 +215,19 @@ static inline bool addr_has_metadata(const void *addr) #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */ #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS) -void print_tags(u8 addr_tag, const void *addr); +void kasan_print_tags(u8 addr_tag, const void *addr); #else -static inline void print_tags(u8 addr_tag, const void *addr) { } +static inline void kasan_print_tags(u8 addr_tag, const void *addr) { } #endif -void *find_first_bad_addr(void *addr, size_t size); -const char *get_bug_type(struct kasan_access_info *info); -void metadata_fetch_row(char *buffer, void *row); +void *kasan_find_first_bad_addr(void *addr, size_t size); +const char *kasan_get_bug_type(struct kasan_access_info *info); +void kasan_metadata_fetch_row(char *buffer, void *row); #if defined(CONFIG_KASAN_GENERIC) && CONFIG_KASAN_STACK -void print_address_stack_frame(const void *addr); +void kasan_print_address_stack_frame(const void *addr); #else -static inline void print_address_stack_frame(const void *addr) { } +static inline void kasan_print_address_stack_frame(const void *addr) { } #endif bool kasan_report(unsigned long addr, size_t size, @@ -244,13 +244,13 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache, #if defined(CONFIG_KASAN_GENERIC) && \ (defined(CONFIG_SLAB) || defined(CONFIG_SLUB)) -bool quarantine_put(struct kmem_cache *cache, void *object); -void quarantine_reduce(void); -void quarantine_remove_cache(struct kmem_cache *cache); +bool kasan_quarantine_put(struct kmem_cache *cache, void *object); +void kasan_quarantine_reduce(void); +void kasan_quarantine_remove_cache(struct kmem_cache *cache); #else -static inline bool quarantine_put(struct kmem_cache *cache, void *object) { return false; } -static inline void quarantine_reduce(void) { } -static inline void quarantine_remove_cache(struct kmem_cache *cache) { } +static inline bool kasan_quarantine_put(struct kmem_cache *cache, void *object) { return false; } +static inline void kasan_quarantine_reduce(void) { } +static inline void kasan_quarantine_remove_cache(struct kmem_cache *cache) { } #endif #ifndef arch_kasan_set_tag @@ -293,28 +293,28 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag) #endif /* CONFIG_KASAN_HW_TAGS */ #ifdef CONFIG_KASAN_SW_TAGS -u8 random_tag(void); +u8 kasan_random_tag(void); #elif defined(CONFIG_KASAN_HW_TAGS) -static inline u8 random_tag(void) { return hw_get_random_tag(); } +static inline u8 kasan_random_tag(void) { return hw_get_random_tag(); } #else -static inline u8 random_tag(void) { return 0; } +static inline u8 kasan_random_tag(void) { return 0; } #endif #ifdef CONFIG_KASAN_HW_TAGS -static inline void poison_range(const void *address, size_t size, u8 value) +static inline void kasan_poison(const void *address, size_t size, u8 value) { hw_set_mem_tag_range(kasan_reset_tag(address), round_up(size, KASAN_GRANULE_SIZE), value); } -static inline void unpoison_range(const void *address, size_t size) +static inline void kasan_unpoison(const void *address, size_t size) { hw_set_mem_tag_range(kasan_reset_tag(address), round_up(size, KASAN_GRANULE_SIZE), get_tag(address)); } -static inline bool check_invalid_free(void *addr) +static inline bool kasan_check_invalid_free(void *addr) { u8 ptr_tag = get_tag(addr); u8 mem_tag = hw_get_mem_tag(addr); @@ -325,9 +325,9 @@ static inline bool check_invalid_free(void *addr) #else /* CONFIG_KASAN_HW_TAGS */ -void poison_range(const void *address, size_t size, u8 value); -void unpoison_range(const void *address, size_t size); -bool check_invalid_free(void *addr); +void kasan_poison(const void *address, size_t size, u8 value); +void kasan_unpoison(const void *address, size_t size); +bool kasan_check_invalid_free(void *addr); #endif /* CONFIG_KASAN_HW_TAGS */ diff --git a/mm/kasan/quarantine.c b/mm/kasan/quarantine.c index 55783125a767..728fb24c5683 100644 --- a/mm/kasan/quarantine.c +++ b/mm/kasan/quarantine.c @@ -168,7 +168,7 @@ static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache) qlist_init(q); } -bool quarantine_put(struct kmem_cache *cache, void *object) +bool kasan_quarantine_put(struct kmem_cache *cache, void *object) { unsigned long flags; struct qlist_head *q; @@ -184,11 +184,11 @@ bool quarantine_put(struct kmem_cache *cache, void *object) /* * Note: irq must be disabled until after we move the batch to the - * global quarantine. Otherwise quarantine_remove_cache() can miss - * some objects belonging to the cache if they are in our local temp - * list. quarantine_remove_cache() executes on_each_cpu() at the - * beginning which ensures that it either sees the objects in per-cpu - * lists or in the global quarantine. + * global quarantine. Otherwise kasan_quarantine_remove_cache() can + * miss some objects belonging to the cache if they are in our local + * temp list. kasan_quarantine_remove_cache() executes on_each_cpu() + * at the beginning which ensures that it either sees the objects in + * per-cpu lists or in the global quarantine. */ local_irq_save(flags); @@ -222,7 +222,7 @@ bool quarantine_put(struct kmem_cache *cache, void *object) return true; } -void quarantine_reduce(void) +void kasan_quarantine_reduce(void) { size_t total_size, new_quarantine_size, percpu_quarantines; unsigned long flags; @@ -234,7 +234,7 @@ void quarantine_reduce(void) return; /* - * srcu critical section ensures that quarantine_remove_cache() + * srcu critical section ensures that kasan_quarantine_remove_cache() * will not miss objects belonging to the cache while they are in our * local to_free list. srcu is chosen because (1) it gives us private * grace period domain that does not interfere with anything else, @@ -309,15 +309,15 @@ static void per_cpu_remove_cache(void *arg) } /* Free all quarantined objects belonging to cache. */ -void quarantine_remove_cache(struct kmem_cache *cache) +void kasan_quarantine_remove_cache(struct kmem_cache *cache) { unsigned long flags, i; struct qlist_head to_free = QLIST_INIT; /* * Must be careful to not miss any objects that are being moved from - * per-cpu list to the global quarantine in quarantine_put(), - * nor objects being freed in quarantine_reduce(). on_each_cpu() + * per-cpu list to the global quarantine in kasan_quarantine_put(), + * nor objects being freed in kasan_quarantine_reduce(). on_each_cpu() * achieves the first goal, while synchronize_srcu() achieves the * second. */ diff --git a/mm/kasan/report.c b/mm/kasan/report.c index c0fb21797550..e93d7973792e 100644 --- a/mm/kasan/report.c +++ b/mm/kasan/report.c @@ -61,7 +61,7 @@ __setup("kasan_multi_shot", kasan_set_multi_shot); static void print_error_description(struct kasan_access_info *info) { pr_err("BUG: KASAN: %s in %pS\n", - get_bug_type(info), (void *)info->ip); + kasan_get_bug_type(info), (void *)info->ip); if (info->access_size) pr_err("%s of size %zu at addr %px by task %s/%d\n", info->is_write ? "Write" : "Read", info->access_size, @@ -247,7 +247,7 @@ static void print_address_description(void *addr, u8 tag) dump_page(page, "kasan: bad access detected"); } - print_address_stack_frame(addr); + kasan_print_address_stack_frame(addr); } static bool meta_row_is_guilty(const void *row, const void *addr) @@ -293,7 +293,7 @@ static void print_memory_metadata(const void *addr) * function, because generic functions may try to * access kasan mapping for the passed address. */ - metadata_fetch_row(&metadata[0], row); + kasan_metadata_fetch_row(&metadata[0], row); print_hex_dump(KERN_ERR, buffer, DUMP_PREFIX_NONE, META_BYTES_PER_ROW, 1, @@ -350,7 +350,7 @@ void kasan_report_invalid_free(void *object, unsigned long ip) start_report(&flags); pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", (void *)ip); - print_tags(tag, object); + kasan_print_tags(tag, object); pr_err("\n"); print_address_description(object, tag); pr_err("\n"); @@ -378,7 +378,8 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write, info.access_addr = tagged_addr; if (addr_has_metadata(untagged_addr)) - info.first_bad_addr = find_first_bad_addr(tagged_addr, size); + info.first_bad_addr = + kasan_find_first_bad_addr(tagged_addr, size); else info.first_bad_addr = untagged_addr; info.access_size = size; @@ -389,7 +390,7 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write, print_error_description(&info); if (addr_has_metadata(untagged_addr)) - print_tags(get_tag(tagged_addr), info.first_bad_addr); + kasan_print_tags(get_tag(tagged_addr), info.first_bad_addr); pr_err("\n"); if (addr_has_metadata(untagged_addr)) { diff --git a/mm/kasan/report_generic.c b/mm/kasan/report_generic.c index 8a9c889872da..41f374585144 100644 --- a/mm/kasan/report_generic.c +++ b/mm/kasan/report_generic.c @@ -30,7 +30,7 @@ #include "kasan.h" #include "../slab.h" -void *find_first_bad_addr(void *addr, size_t size) +void *kasan_find_first_bad_addr(void *addr, size_t size) { void *p = addr; @@ -105,7 +105,7 @@ static const char *get_wild_bug_type(struct kasan_access_info *info) return bug_type; } -const char *get_bug_type(struct kasan_access_info *info) +const char *kasan_get_bug_type(struct kasan_access_info *info) { /* * If access_size is a negative number, then it has reason to be @@ -123,7 +123,7 @@ const char *get_bug_type(struct kasan_access_info *info) return get_wild_bug_type(info); } -void metadata_fetch_row(char *buffer, void *row) +void kasan_metadata_fetch_row(char *buffer, void *row) { memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW); } @@ -263,7 +263,7 @@ static bool __must_check get_address_stack_frame_info(const void *addr, return true; } -void print_address_stack_frame(const void *addr) +void kasan_print_address_stack_frame(const void *addr) { unsigned long offset; const char *frame_descr; diff --git a/mm/kasan/report_hw_tags.c b/mm/kasan/report_hw_tags.c index 57114f0e14d1..42b2168755d6 100644 --- a/mm/kasan/report_hw_tags.c +++ b/mm/kasan/report_hw_tags.c @@ -15,17 +15,17 @@ #include "kasan.h" -const char *get_bug_type(struct kasan_access_info *info) +const char *kasan_get_bug_type(struct kasan_access_info *info) { return "invalid-access"; } -void *find_first_bad_addr(void *addr, size_t size) +void *kasan_find_first_bad_addr(void *addr, size_t size) { return kasan_reset_tag(addr); } -void metadata_fetch_row(char *buffer, void *row) +void kasan_metadata_fetch_row(char *buffer, void *row) { int i; @@ -33,7 +33,7 @@ void metadata_fetch_row(char *buffer, void *row) buffer[i] = hw_get_mem_tag(row + i * KASAN_GRANULE_SIZE); } -void print_tags(u8 addr_tag, const void *addr) +void kasan_print_tags(u8 addr_tag, const void *addr) { u8 memory_tag = hw_get_mem_tag((void *)addr); diff --git a/mm/kasan/report_sw_tags.c b/mm/kasan/report_sw_tags.c index 1b026793ad57..3d20d3451d9e 100644 --- a/mm/kasan/report_sw_tags.c +++ b/mm/kasan/report_sw_tags.c @@ -29,7 +29,7 @@ #include "kasan.h" #include "../slab.h" -const char *get_bug_type(struct kasan_access_info *info) +const char *kasan_get_bug_type(struct kasan_access_info *info) { #ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY struct kasan_alloc_meta *alloc_meta; @@ -72,7 +72,7 @@ const char *get_bug_type(struct kasan_access_info *info) return "invalid-access"; } -void *find_first_bad_addr(void *addr, size_t size) +void *kasan_find_first_bad_addr(void *addr, size_t size) { u8 tag = get_tag(addr); void *p = kasan_reset_tag(addr); @@ -83,12 +83,12 @@ void *find_first_bad_addr(void *addr, size_t size) return p; } -void metadata_fetch_row(char *buffer, void *row) +void kasan_metadata_fetch_row(char *buffer, void *row) { memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW); } -void print_tags(u8 addr_tag, const void *addr) +void kasan_print_tags(u8 addr_tag, const void *addr) { u8 *shadow = (u8 *)kasan_mem_to_shadow(addr); diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 7c2c08c55f32..38958eb0d653 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -27,20 +27,20 @@ bool __kasan_check_read(const volatile void *p, unsigned int size) { - return check_memory_region((unsigned long)p, size, false, _RET_IP_); + return kasan_check_range((unsigned long)p, size, false, _RET_IP_); } EXPORT_SYMBOL(__kasan_check_read); bool __kasan_check_write(const volatile void *p, unsigned int size) { - return check_memory_region((unsigned long)p, size, true, _RET_IP_); + return kasan_check_range((unsigned long)p, size, true, _RET_IP_); } EXPORT_SYMBOL(__kasan_check_write); #undef memset void *memset(void *addr, int c, size_t len) { - if (!check_memory_region((unsigned long)addr, len, true, _RET_IP_)) + if (!kasan_check_range((unsigned long)addr, len, true, _RET_IP_)) return NULL; return __memset(addr, c, len); @@ -50,8 +50,8 @@ void *memset(void *addr, int c, size_t len) #undef memmove void *memmove(void *dest, const void *src, size_t len) { - if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) || - !check_memory_region((unsigned long)dest, len, true, _RET_IP_)) + if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) || + !kasan_check_range((unsigned long)dest, len, true, _RET_IP_)) return NULL; return __memmove(dest, src, len); @@ -61,8 +61,8 @@ void *memmove(void *dest, const void *src, size_t len) #undef memcpy void *memcpy(void *dest, const void *src, size_t len) { - if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) || - !check_memory_region((unsigned long)dest, len, true, _RET_IP_)) + if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) || + !kasan_check_range((unsigned long)dest, len, true, _RET_IP_)) return NULL; return __memcpy(dest, src, len); @@ -72,7 +72,7 @@ void *memcpy(void *dest, const void *src, size_t len) * Poisons the shadow memory for 'size' bytes starting from 'addr'. * Memory addresses should be aligned to KASAN_GRANULE_SIZE. */ -void poison_range(const void *address, size_t size, u8 value) +void kasan_poison(const void *address, size_t size, u8 value) { void *shadow_start, *shadow_end; @@ -90,7 +90,7 @@ void poison_range(const void *address, size_t size, u8 value) __memset(shadow_start, value, shadow_end - shadow_start); } -void unpoison_range(const void *address, size_t size) +void kasan_unpoison(const void *address, size_t size) { u8 tag = get_tag(address); @@ -101,7 +101,7 @@ void unpoison_range(const void *address, size_t size) */ address = kasan_reset_tag(address); - poison_range(address, size, tag); + kasan_poison(address, size, tag); if (size & KASAN_GRANULE_MASK) { u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size); @@ -286,7 +286,7 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size) * // vmalloc() allocates memory * // let a = area->addr * // we reach kasan_populate_vmalloc - * // and call unpoison_range: + * // and call kasan_unpoison: * STORE shadow(a), unpoison_val * ... * STORE shadow(a+99), unpoison_val x = LOAD p @@ -321,7 +321,7 @@ void kasan_poison_vmalloc(const void *start, unsigned long size) return; size = round_up(size, KASAN_GRANULE_SIZE); - poison_range(start, size, KASAN_VMALLOC_INVALID); + kasan_poison(start, size, KASAN_VMALLOC_INVALID); } void kasan_unpoison_vmalloc(const void *start, unsigned long size) @@ -329,7 +329,7 @@ void kasan_unpoison_vmalloc(const void *start, unsigned long size) if (!is_vmalloc_or_module_addr(start)) return; - unpoison_range(start, size); + kasan_unpoison(start, size); } static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr, diff --git a/mm/kasan/sw_tags.c b/mm/kasan/sw_tags.c index 5dcd830805b2..cc271fceb5d5 100644 --- a/mm/kasan/sw_tags.c +++ b/mm/kasan/sw_tags.c @@ -57,7 +57,7 @@ void __init kasan_init_sw_tags(void) * sequence has in fact positive effect, since interrupts that randomly skew * PRNG at unpredictable points do only good. */ -u8 random_tag(void) +u8 kasan_random_tag(void) { u32 state = this_cpu_read(prng_state); @@ -67,7 +67,7 @@ u8 random_tag(void) return (u8)(state % (KASAN_TAG_MAX + 1)); } -bool check_memory_region(unsigned long addr, size_t size, bool write, +bool kasan_check_range(unsigned long addr, size_t size, bool write, unsigned long ret_ip) { u8 tag; @@ -118,7 +118,7 @@ bool check_memory_region(unsigned long addr, size_t size, bool write, return true; } -bool check_invalid_free(void *addr) +bool kasan_check_invalid_free(void *addr) { u8 tag = get_tag(addr); u8 shadow_byte = READ_ONCE(*(u8 *)kasan_mem_to_shadow(kasan_reset_tag(addr))); @@ -130,12 +130,12 @@ bool check_invalid_free(void *addr) #define DEFINE_HWASAN_LOAD_STORE(size) \ void __hwasan_load##size##_noabort(unsigned long addr) \ { \ - check_memory_region(addr, size, false, _RET_IP_); \ + kasan_check_range(addr, size, false, _RET_IP_); \ } \ EXPORT_SYMBOL(__hwasan_load##size##_noabort); \ void __hwasan_store##size##_noabort(unsigned long addr) \ { \ - check_memory_region(addr, size, true, _RET_IP_); \ + kasan_check_range(addr, size, true, _RET_IP_); \ } \ EXPORT_SYMBOL(__hwasan_store##size##_noabort) @@ -147,19 +147,19 @@ DEFINE_HWASAN_LOAD_STORE(16); void __hwasan_loadN_noabort(unsigned long addr, unsigned long size) { - check_memory_region(addr, size, false, _RET_IP_); + kasan_check_range(addr, size, false, _RET_IP_); } EXPORT_SYMBOL(__hwasan_loadN_noabort); void __hwasan_storeN_noabort(unsigned long addr, unsigned long size) { - check_memory_region(addr, size, true, _RET_IP_); + kasan_check_range(addr, size, true, _RET_IP_); } EXPORT_SYMBOL(__hwasan_storeN_noabort); void __hwasan_tag_memory(unsigned long addr, u8 tag, unsigned long size) { - poison_range((void *)addr, size, tag); + kasan_poison((void *)addr, size, tag); } EXPORT_SYMBOL(__hwasan_tag_memory); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 080a3d6cbd75..80e7b822a432 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -666,7 +666,7 @@ static void add_ignores(struct objtool_file *file) static const char *uaccess_safe_builtin[] = { /* KASAN */ "kasan_report", - "check_memory_region", + "kasan_check_range", /* KASAN out-of-line */ "__asan_loadN_noabort", "__asan_load1_noabort", -- cgit v1.2.3 From c1d96fa61eb74b1e211f1653acc5b68ac62c8ef4 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 25 Feb 2021 17:52:48 +0100 Subject: tracing/tools: fix a couple of spelling mistakes There is a spelling mistake in the -g help option, I believe it should be "graph". There is also a spelling mistake in a warning message. Fix both mistakes. Link: https://lkml.kernel.org/r/20210225165248.22050-2-Viktor.Rosendahl@bmw.de Signed-off-by: Colin Ian King Signed-off-by: Viktor Rosendahl Signed-off-by: Steven Rostedt (VMware) --- tools/tracing/latency/latency-collector.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c index 57b20802e71b..b69de9263ee6 100644 --- a/tools/tracing/latency/latency-collector.c +++ b/tools/tracing/latency/latency-collector.c @@ -1650,7 +1650,7 @@ static void start_printthread(void) if (ufd < 0 || read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) { printf( -"Warning! Using trivial random nummer seed, since %s not available\n", +"Warning! Using trivial random number seed, since %s not available\n", DEV_URANDOM); fflush(stdout); *seed = i; @@ -1711,8 +1711,8 @@ static void show_usage(void) "\t\t\tbeginning, end, and backtrace.\n\n" "-g, --graph\t\tEnable the display-graph option in trace_option. This\n" -"\t\t\toption causes ftrace to show the functionph of how\n" -"\t\t\tfunctions are calling other functions.\n\n" +"\t\t\toption causes ftrace to show the graph of how functions\n" +"\t\t\tare calling other functions.\n\n" "-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n" "\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n" -- cgit v1.2.3 From 6528fc0a11de3d16339cf17639e2f69a68fcaf4d Mon Sep 17 00:00:00 2001 From: Aaron Lewis Date: Wed, 10 Feb 2021 08:50:36 -0800 Subject: selftests: kvm: Mmap the entire vcpu mmap area The vcpu mmap area may consist of more than just the kvm_run struct. Allocate enough space for the entire vcpu mmap area. Without this, on x86, the PIO page, for example, will be missing. This is problematic when dealing with an unhandled exception from the guest as the exception vector will be incorrectly reported as 0x0. Message-Id: <20210210165035.3712489-1-aaronlewis@google.com> Reviewed-by: Andrew Jones Co-developed-by: Steve Rutherford Signed-off-by: Aaron Lewis Signed-off-by: Paolo Bonzini --- tools/testing/selftests/kvm/lib/kvm_util.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index d787cb802b4a..e5fbf16f725b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -21,6 +21,8 @@ #define KVM_UTIL_PGS_PER_HUGEPG 512 #define KVM_UTIL_MIN_PFN 2 +static int vcpu_mmap_sz(void); + /* Aligns x up to the next multiple of size. Size must be a power of 2. */ static void *align(void *x, size_t size) { @@ -509,7 +511,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu) vcpu->dirty_gfns = NULL; } - ret = munmap(vcpu->state, sizeof(*vcpu->state)); + ret = munmap(vcpu->state, vcpu_mmap_sz()); TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i " "errno: %i", ret, errno); close(vcpu->fd); @@ -978,7 +980,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid) TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size " "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->state)); - vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state), + vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, " "vcpu id: %u errno: %i", vcpuid, errno); -- cgit v1.2.3 From 30b5c851af7991ad08abe90c1e7c31615fa98a1a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 1 Mar 2021 12:53:09 +0000 Subject: KVM: x86/xen: Add support for vCPU runstate information This is how Xen guests do steal time accounting. The hypervisor records the amount of time spent in each of running/runnable/blocked/offline states. In the Xen accounting, a vCPU is still in state RUNSTATE_running while in Xen for a hypercall or I/O trap, etc. Only if Xen explicitly schedules does the state become RUNSTATE_blocked. In KVM this means that even when the vCPU exits the kvm_run loop, the state remains RUNSTATE_running. The VMM can explicitly set the vCPU to RUNSTATE_blocked by using the KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT attribute, and can also use KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST to retrospectively add a given amount of time to the blocked state and subtract it from the running state. The state_entry_time corresponds to get_kvmclock_ns() at the time the vCPU entered the current state, and the total times of all four states should always add up to state_entry_time. Co-developed-by: Joao Martins Signed-off-by: Joao Martins Signed-off-by: David Woodhouse Message-Id: <20210301125309.874953-2-dwmw2@infradead.org> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 41 +++ arch/x86/include/asm/kvm_host.h | 6 + arch/x86/kvm/x86.c | 13 +- arch/x86/kvm/xen.c | 286 +++++++++++++++++++++ arch/x86/kvm/xen.h | 40 ++- include/uapi/linux/kvm.h | 13 + .../testing/selftests/kvm/x86_64/xen_shinfo_test.c | 159 +++++++++++- 7 files changed, 553 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 359435d4e417..1a2b5210cdbf 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4878,6 +4878,14 @@ see KVM_XEN_HVM_SET_ATTR above. union { __u64 gpa; __u64 pad[4]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; } u; }; @@ -4890,6 +4898,31 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO Sets the guest physical address of an additional pvclock structure for a given vCPU. This is typically used for guest vsyscall support. +KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR + Sets the guest physical address of the vcpu_runstate_info for a given + vCPU. This is how a Xen guest tracks CPU state such as steal time. + +KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT + Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of + the given vCPU from the .u.runstate.state member of the structure. + KVM automatically accounts running and runnable time but blocked + and offline states are only entered explicitly. + +KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA + Sets all fields of the vCPU runstate data from the .u.runstate member + of the structure, including the current runstate. The state_entry_time + must equal the sum of the other four times. + +KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST + This *adds* the contents of the .u.runstate members of the structure + to the corresponding members of the given vCPU's runstate data, thus + permitting atomic adjustments to the runstate times. The adjustment + to the state_entry_time must equal the sum of the adjustments to the + other four times. The state field must be set to -1, or to a valid + runstate value (RUNSTATE_running, RUNSTATE_runnable, RUNSTATE_blocked + or RUNSTATE_offline) to set the current accounted state as of the + adjusted state_entry_time. + 4.130 KVM_XEN_VCPU_GET_ATTR --------------------------- @@ -4902,6 +4935,9 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO Allows Xen vCPU attributes to be read. For the structure and types, see KVM_XEN_VCPU_SET_ATTR above. +The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used +with the KVM_XEN_VCPU_GET_ATTR ioctl. + 5. The kvm_run structure ======================== @@ -6666,6 +6702,7 @@ PVHVM guests. Valid flags are:: #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) + #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 2) The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG ioctl is available, for the guest to set its hypercall page. @@ -6680,3 +6717,7 @@ KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors for event channel upcalls when the evtchn_upcall_pending field of a vcpu's vcpu_info is set. + +The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related +features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are +supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 85ccbd4b7c52..877a4025d8da 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -535,10 +535,16 @@ struct kvm_vcpu_hv { /* Xen HVM per vcpu emulation context */ struct kvm_vcpu_xen { u64 hypercall_rip; + u32 current_runstate; bool vcpu_info_set; bool vcpu_time_info_set; + bool runstate_set; struct gfn_to_hva_cache vcpu_info_cache; struct gfn_to_hva_cache vcpu_time_info_cache; + struct gfn_to_hva_cache runstate_cache; + u64 last_steal; + u64 runstate_entry_time; + u64 runstate_times[4]; }; struct kvm_vcpu_arch { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4a5ce57b0bb2..868213ca4f98 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2956,6 +2956,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu) struct kvm_host_map map; struct kvm_steal_time *st; + if (kvm_xen_msr_enabled(vcpu->kvm)) { + kvm_xen_runstate_set_running(vcpu); + return; + } + if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; @@ -3760,6 +3765,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR | KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO; + if (sched_info_on()) + r |= KVM_XEN_HVM_CONFIG_RUNSTATE; break; #endif case KVM_CAP_SYNC_REGS: @@ -4039,7 +4046,11 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) if (vcpu->preempted && !vcpu->arch.guest_state_protected) vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu); - kvm_steal_time_set_preempted(vcpu); + if (kvm_xen_msr_enabled(vcpu->kvm)) + kvm_xen_runstate_set_preempted(vcpu); + else + kvm_steal_time_set_preempted(vcpu); + static_call(kvm_x86_vcpu_put)(vcpu); vcpu->arch.last_host_tsc = rdtsc(); /* diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index 77b20ff09078..ae17250e1efe 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -11,9 +11,11 @@ #include "hyperv.h" #include +#include #include #include +#include #include "trace.h" @@ -61,6 +63,132 @@ out: return ret; } +static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) +{ + struct kvm_vcpu_xen *vx = &v->arch.xen; + u64 now = get_kvmclock_ns(v->kvm); + u64 delta_ns = now - vx->runstate_entry_time; + u64 run_delay = current->sched_info.run_delay; + + if (unlikely(!vx->runstate_entry_time)) + vx->current_runstate = RUNSTATE_offline; + + /* + * Time waiting for the scheduler isn't "stolen" if the + * vCPU wasn't running anyway. + */ + if (vx->current_runstate == RUNSTATE_running) { + u64 steal_ns = run_delay - vx->last_steal; + + delta_ns -= steal_ns; + + vx->runstate_times[RUNSTATE_runnable] += steal_ns; + } + vx->last_steal = run_delay; + + vx->runstate_times[vx->current_runstate] += delta_ns; + vx->current_runstate = state; + vx->runstate_entry_time = now; +} + +void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) +{ + struct kvm_vcpu_xen *vx = &v->arch.xen; + uint64_t state_entry_time; + unsigned int offset; + + kvm_xen_update_runstate(v, state); + + if (!vx->runstate_set) + return; + + BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); + + offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time); +#ifdef CONFIG_X86_64 + /* + * The only difference is alignment of uint64_t in 32-bit. + * So the first field 'state' is accessed directly using + * offsetof() (where its offset happens to be zero), while the + * remaining fields which are all uint64_t, start at 'offset' + * which we tweak here by adding 4. + */ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != + offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != + offsetof(struct compat_vcpu_runstate_info, time) + 4); + + if (v->kvm->arch.xen.long_mode) + offset = offsetof(struct vcpu_runstate_info, state_entry_time); +#endif + /* + * First write the updated state_entry_time at the appropriate + * location determined by 'offset'. + */ + state_entry_time = vx->runstate_entry_time; + state_entry_time |= XEN_RUNSTATE_UPDATE; + + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) != + sizeof(state_entry_time)); + BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) != + sizeof(state_entry_time)); + + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, + &state_entry_time, offset, + sizeof(state_entry_time))) + return; + smp_wmb(); + + /* + * Next, write the new runstate. This is in the *same* place + * for 32-bit and 64-bit guests, asserted here for paranoia. + */ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != + offsetof(struct compat_vcpu_runstate_info, state)); + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) != + sizeof(vx->current_runstate)); + BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) != + sizeof(vx->current_runstate)); + + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, + &vx->current_runstate, + offsetof(struct vcpu_runstate_info, state), + sizeof(vx->current_runstate))) + return; + + /* + * Write the actual runstate times immediately after the + * runstate_entry_time. + */ + BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != + offsetof(struct vcpu_runstate_info, time) - sizeof(u64)); + BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != + offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64)); + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) != + sizeof(((struct compat_vcpu_runstate_info *)0)->time)); + BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) != + sizeof(vx->runstate_times)); + + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, + &vx->runstate_times[0], + offset + sizeof(u64), + sizeof(vx->runstate_times))) + return; + + smp_wmb(); + + /* + * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's + * runstate_entry_time field. + */ + + state_entry_time &= ~XEN_RUNSTATE_UPDATE; + if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache, + &state_entry_time, offset, + sizeof(state_entry_time))) + return; +} + int __kvm_xen_has_interrupt(struct kvm_vcpu *v) { u8 rc = 0; @@ -223,6 +351,121 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) } break; + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + if (data->u.gpa == GPA_INVALID) { + vcpu->arch.xen.runstate_set = false; + r = 0; + break; + } + + r = kvm_gfn_to_hva_cache_init(vcpu->kvm, + &vcpu->arch.xen.runstate_cache, + data->u.gpa, + sizeof(struct vcpu_runstate_info)); + if (!r) { + vcpu->arch.xen.runstate_set = true; + } + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + if (data->u.runstate.state > RUNSTATE_offline) { + r = -EINVAL; + break; + } + + kvm_xen_update_runstate(vcpu, data->u.runstate.state); + r = 0; + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + if (data->u.runstate.state > RUNSTATE_offline) { + r = -EINVAL; + break; + } + if (data->u.runstate.state_entry_time != + (data->u.runstate.time_running + + data->u.runstate.time_runnable + + data->u.runstate.time_blocked + + data->u.runstate.time_offline)) { + r = -EINVAL; + break; + } + if (get_kvmclock_ns(vcpu->kvm) < + data->u.runstate.state_entry_time) { + r = -EINVAL; + break; + } + + vcpu->arch.xen.current_runstate = data->u.runstate.state; + vcpu->arch.xen.runstate_entry_time = + data->u.runstate.state_entry_time; + vcpu->arch.xen.runstate_times[RUNSTATE_running] = + data->u.runstate.time_running; + vcpu->arch.xen.runstate_times[RUNSTATE_runnable] = + data->u.runstate.time_runnable; + vcpu->arch.xen.runstate_times[RUNSTATE_blocked] = + data->u.runstate.time_blocked; + vcpu->arch.xen.runstate_times[RUNSTATE_offline] = + data->u.runstate.time_offline; + vcpu->arch.xen.last_steal = current->sched_info.run_delay; + r = 0; + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + if (data->u.runstate.state > RUNSTATE_offline && + data->u.runstate.state != (u64)-1) { + r = -EINVAL; + break; + } + /* The adjustment must add up */ + if (data->u.runstate.state_entry_time != + (data->u.runstate.time_running + + data->u.runstate.time_runnable + + data->u.runstate.time_blocked + + data->u.runstate.time_offline)) { + r = -EINVAL; + break; + } + + if (get_kvmclock_ns(vcpu->kvm) < + (vcpu->arch.xen.runstate_entry_time + + data->u.runstate.state_entry_time)) { + r = -EINVAL; + break; + } + + vcpu->arch.xen.runstate_entry_time += + data->u.runstate.state_entry_time; + vcpu->arch.xen.runstate_times[RUNSTATE_running] += + data->u.runstate.time_running; + vcpu->arch.xen.runstate_times[RUNSTATE_runnable] += + data->u.runstate.time_runnable; + vcpu->arch.xen.runstate_times[RUNSTATE_blocked] += + data->u.runstate.time_blocked; + vcpu->arch.xen.runstate_times[RUNSTATE_offline] += + data->u.runstate.time_offline; + + if (data->u.runstate.state <= RUNSTATE_offline) + kvm_xen_update_runstate(vcpu, data->u.runstate.state); + r = 0; + break; + default: break; } @@ -255,6 +498,49 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) r = 0; break; + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + if (vcpu->arch.xen.runstate_set) { + data->u.gpa = vcpu->arch.xen.runstate_cache.gpa; + r = 0; + } + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + data->u.runstate.state = vcpu->arch.xen.current_runstate; + r = 0; + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + data->u.runstate.state = vcpu->arch.xen.current_runstate; + data->u.runstate.state_entry_time = + vcpu->arch.xen.runstate_entry_time; + data->u.runstate.time_running = + vcpu->arch.xen.runstate_times[RUNSTATE_running]; + data->u.runstate.time_runnable = + vcpu->arch.xen.runstate_times[RUNSTATE_runnable]; + data->u.runstate.time_blocked = + vcpu->arch.xen.runstate_times[RUNSTATE_blocked]; + data->u.runstate.time_offline = + vcpu->arch.xen.runstate_times[RUNSTATE_offline]; + r = 0; + break; + + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST: + r = -EINVAL; + break; + default: break; } diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 87eaf2be9549..463a7844a8ca 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -23,6 +23,12 @@ int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data); int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc); void kvm_xen_destroy_vm(struct kvm *kvm); +static inline bool kvm_xen_msr_enabled(struct kvm *kvm) +{ + return static_branch_unlikely(&kvm_xen_enabled.key) && + kvm->arch.xen_hvm_config.msr; +} + static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm) { return static_branch_unlikely(&kvm_xen_enabled.key) && @@ -48,6 +54,11 @@ static inline void kvm_xen_destroy_vm(struct kvm *kvm) { } +static inline bool kvm_xen_msr_enabled(struct kvm *kvm) +{ + return false; +} + static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm) { return false; @@ -61,10 +72,31 @@ static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu) int kvm_xen_hypercall(struct kvm_vcpu *vcpu); -/* 32-bit compatibility definitions, also used natively in 32-bit build */ #include #include +#include +void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state); + +static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu) +{ + kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running); +} + +static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu) +{ + /* + * If the vCPU wasn't preempted but took a normal exit for + * some reason (hypercalls, I/O, etc.), that is accounted as + * still RUNSTATE_running, as the VMM is still operating on + * behalf of the vCPU. Only if the VMM does actually block + * does it need to enter RUNSTATE_blocked. + */ + if (vcpu->preempted) + kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable); +} + +/* 32-bit compatibility definitions, also used natively in 32-bit build */ struct compat_arch_vcpu_info { unsigned int cr2; unsigned int pad[5]; @@ -97,4 +129,10 @@ struct compat_shared_info { struct compat_arch_shared_info arch; }; +struct compat_vcpu_runstate_info { + int state; + uint64_t state_entry_time; + uint64_t time[4]; +} __attribute__((packed)); + #endif /* __ARCH_X86_KVM_XEN_H__ */ diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 8b281f722e5b..f6afee209620 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1154,6 +1154,7 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) +#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) struct kvm_xen_hvm_config { __u32 flags; @@ -1621,12 +1622,24 @@ struct kvm_xen_vcpu_attr { union { __u64 gpa; __u64 pad[8]; + struct { + __u64 state; + __u64 state_entry_time; + __u64 time_running; + __u64 time_runnable; + __u64 time_blocked; + __u64 time_offline; + } runstate; } u; }; /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4 +#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5 /* Secure Encrypted Virtualization command */ enum sev_cmd_id { diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 9246ea310587..804ff5ff022d 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -13,19 +13,27 @@ #include #include +#include +#include #define VCPU_ID 5 +#define SHINFO_REGION_GVA 0xc0000000ULL #define SHINFO_REGION_GPA 0xc0000000ULL #define SHINFO_REGION_SLOT 10 #define PAGE_SIZE 4096 #define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE) +#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + 0x20) + +#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20) static struct kvm_vm *vm; #define XEN_HYPERCALL_MSR 0x40000000 +#define MIN_STEAL_TIME 50000 + struct pvclock_vcpu_time_info { u32 version; u32 pad0; @@ -43,11 +51,67 @@ struct pvclock_wall_clock { u32 nsec; } __attribute__((__packed__)); +struct vcpu_runstate_info { + uint32_t state; + uint64_t state_entry_time; + uint64_t time[4]; +}; + +#define RUNSTATE_running 0 +#define RUNSTATE_runnable 1 +#define RUNSTATE_blocked 2 +#define RUNSTATE_offline 3 + static void guest_code(void) { + struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR; + + /* Test having the host set runstates manually */ + GUEST_SYNC(RUNSTATE_runnable); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0); + GUEST_ASSERT(rs->state == 0); + + GUEST_SYNC(RUNSTATE_blocked); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0); + GUEST_ASSERT(rs->state == 0); + + GUEST_SYNC(RUNSTATE_offline); + GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0); + GUEST_ASSERT(rs->state == 0); + + /* Test runstate time adjust */ + GUEST_SYNC(4); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a); + GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b); + + /* Test runstate time set */ + GUEST_SYNC(5); + GUEST_ASSERT(rs->state_entry_time >= 0x8000); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0); + GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b); + GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a); + + /* sched_yield() should result in some 'runnable' time */ + GUEST_SYNC(6); + GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME); + GUEST_DONE(); } +static long get_run_delay(void) +{ + char path[64]; + long val[2]; + FILE *fp; + + sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); + fp = fopen(path, "r"); + fscanf(fp, "%ld %ld ", &val[0], &val[1]); + fclose(fp); + + return val[1]; +} + static int cmp_timespec(struct timespec *a, struct timespec *b) { if (a->tv_sec > b->tv_sec) @@ -66,12 +130,14 @@ int main(int argc, char *argv[]) { struct timespec min_ts, max_ts, vm_ts; - if (!(kvm_check_cap(KVM_CAP_XEN_HVM) & - KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { + int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM); + if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO) ) { print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available"); exit(KSFT_SKIP); } + bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE); + clock_gettime(CLOCK_REALTIME, &min_ts); vm = vm_create_default(VCPU_ID, 0, (void *) guest_code); @@ -80,6 +146,7 @@ int main(int argc, char *argv[]) /* Map a region for the shared_info page */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0); + virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0); struct kvm_xen_hvm_config hvmc = { .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, @@ -111,6 +178,17 @@ int main(int argc, char *argv[]) }; vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock); + if (do_runstate_tests) { + struct kvm_xen_vcpu_attr st = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, + .u.gpa = RUNSTATE_ADDR, + }; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st); + } + + struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);; + rs->state = 0x5a; + for (;;) { volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); struct ucall uc; @@ -126,8 +204,56 @@ int main(int argc, char *argv[]) case UCALL_ABORT: TEST_FAIL("%s", (const char *)uc.args[0]); /* NOT REACHED */ - case UCALL_SYNC: + case UCALL_SYNC: { + struct kvm_xen_vcpu_attr rst; + long rundelay; + + /* If no runstate support, bail out early */ + if (!do_runstate_tests) + goto done; + + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); + + switch (uc.args[1]) { + case RUNSTATE_running...RUNSTATE_offline: + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT; + rst.u.runstate.state = uc.args[1]; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + case 4: + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST; + memset(&rst.u, 0, sizeof(rst.u)); + rst.u.runstate.state = (uint64_t)-1; + rst.u.runstate.time_blocked = + 0x5a - rs->time[RUNSTATE_blocked]; + rst.u.runstate.time_offline = + 0x6b6b - rs->time[RUNSTATE_offline]; + rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked - + rst.u.runstate.time_offline; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + + case 5: + rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA; + memset(&rst.u, 0, sizeof(rst.u)); + rst.u.runstate.state = RUNSTATE_running; + rst.u.runstate.state_entry_time = 0x6b6b + 0x5a; + rst.u.runstate.time_blocked = 0x6b6b; + rst.u.runstate.time_offline = 0x5a; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst); + break; + case 6: + /* Yield until scheduler delay exceeds target */ + rundelay = get_run_delay() + MIN_STEAL_TIME; + do { + sched_yield(); + } while (get_run_delay() < rundelay); + break; + } break; + } case UCALL_DONE: goto done; default: @@ -162,6 +288,33 @@ int main(int argc, char *argv[]) TEST_ASSERT(ti2->version && !(ti2->version & 1), "Bad time_info version %x", ti->version); + if (do_runstate_tests) { + /* + * Fetch runstate and check sanity. Strictly speaking in the + * general case we might not expect the numbers to be identical + * but in this case we know we aren't running the vCPU any more. + */ + struct kvm_xen_vcpu_attr rst = { + .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA, + }; + vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst); + + TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch"); + TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time, + "State entry time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running, + "Running time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable, + "Runnable time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked, + "Blocked time mismatch"); + TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline, + "Offline time mismatch"); + + TEST_ASSERT(rs->state_entry_time == rs->time[0] + + rs->time[1] + rs->time[2] + rs->time[3], + "runstate times don't add up"); + } kvm_vm_free(vm); return 0; } -- cgit v1.2.3 From 3ae0415d0bb401abad1db7468105e3d3756e153f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:16:45 -0300 Subject: tools headers UAPI: Update tools's copy of drm.h headers Picking the changes from: 0e0dc448005583a6 ("drm/doc: demote old doc-comments in drm.h") Silencing these perf build warnings: Warning: Kernel ABI header at 'tools/include/uapi/drm/drm.h' differs from latest version at 'include/uapi/drm/drm.h' diff -u tools/include/uapi/drm/drm.h include/uapi/drm/drm.h No changes in tooling as these are just C comment documentation changes. Cc: Simon Ser Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/drm.h | 97 ++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 48 deletions(-) (limited to 'tools') diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 808b48a93330..0827037c5484 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -1,11 +1,10 @@ -/** - * \file drm.h +/* * Header for the Direct Rendering Manager * - * \author Rickard E. (Rik) Faith + * Author: Rickard E. (Rik) Faith * - * \par Acknowledgments: - * Dec 1999, Richard Henderson , move to generic \c cmpxchg. + * Acknowledgments: + * Dec 1999, Richard Henderson , move to generic cmpxchg. */ /* @@ -85,7 +84,7 @@ typedef unsigned int drm_context_t; typedef unsigned int drm_drawable_t; typedef unsigned int drm_magic_t; -/** +/* * Cliprect. * * \warning: If you change this structure, make sure you change @@ -101,7 +100,7 @@ struct drm_clip_rect { unsigned short y2; }; -/** +/* * Drawable information. */ struct drm_drawable_info { @@ -109,7 +108,7 @@ struct drm_drawable_info { struct drm_clip_rect *rects; }; -/** +/* * Texture region, */ struct drm_tex_region { @@ -120,7 +119,7 @@ struct drm_tex_region { unsigned int age; }; -/** +/* * Hardware lock. * * The lock structure is a simple cache-line aligned integer. To avoid @@ -132,7 +131,7 @@ struct drm_hw_lock { char padding[60]; /**< Pad to cache line */ }; -/** +/* * DRM_IOCTL_VERSION ioctl argument type. * * \sa drmGetVersion(). @@ -149,7 +148,7 @@ struct drm_version { char __user *desc; /**< User-space buffer to hold desc */ }; -/** +/* * DRM_IOCTL_GET_UNIQUE ioctl argument type. * * \sa drmGetBusid() and drmSetBusId(). @@ -168,7 +167,7 @@ struct drm_block { int unused; }; -/** +/* * DRM_IOCTL_CONTROL ioctl argument type. * * \sa drmCtlInstHandler() and drmCtlUninstHandler(). @@ -183,7 +182,7 @@ struct drm_control { int irq; }; -/** +/* * Type of memory to map. */ enum drm_map_type { @@ -195,7 +194,7 @@ enum drm_map_type { _DRM_CONSISTENT = 5 /**< Consistent memory for PCI DMA */ }; -/** +/* * Memory mapping flags. */ enum drm_map_flags { @@ -214,7 +213,7 @@ struct drm_ctx_priv_map { void *handle; /**< Handle of map */ }; -/** +/* * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls * argument type. * @@ -231,7 +230,7 @@ struct drm_map { /* Private data */ }; -/** +/* * DRM_IOCTL_GET_CLIENT ioctl argument type. */ struct drm_client { @@ -263,7 +262,7 @@ enum drm_stat_type { /* Add to the *END* of the list */ }; -/** +/* * DRM_IOCTL_GET_STATS ioctl argument type. */ struct drm_stats { @@ -274,7 +273,7 @@ struct drm_stats { } data[15]; }; -/** +/* * Hardware locking flags. */ enum drm_lock_flags { @@ -289,7 +288,7 @@ enum drm_lock_flags { _DRM_HALT_CUR_QUEUES = 0x20 /**< Halt all current queues */ }; -/** +/* * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type. * * \sa drmGetLock() and drmUnlock(). @@ -299,7 +298,7 @@ struct drm_lock { enum drm_lock_flags flags; }; -/** +/* * DMA flags * * \warning @@ -328,7 +327,7 @@ enum drm_dma_flags { _DRM_DMA_LARGER_OK = 0x40 /**< Larger-than-requested buffers OK */ }; -/** +/* * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type. * * \sa drmAddBufs(). @@ -351,7 +350,7 @@ struct drm_buf_desc { */ }; -/** +/* * DRM_IOCTL_INFO_BUFS ioctl argument type. */ struct drm_buf_info { @@ -359,7 +358,7 @@ struct drm_buf_info { struct drm_buf_desc __user *list; }; -/** +/* * DRM_IOCTL_FREE_BUFS ioctl argument type. */ struct drm_buf_free { @@ -367,7 +366,7 @@ struct drm_buf_free { int __user *list; }; -/** +/* * Buffer information * * \sa drm_buf_map. @@ -379,7 +378,7 @@ struct drm_buf_pub { void __user *address; /**< Address of buffer */ }; -/** +/* * DRM_IOCTL_MAP_BUFS ioctl argument type. */ struct drm_buf_map { @@ -392,7 +391,7 @@ struct drm_buf_map { struct drm_buf_pub __user *list; /**< Buffer information */ }; -/** +/* * DRM_IOCTL_DMA ioctl argument type. * * Indices here refer to the offset into the buffer list in drm_buf_get. @@ -417,7 +416,7 @@ enum drm_ctx_flags { _DRM_CONTEXT_2DONLY = 0x02 }; -/** +/* * DRM_IOCTL_ADD_CTX ioctl argument type. * * \sa drmCreateContext() and drmDestroyContext(). @@ -427,7 +426,7 @@ struct drm_ctx { enum drm_ctx_flags flags; }; -/** +/* * DRM_IOCTL_RES_CTX ioctl argument type. */ struct drm_ctx_res { @@ -435,14 +434,14 @@ struct drm_ctx_res { struct drm_ctx __user *contexts; }; -/** +/* * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type. */ struct drm_draw { drm_drawable_t handle; }; -/** +/* * DRM_IOCTL_UPDATE_DRAW ioctl argument type. */ typedef enum { @@ -456,14 +455,14 @@ struct drm_update_draw { unsigned long long data; }; -/** +/* * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type. */ struct drm_auth { drm_magic_t magic; }; -/** +/* * DRM_IOCTL_IRQ_BUSID ioctl argument type. * * \sa drmGetInterruptFromBusID(). @@ -505,7 +504,7 @@ struct drm_wait_vblank_reply { long tval_usec; }; -/** +/* * DRM_IOCTL_WAIT_VBLANK ioctl argument type. * * \sa drmWaitVBlank(). @@ -518,7 +517,7 @@ union drm_wait_vblank { #define _DRM_PRE_MODESET 1 #define _DRM_POST_MODESET 2 -/** +/* * DRM_IOCTL_MODESET_CTL ioctl argument type * * \sa drmModesetCtl(). @@ -528,7 +527,7 @@ struct drm_modeset_ctl { __u32 cmd; }; -/** +/* * DRM_IOCTL_AGP_ENABLE ioctl argument type. * * \sa drmAgpEnable(). @@ -537,7 +536,7 @@ struct drm_agp_mode { unsigned long mode; /**< AGP mode */ }; -/** +/* * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type. * * \sa drmAgpAlloc() and drmAgpFree(). @@ -549,7 +548,7 @@ struct drm_agp_buffer { unsigned long physical; /**< Physical used by i810 */ }; -/** +/* * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type. * * \sa drmAgpBind() and drmAgpUnbind(). @@ -559,7 +558,7 @@ struct drm_agp_binding { unsigned long offset; /**< In bytes -- will round to page boundary */ }; -/** +/* * DRM_IOCTL_AGP_INFO ioctl argument type. * * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(), @@ -580,7 +579,7 @@ struct drm_agp_info { unsigned short id_device; }; -/** +/* * DRM_IOCTL_SG_ALLOC ioctl argument type. */ struct drm_scatter_gather { @@ -588,7 +587,7 @@ struct drm_scatter_gather { unsigned long handle; /**< Used for mapping / unmapping */ }; -/** +/* * DRM_IOCTL_SET_VERSION ioctl argument type. */ struct drm_set_version { @@ -598,14 +597,14 @@ struct drm_set_version { int drm_dd_minor; }; -/** DRM_IOCTL_GEM_CLOSE ioctl argument type */ +/* DRM_IOCTL_GEM_CLOSE ioctl argument type */ struct drm_gem_close { /** Handle of the object to be closed. */ __u32 handle; __u32 pad; }; -/** DRM_IOCTL_GEM_FLINK ioctl argument type */ +/* DRM_IOCTL_GEM_FLINK ioctl argument type */ struct drm_gem_flink { /** Handle for the object being named */ __u32 handle; @@ -614,7 +613,7 @@ struct drm_gem_flink { __u32 name; }; -/** DRM_IOCTL_GEM_OPEN ioctl argument type */ +/* DRM_IOCTL_GEM_OPEN ioctl argument type */ struct drm_gem_open { /** Name of object being opened */ __u32 name; @@ -652,7 +651,7 @@ struct drm_gem_open { #define DRM_CAP_SYNCOBJ 0x13 #define DRM_CAP_SYNCOBJ_TIMELINE 0x14 -/** DRM_IOCTL_GET_CAP ioctl argument type */ +/* DRM_IOCTL_GET_CAP ioctl argument type */ struct drm_get_cap { __u64 capability; __u64 value; @@ -678,7 +677,9 @@ struct drm_get_cap { /** * DRM_CLIENT_CAP_ATOMIC * - * If set to 1, the DRM core will expose atomic properties to userspace + * If set to 1, the DRM core will expose atomic properties to userspace. This + * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and + * &DRM_CLIENT_CAP_ASPECT_RATIO. */ #define DRM_CLIENT_CAP_ATOMIC 3 @@ -698,7 +699,7 @@ struct drm_get_cap { */ #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS 5 -/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ +/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */ struct drm_set_client_cap { __u64 capability; __u64 value; @@ -950,7 +951,7 @@ extern "C" { #define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2) -/** +/* * Device specific ioctls should only be in their respective headers * The device specific ioctl range is from 0x40 to 0x9f. * Generic IOCTLS restart at 0xA0. @@ -961,7 +962,7 @@ extern "C" { #define DRM_COMMAND_BASE 0x40 #define DRM_COMMAND_END 0xA0 -/** +/* * Header for events written back to userspace on the drm fd. The * type defines the type of event, the length specifies the total * length of the event (including the header), and user_data is -- cgit v1.2.3 From c2446944b3f588d6a0186f2022a2999c90e0cb63 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:21:00 -0300 Subject: tools headers UAPI: Sync drm/i915_drm.h with the kernel sources To pick the changes in: 8c3b1ba0e7ea9a80 ("drm/i915/gt: Track the overall awake/busy time") 348fb0cb0a79bce0 ("drm/i915/pmu: Deprecate I915_PMU_LAST and optimize state tracking") That don't result in any change in tooling: $ tools/perf/trace/beauty/drm_ioctl.sh > before $ cp include/uapi/drm/i915_drm.h tools/include/uapi/drm/i915_drm.h $ tools/perf/trace/beauty/drm_ioctl.sh > after $ diff -u before after $ Only silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/drm/i915_drm.h' differs from latest version at 'include/uapi/drm/i915_drm.h' diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h Cc: Chris Wilson Cc: Tvrtko Ursulin Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/drm/i915_drm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index fa1f3d62f9a6..1987e2ea79a3 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -177,8 +177,9 @@ enum drm_i915_pmu_engine_sample { #define I915_PMU_REQUESTED_FREQUENCY __I915_PMU_OTHER(1) #define I915_PMU_INTERRUPTS __I915_PMU_OTHER(2) #define I915_PMU_RC6_RESIDENCY __I915_PMU_OTHER(3) +#define I915_PMU_SOFTWARE_GT_AWAKE_TIME __I915_PMU_OTHER(4) -#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY +#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY /* Each region is a minimum of 16k, and there are at most 255 of them. */ -- cgit v1.2.3 From 1e61463cfcd0b3e7a19ba36b8a98c64ebaac5c6e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:44:37 -0300 Subject: tools headers UAPI: Sync openat2.h with the kernel sources To pick the changes in: 99668f618062816c ("fs: expose LOOKUP_CACHED through openat2() RESOLVE_CACHED") That don't result in any change in tooling, only silences this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/openat2.h' differs from latest version at 'include/uapi/linux/openat2.h' diff -u tools/include/uapi/linux/openat2.h include/uapi/linux/openat2.h Cc: Al Viro Cc: Jens Axboe Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/openat2.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/openat2.h b/tools/include/uapi/linux/openat2.h index 58b1eb711360..a5feb7604948 100644 --- a/tools/include/uapi/linux/openat2.h +++ b/tools/include/uapi/linux/openat2.h @@ -35,5 +35,9 @@ struct open_how { #define RESOLVE_IN_ROOT 0x10 /* Make all jumps to "/" and ".." be scoped inside the dirfd (similar to chroot(2)). */ +#define RESOLVE_CACHED 0x20 /* Only complete if resolution can be + completed through cached lookup. May + return -EAGAIN if that's not + possible. */ #endif /* _UAPI_LINUX_OPENAT2_H */ -- cgit v1.2.3 From add76c0113ba6343a221f1ba1fa5edc8963db07c Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:48:05 -0300 Subject: perf arch powerpc: Sync powerpc syscall.tbl with the kernel sources To get the changes in: fbcee2ebe8edbb6a ("powerpc/32: Always save non volatile GPRs at syscall entry") That shouldn't cause any change in tooling, just silences the following tools/perf/ build warning: Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' Cc: Christophe Leroy Cc: Michael Ellerman Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index f744eb5cba88..96b2157f0371 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -9,9 +9,7 @@ # 0 nospu restart_syscall sys_restart_syscall 1 nospu exit sys_exit -2 32 fork ppc_fork sys_fork -2 64 fork sys_fork -2 spu fork sys_ni_syscall +2 nospu fork sys_fork 3 common read sys_read 4 common write sys_write 5 common open sys_open compat_sys_open @@ -160,9 +158,7 @@ 119 32 sigreturn sys_sigreturn compat_sys_sigreturn 119 64 sigreturn sys_ni_syscall 119 spu sigreturn sys_ni_syscall -120 32 clone ppc_clone sys_clone -120 64 clone sys_clone -120 spu clone sys_ni_syscall +120 nospu clone sys_clone 121 common setdomainname sys_setdomainname 122 common uname sys_newuname 123 common modify_ldt sys_ni_syscall @@ -244,9 +240,7 @@ 186 spu sendfile sys_sendfile64 187 common getpmsg sys_ni_syscall 188 common putpmsg sys_ni_syscall -189 32 vfork ppc_vfork sys_vfork -189 64 vfork sys_vfork -189 spu vfork sys_ni_syscall +189 nospu vfork sys_vfork 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 common readahead sys_readahead compat_sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 @@ -322,9 +316,7 @@ 248 32 clock_nanosleep sys_clock_nanosleep_time32 248 64 clock_nanosleep sys_clock_nanosleep 248 spu clock_nanosleep sys_clock_nanosleep -249 32 swapcontext ppc_swapcontext compat_sys_swapcontext -249 64 swapcontext sys_swapcontext -249 spu swapcontext sys_ni_syscall +249 nospu swapcontext sys_swapcontext compat_sys_swapcontext 250 common tgkill sys_tgkill 251 32 utimes sys_utimes_time32 251 64 utimes sys_utimes @@ -522,9 +514,7 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -435 32 clone3 ppc_clone3 sys_clone3 -435 64 clone3 sys_clone3 -435 spu clone3 sys_ni_syscall +435 nospu clone3 sys_clone3 436 common close_range sys_close_range 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd -- cgit v1.2.3 From 303550a44741de7e853d1c0f1d252a8719a88cb1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:51:17 -0300 Subject: tools headers UAPI s390: Sync ptrace.h kernel headers To pick up the changes from: 56e62a7370283601 ("s390: convert to generic entry") That only adds two new defines, so shouldn't cause problems when building the BPF selftests. Silencing this perf build warning: Warning: Kernel ABI header at 'tools/arch/s390/include/uapi/asm/ptrace.h' differs from latest version at 'arch/s390/include/uapi/asm/ptrace.h' diff -u tools/arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/ptrace.h Cc: Hendrik Brueckner Cc: Sven Schnelle Cc: Vasily Gorbik Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/s390/include/uapi/asm/ptrace.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/arch/s390/include/uapi/asm/ptrace.h b/tools/arch/s390/include/uapi/asm/ptrace.h index 543dd70e12c8..ad64d673b5e6 100644 --- a/tools/arch/s390/include/uapi/asm/ptrace.h +++ b/tools/arch/s390/include/uapi/asm/ptrace.h @@ -179,8 +179,9 @@ #define ACR_SIZE 4 -#define PTRACE_OLDSETOPTIONS 21 - +#define PTRACE_OLDSETOPTIONS 21 +#define PTRACE_SYSEMU 31 +#define PTRACE_SYSEMU_SINGLESTEP 32 #ifndef __ASSEMBLY__ #include #include -- cgit v1.2.3 From 21b7e35bdf0a0e44525ec4e8a7862eb4a8df8ebe Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 23 Feb 2021 09:56:50 -0300 Subject: tools headers UAPI: Sync kvm.h headers with the kernel sources To pick the changes in: d9a47edabc4f9481 ("KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR") 8d4e7e80838f45d3 ("KVM: x86: declare Xen HVM shared info capability and add test case") 40da8ccd724f7ca2 ("KVM: x86/xen: Add event channel interrupt vector upcall") These new IOCTLs are now supported on 'perf trace': $ tools/perf/trace/beauty/kvm_ioctl.sh > before $ cp include/uapi/linux/kvm.h tools/include/uapi/linux/kvm.h $ tools/perf/trace/beauty/kvm_ioctl.sh > after $ diff -u before after --- before 2021-02-23 09:55:46.229058308 -0300 +++ after 2021-02-23 09:55:57.509308058 -0300 @@ -91,6 +91,10 @@ [0xc1] = "GET_SUPPORTED_HV_CPUID", [0xc6] = "X86_SET_MSR_FILTER", [0xc7] = "RESET_DIRTY_RINGS", + [0xc8] = "XEN_HVM_GET_ATTR", + [0xc9] = "XEN_HVM_SET_ATTR", + [0xca] = "XEN_VCPU_GET_ATTR", + [0xcb] = "XEN_VCPU_SET_ATTR", [0xe0] = "CREATE_DEVICE", [0xe1] = "SET_DEVICE_ATTR", [0xe2] = "GET_DEVICE_ATTR", $ Addressing this perf build warning: Warning: Kernel ABI header at 'tools/include/uapi/linux/kvm.h' differs from latest version at 'include/uapi/linux/kvm.h' diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h Cc: David Woodhouse Cc: Paul Mackerras Cc: Ravi Bangoria Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kvm.h | 73 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index abb89bbe5635..8b281f722e5b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -216,6 +216,20 @@ struct kvm_hyperv_exit { } u; }; +struct kvm_xen_exit { +#define KVM_EXIT_XEN_HCALL 1 + __u32 type; + union { + struct { + __u32 longmode; + __u32 cpl; + __u64 input; + __u64 result; + __u64 params[6]; + } hcall; + } u; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -252,6 +266,8 @@ struct kvm_hyperv_exit { #define KVM_EXIT_X86_WRMSR 30 #define KVM_EXIT_DIRTY_RING_FULL 31 #define KVM_EXIT_AP_RESET_HOLD 32 +#define KVM_EXIT_X86_BUS_LOCK 33 +#define KVM_EXIT_XEN 34 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -428,6 +444,8 @@ struct kvm_run { __u32 index; /* kernel -> user */ __u64 data; /* kernel <-> user */ } msr; + /* KVM_EXIT_XEN */ + struct kvm_xen_exit xen; /* Fix the size of the union. */ char padding[256]; }; @@ -1058,6 +1076,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_X86_BUS_LOCK_EXIT 193 #define KVM_CAP_PPC_DAWR1 194 #ifdef KVM_CAP_IRQ_ROUTING @@ -1132,6 +1151,10 @@ struct kvm_x86_mce { #endif #ifdef KVM_CAP_XEN_HVM +#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) +#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) +#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -1566,6 +1589,45 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_DIRTY_LOG_RING */ #define KVM_RESET_DIRTY_RINGS _IO(KVMIO, 0xc7) +/* Per-VM Xen attributes */ +#define KVM_XEN_HVM_GET_ATTR _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr) +#define KVM_XEN_HVM_SET_ATTR _IOW(KVMIO, 0xc9, struct kvm_xen_hvm_attr) + +struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + struct { + __u64 gfn; + } shared_info; + __u64 pad[8]; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0 +#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1 +#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2 + +/* Per-vCPU Xen attributes */ +#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr) +#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr) + +struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; + __u64 pad[8]; + } u; +}; + +/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */ +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0 +#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1 + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ @@ -1594,6 +1656,8 @@ enum sev_cmd_id { KVM_SEV_DBG_ENCRYPT, /* Guest certificates commands */ KVM_SEV_CERT_EXPORT, + /* Attestation report */ + KVM_SEV_GET_ATTESTATION_REPORT, KVM_SEV_NR_MAX, }; @@ -1646,6 +1710,12 @@ struct kvm_sev_dbg { __u32 len; }; +struct kvm_sev_attestation_report { + __u8 mnonce[16]; + __u64 uaddr; + __u32 len; +}; + #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) #define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) @@ -1767,4 +1837,7 @@ struct kvm_dirty_gfn { __u64 offset; }; +#define KVM_BUS_LOCK_DETECTION_OFF (1 << 0) +#define KVM_BUS_LOCK_DETECTION_EXIT (1 << 1) + #endif /* __LINUX_KVM_H */ -- cgit v1.2.3 From ded2e511a8af9f14482b11225f73db63231fc7a4 Mon Sep 17 00:00:00 2001 From: Pierre Gondois Date: Wed, 24 Feb 2021 18:24:10 +0000 Subject: perf tools: Cast (struct timeval).tv_sec when printing The musl-libc [1] defines (struct timeval).tv_sec as a 'long long' for arm and other architectures. The default build having a '-Wformat' flag, not casting the field when printing prevents from building perf. This patch casts the (struct timeval).tv_sec fields to the expected format. [1] git://git.musl-libc.org/musl Signed-off-by: Pierre Gondois Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Douglas.raillard@arm.com Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210224182410.5366-1-Pierre.Gondois@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/sched-messaging.c | 4 ++-- tools/perf/bench/sched-pipe.c | 4 ++-- tools/perf/bench/syscall.c | 4 ++-- tools/perf/util/header.c | 4 ++-- tools/perf/util/stat-display.c | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c index cecce93ccc63..488f6e6ba1a5 100644 --- a/tools/perf/bench/sched-messaging.c +++ b/tools/perf/bench/sched-messaging.c @@ -309,11 +309,11 @@ int bench_sched_messaging(int argc, const char **argv) num_groups, num_groups * 2 * num_fds, thread_mode ? "threads" : "processes"); printf(" %14s: %lu.%03lu [sec]\n", "Total time", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; case BENCH_FORMAT_SIMPLE: - printf("%lu.%03lu\n", diff.tv_sec, + printf("%lu.%03lu\n", (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; default: diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 3c88d1f201f1..a960e7a93aec 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -156,7 +156,7 @@ int bench_sched_pipe(int argc, const char **argv) result_usec += diff.tv_usec; printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); printf(" %14lf usecs/op\n", @@ -168,7 +168,7 @@ int bench_sched_pipe(int argc, const char **argv) case BENCH_FORMAT_SIMPLE: printf("%lu.%03lu\n", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / USEC_PER_MSEC)); break; diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c index 5fe621cff8e9..9b751016f4b6 100644 --- a/tools/perf/bench/syscall.c +++ b/tools/perf/bench/syscall.c @@ -54,7 +54,7 @@ int bench_syscall_basic(int argc, const char **argv) result_usec += diff.tv_usec; printf(" %14s: %lu.%03lu [sec]\n\n", "Total time", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec/1000)); printf(" %14lf usecs/op\n", @@ -66,7 +66,7 @@ int bench_syscall_basic(int argc, const char **argv) case BENCH_FORMAT_SIMPLE: printf("%lu.%03lu\n", - diff.tv_sec, + (unsigned long) diff.tv_sec, (unsigned long) (diff.tv_usec / 1000)); break; diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 4fe9e2a54346..20effdff76ce 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -1618,8 +1618,8 @@ static void print_clock_data(struct feat_fd *ff, FILE *fp) fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid); fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n", - tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec, - clockid_ns.tv_sec, clockid_ns.tv_nsec, + tstr, (long) tod_ns.tv_sec, (int) tod_ns.tv_usec, + (long) clockid_ns.tv_sec, clockid_ns.tv_nsec, clockid_name(clockid)); } diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index cce7a76d6473..7f09cdaf5b60 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -983,7 +983,7 @@ static void print_interval(struct perf_stat_config *config, if (config->interval_clear) puts(CONSOLE_CLEAR); - sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep); + sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) { switch (config->aggr_mode) { -- cgit v1.2.3 From 762323eb39a257c3b9875172d5ee134bd448692c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 24 Feb 2021 16:08:31 +0100 Subject: perf build: Move feature cleanup under tools/build Arnaldo reported issue for following build command: $ rm -rf /tmp/krava; mkdir /tmp/krava; make O=/tmp/krava clean CLEAN config /bin/sh: line 0: cd: /tmp/krava/feature/: No such file or directory ../../scripts/Makefile.include:17: *** output directory "/tmp/krava/feature/" does not exist. Stop. make[1]: *** [Makefile.perf:1010: config-clean] Error 2 make: *** [Makefile:90: clean] Error 2 The problem is that now that we include scripts/Makefile.include in feature's Makefile (which is fine and needed), we need to ensure the OUTPUT directory exists, before executing (out of tree) clean command. Removing the feature's cleanup from perf Makefile and fixing feature's cleanup under build Makefile, so it now checks that there's existing OUTPUT directory before calling the clean. Fixes: 211a741cd3e1 ("tools: Factor Clang, LLC and LLVM utils definitions") Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Sedat Dilek # LLVM/Clang v13-git Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210224150831.409639-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile | 8 +++++++- tools/perf/Makefile.perf | 10 +--------- 2 files changed, 8 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/build/Makefile b/tools/build/Makefile index bae48e6fa995..5ed41b96fcde 100644 --- a/tools/build/Makefile +++ b/tools/build/Makefile @@ -30,12 +30,18 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj all: $(OUTPUT)fixdep +# Make sure there's anything to clean, +# feature contains check for existing OUTPUT +TMP_O := $(if $(OUTPUT),$(OUTPUT)/feature,./) + clean: $(call QUIET_CLEAN, fixdep) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)rm -f $(OUTPUT)fixdep $(call QUIET_CLEAN, feature-detect) - $(Q)$(MAKE) -C feature/ clean >/dev/null +ifneq ($(wildcard $(TMP_O)),) + $(Q)$(MAKE) -C feature OUTPUT=$(TMP_O) clean >/dev/null +endif $(OUTPUT)fixdep-in.o: FORCE $(Q)$(MAKE) $(build)=fixdep diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 5345ac70cd83..536f6f90af92 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -1001,14 +1001,6 @@ $(INSTALL_DOC_TARGETS): ### Cleaning rules -# -# This is here, not in Makefile.config, because Makefile.config does -# not get included for the clean target: -# -config-clean: - $(call QUIET_CLEAN, config) - $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null - python-clean: $(python-clean) @@ -1048,7 +1040,7 @@ endif # BUILD_BPF_SKEL bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean bpf-skel-clean +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected -- cgit v1.2.3 From 2b1919ec8338fad3e950f264c0c81f8b17eb6c7e Mon Sep 17 00:00:00 2001 From: Andreas Wendleder Date: Mon, 1 Mar 2021 19:56:42 +0100 Subject: perf tools: Clean 'generated' directory used for creating the syscall table on x86 Remove generated directory tools/perf/arch/x86/include/generated. Signed-off-by: Andreas Wendleder Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301185642.163396-1-gonsolo@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/Makefile | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 8cc6642fce7a..5a9f9a7bf07d 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -10,10 +10,11 @@ PERF_HAVE_JITDUMP := 1 # Syscall table generation # -out := $(OUTPUT)arch/x86/include/generated/asm -header := $(out)/syscalls_64.c -sys := $(srctree)/tools/perf/arch/x86/entry/syscalls -systbl := $(sys)/syscalltbl.sh +generated := $(OUTPUT)arch/x86/include/generated +out := $(generated)/asm +header := $(out)/syscalls_64.c +sys := $(srctree)/tools/perf/arch/x86/entry/syscalls +systbl := $(sys)/syscalltbl.sh # Create output directory if not already present _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') @@ -22,6 +23,6 @@ $(header): $(sys)/syscall_64.tbl $(systbl) $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@ clean:: - $(call QUIET_CLEAN, x86) $(RM) $(header) + $(call QUIET_CLEAN, x86) $(RM) -r $(header) $(generated) archheaders: $(header) -- cgit v1.2.3 From ffc52b7ae5e6ff2b57c05fa8954fd4cae4efaab4 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Tue, 2 Mar 2021 02:35:33 +0000 Subject: perf diff: Don't crash on freeing errno-session on the error path __cmd_diff() sets result of perf_session__new() to d->session. In case of failure, it's errno and perf-diff may crash with: failed to open perf.data: Permission denied Failed to open perf.data Segmentation fault (core dumped) From the coredump: 0 0x00005569a62b5955 in auxtrace__free (session=0xffffffffffffffff) at util/auxtrace.c:2681 1 0x00005569a626b37d in perf_session__delete (session=0xffffffffffffffff) at util/session.c:295 2 perf_session__delete (session=0xffffffffffffffff) at util/session.c:291 3 0x00005569a618008a in __cmd_diff () at builtin-diff.c:1239 4 cmd_diff (argc=, argv=) at builtin-diff.c:2011 [..] Funny enough, it won't always crash. For me it crashes only if failed file is second in cmd-line: the reason is that cmd_diff() check files for branch-stacks [in check_file_brstack()] and if the first file doesn't have brstacks, it doesn't proceed to try open other files from cmd-line. Check d->session before calling perf_session__delete(). Another solution would be assigning to temporary variable, checking it, but I find it easier to follow with IS_ERR() check in the same function. After some time it's still obvious why the check is needed, and with temp variable it's possible to make the same mistake. Committer testing: $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf diff failed to open perf.data.old: No such file or directory Failed to open perf.data.old $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf diff # Event 'cycles:u' # # Baseline Delta Abs Shared Object Symbol # ........ ......... ................ .......................... # 0.92% +87.66% [unknown] [k] 0xffffffff8825de16 11.39% +0.04% ld-2.32.so [.] __GI___tunables_init 87.70% ld-2.32.so [.] _dl_check_map_versions $ sudo chown root:root perf.data [sudo] password for acme: $ perf diff failed to open perf.data: Permission denied Failed to open perf.data Segmentation fault (core dumped) $ After the patch: $ perf diff failed to open perf.data: Permission denied Failed to open perf.data $ Signed-off-by: Dmitry Safonov Acked-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: dmitry safonov Link: http://lore.kernel.org/lkml/20210302023533.1572231-1-dima@arista.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-diff.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 8f6c784ce629..878e04b1fab7 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1236,7 +1236,8 @@ static int __cmd_diff(void) out_delete: data__for_each_file(i, d) { - perf_session__delete(d->session); + if (!IS_ERR(d->session)) + perf_session__delete(d->session); data__free(d); } -- cgit v1.2.3 From 394e4306b093d037bddcee7e1f0e8e6c53a558fc Mon Sep 17 00:00:00 2001 From: Athira Rajeev Date: Thu, 25 Feb 2021 11:50:02 -0500 Subject: perf bench numa: Fix the condition checks for max number of NUMA nodes In systems having higher node numbers available like node 255, perf numa bench will fail with SIGABORT. <<>> perf: bench/numa.c:1416: init: Assertion `!(g->p.nr_nodes > 64 || g->p.nr_nodes < 0)' failed. Aborted (core dumped) <<>> Snippet from 'numactl -H' below on a powerpc system where the highest node number available is 255: available: 6 nodes (0,8,252-255) node 0 cpus: node 0 size: 519587 MB node 0 free: 516659 MB node 8 cpus: node 8 size: 523607 MB node 8 free: 486757 MB node 252 cpus: node 252 size: 0 MB node 252 free: 0 MB node 253 cpus: node 253 size: 0 MB node 253 free: 0 MB node 254 cpus: node 254 size: 0 MB node 254 free: 0 MB node 255 cpus: node 255 size: 0 MB node 255 free: 0 MB node distances: node 0 8 252 253 254 255 Note: expands to actual cpu list in the original output. These nodes 252-255 are to represent the memory on GPUs and are valid nodes. The perf numa bench init code has a condition check to see if the number of NUMA nodes (nr_nodes) exceeds MAX_NR_NODES. The value of MAX_NR_NODES defined in perf code is 64. And the 'nr_nodes' is the value from numa_max_node() which represents the highest node number available in the system. In some systems where we could have NUMA node 255, this condition check fails and results in SIGABORT. The numa benchmark uses static value of MAX_NR_NODES in the code to represent size of two NUMA node arrays and node bitmask used for setting memory policy. Patch adds a fix to dynamically allocate size for the two arrays and bitmask value based on the node numbers available in the system. With the fix, perf numa benchmark will work with node configuration on any system and thus removes the static MAX_NR_NODES value. Signed-off-by: Athira Jajeev Reviewed-by: Srikar Dronamraju Cc: Jiri Olsa Cc: Kajol Jain Cc: Kan Liang Cc: Madhavan Srinivasan Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: linuxppc-dev@lists.ozlabs.org Link: http://lore.kernel.org/lkml/1614271802-1503-1-git-send-email-atrajeev@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/numa.c | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 11726ec6285f..20b87e29c96f 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -344,18 +344,22 @@ static void mempol_restore(void) static void bind_to_memnode(int node) { - unsigned long nodemask; + struct bitmask *node_mask; int ret; if (node == NUMA_NO_NODE) return; - BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8); - nodemask = 1L << node; + node_mask = numa_allocate_nodemask(); + BUG_ON(!node_mask); - ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8); - dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret); + numa_bitmask_clearall(node_mask); + numa_bitmask_setbit(node_mask, node); + ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1); + dprintf("binding to node %d, mask: %016lx => %d\n", node, *node_mask->maskp, ret); + + numa_bitmask_free(node_mask); BUG_ON(ret); } @@ -876,8 +880,6 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked) prctl(0, bytes_worked); } -#define MAX_NR_NODES 64 - /* * Count the number of nodes a process's threads * are spread out on. @@ -888,10 +890,15 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked) */ static int count_process_nodes(int process_nr) { - char node_present[MAX_NR_NODES] = { 0, }; + char *node_present; int nodes; int n, t; + node_present = (char *)malloc(g->p.nr_nodes * sizeof(char)); + BUG_ON(!node_present); + for (nodes = 0; nodes < g->p.nr_nodes; nodes++) + node_present[nodes] = 0; + for (t = 0; t < g->p.nr_threads; t++) { struct thread_data *td; int task_nr; @@ -901,17 +908,20 @@ static int count_process_nodes(int process_nr) td = g->threads + task_nr; node = numa_node_of_cpu(td->curr_cpu); - if (node < 0) /* curr_cpu was likely still -1 */ + if (node < 0) /* curr_cpu was likely still -1 */ { + free(node_present); return 0; + } node_present[node] = 1; } nodes = 0; - for (n = 0; n < MAX_NR_NODES; n++) + for (n = 0; n < g->p.nr_nodes; n++) nodes += node_present[n]; + free(node_present); return nodes; } @@ -980,7 +990,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence) { unsigned int loops_done_min, loops_done_max; int process_groups; - int nodes[MAX_NR_NODES]; + int *nodes; int distance; int nr_min; int nr_max; @@ -994,6 +1004,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence) if (!g->p.show_convergence && !g->p.measure_convergence) return; + nodes = (int *)malloc(g->p.nr_nodes * sizeof(int)); + BUG_ON(!nodes); for (node = 0; node < g->p.nr_nodes; node++) nodes[node] = 0; @@ -1035,8 +1047,10 @@ static void calc_convergence(double runtime_ns_max, double *convergence) BUG_ON(sum > g->p.nr_tasks); - if (0 && (sum < g->p.nr_tasks)) + if (0 && (sum < g->p.nr_tasks)) { + free(nodes); return; + } /* * Count the number of distinct process groups present @@ -1088,6 +1102,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence) } tprintf("\n"); } + + free(nodes); } static void show_summary(double runtime_ns_max, int l, double *convergence) @@ -1413,7 +1429,7 @@ static int init(void) g->p.nr_nodes = numa_max_node() + 1; /* char array in count_process_nodes(): */ - BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0); + BUG_ON(g->p.nr_nodes < 0); if (g->p.show_quiet && !g->p.show_details) g->p.show_details = -1; -- cgit v1.2.3 From 137a5258939aca56558f3a23eb229b9c4b293917 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 26 Feb 2021 14:14:31 -0800 Subject: perf traceevent: Ensure read cmdlines are null terminated. Issue detected by address sanitizer. Fixes: cd4ceb63438e9e28 ("perf util: Save pid-cmdline mapping into tracing header") Signed-off-by: Ian Rogers Acked-by: Namhyung Kim Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210226221431.1985458-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/trace-event-read.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index f507dff713c9..8a01af783310 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -361,6 +361,7 @@ static int read_saved_cmdline(struct tep_handle *pevent) pr_debug("error reading saved cmdlines\n"); goto out; } + buf[ret] = '\0'; parse_saved_cmdline(pevent, buf, size); ret = 0; -- cgit v1.2.3 From b55ff1d1456c86209ba28fd06b1b5fb0e05d92c3 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Fri, 26 Feb 2021 10:31:44 -0800 Subject: perf tools: Fix documentation of verbose options Option doesn't take a value, make sure the man pages agree. For example: $ perf evlist --verbose=1 Error: option `verbose' takes no value Signed-off-by: Ian Rogers Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lore.kernel.org/lkml/20210226183145.1878782-1-irogers@google.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-evlist.txt | 2 +- tools/perf/Documentation/perf-ftrace.txt | 4 ++-- tools/perf/Documentation/perf-kallsyms.txt | 2 +- tools/perf/Documentation/perf-trace.txt | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt index c0a66400a960..9af8b8dfb7b6 100644 --- a/tools/perf/Documentation/perf-evlist.txt +++ b/tools/perf/Documentation/perf-evlist.txt @@ -29,7 +29,7 @@ OPTIONS Show just the sample frequency used for each event. -v:: ---verbose=:: +--verbose:: Show all fields. -g:: diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt index 1e91121bac0f..6e82b7cc0bf0 100644 --- a/tools/perf/Documentation/perf-ftrace.txt +++ b/tools/perf/Documentation/perf-ftrace.txt @@ -28,8 +28,8 @@ OPTIONS specified: function_graph or function. -v:: ---verbose=:: - Verbosity level. +--verbose:: + Increase the verbosity level. -F:: --funcs:: diff --git a/tools/perf/Documentation/perf-kallsyms.txt b/tools/perf/Documentation/perf-kallsyms.txt index f3c620951f6e..c97527df8ecd 100644 --- a/tools/perf/Documentation/perf-kallsyms.txt +++ b/tools/perf/Documentation/perf-kallsyms.txt @@ -20,5 +20,5 @@ modules). OPTIONS ------- -v:: ---verbose=:: +--verbose:: Increase verbosity level, showing details about symbol table loading, etc. diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index abc9b5d83312..f0da8cf63e9a 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -97,8 +97,8 @@ filter out the startup phase of the program, which is often very different. Filter out events for these pids and for 'trace' itself (comma separated list). -v:: ---verbose=:: - Verbosity level. +--verbose:: + Increase the verbosity level. --no-inherit:: Child tasks do not inherit counters. -- cgit v1.2.3 From dacfc08dcafa7d443ab339592999e37bbb8a3ef0 Mon Sep 17 00:00:00 2001 From: Antonio Terceiro Date: Wed, 24 Feb 2021 10:00:46 -0300 Subject: perf build: Fix ccache usage in $(CC) when generating arch errno table This was introduced by commit e4ffd066ff440a57 ("perf: Normalize gcc parameter when generating arch errno table"). Assuming the first word of $(CC) is the actual compiler breaks usage like CC="ccache gcc": the script ends up calling ccache directly with gcc arguments, what fails. Instead of getting the first word, just remove from $(CC) any word that starts with a "-". This maintains the spirit of the original patch, while not breaking ccache users. Fixes: e4ffd066ff440a57 ("perf: Normalize gcc parameter when generating arch errno table") Signed-off-by: Antonio Terceiro Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: He Zhe Cc: Jiri Olsa Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lore.kernel.org/lkml/20210224130046.346977-1-antonio.terceiro@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 536f6f90af92..f6e609673de2 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -607,7 +607,7 @@ arch_errno_hdr_dir := $(srctree)/tools arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh $(arch_errno_name_array): $(arch_errno_tbl) - $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@ + $(Q)$(SHELL) '$(arch_errno_tbl)' '$(patsubst -%,,$(CC))' $(arch_errno_hdr_dir) > $@ sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh -- cgit v1.2.3 From 31bf4e7cb61363b87f1606ec8efb71eebd6393cf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 1 Mar 2021 13:25:09 +0100 Subject: perf daemon: Fix control fifo permissions Add proper mode for mkfifo calls to get read and write permissions for user. We can't use O_RDWR in here, changing to standard permission value. Fixes: 6a6d1804a190 ("perf daemon: Set control fifo for session") Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: John Garry Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301122510.64402-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 617feaf020f6..8f0ed2e59280 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -373,12 +373,12 @@ static int daemon_session__run(struct daemon_session *session, dup2(fd, 2); close(fd); - if (mkfifo(SESSION_CONTROL, O_RDWR) && errno != EEXIST) { + if (mkfifo(SESSION_CONTROL, 0600) && errno != EEXIST) { perror("failed: create control fifo"); return -1; } - if (mkfifo(SESSION_ACK, O_RDWR) && errno != EEXIST) { + if (mkfifo(SESSION_ACK, 0600) && errno != EEXIST) { perror("failed: create ack fifo"); return -1; } -- cgit v1.2.3 From 36bc511f63fd21c0c44f973c6d064c1228ba15ae Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 1 Mar 2021 13:25:10 +0100 Subject: perf daemon: Fix running test for non root user John reported that the daemon test is not working for non root user. Changing the tests configurations so it's allowed to run under normal user. Fixes: 2291bb915b55 ("perf tests: Add daemon 'list' command test") Reported-by: John Garry Signed-off-by: Jiri Olsa Tested-by: John Garry Cc: Alexander Shishkin Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301122510.64402-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/daemon.sh | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh index e5b824dd08d9..5ad3ca8d681b 100755 --- a/tools/perf/tests/shell/daemon.sh +++ b/tools/perf/tests/shell/daemon.sh @@ -140,10 +140,10 @@ test_list() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -159,14 +159,14 @@ EOF # check 1st session # pid:size:-e cpu-clock:base/size:base/size/output:base/size/control:base/size/ack:0 local line=`perf daemon --config ${config} -x: | head -2 | tail -1` - check_line_other "${line}" size "-e cpu-clock" ${base}/session-size \ + check_line_other "${line}" size "-e cpu-clock -m 1 sleep 10" ${base}/session-size \ ${base}/session-size/output ${base}/session-size/control \ ${base}/session-size/ack "0" # check 2nd session # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 local line=`perf daemon --config ${config} -x: | head -3 | tail -1` - check_line_other "${line}" time "-e task-clock" ${base}/session-time \ + check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \ ${base}/session-time/output ${base}/session-time/control \ ${base}/session-time/ack "0" @@ -190,10 +190,10 @@ test_reconfig() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -204,7 +204,7 @@ EOF # check 2nd session # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 local line=`perf daemon --config ${config} -x: | head -3 | tail -1` - check_line_other "${line}" time "-e task-clock" ${base}/session-time \ + check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \ ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0" local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'` @@ -215,10 +215,10 @@ EOF base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 EOF # TEST 1 - change config @@ -238,7 +238,7 @@ EOF # check reconfigured 2nd session # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0 local line=`perf daemon --config ${config} -x: | head -3 | tail -1` - check_line_other "${line}" time "-e cpu-clock" ${base}/session-time \ + check_line_other "${line}" time "-e cpu-clock -m 1 sleep 10" ${base}/session-time \ ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0" # TEST 2 - empty config @@ -309,10 +309,10 @@ test_stop() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -361,7 +361,7 @@ test_signal() base=BASE [session-test] -run = -e cpu-clock --switch-output +run = -e cpu-clock --switch-output -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -400,10 +400,10 @@ test_ping() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 [session-time] -run = -e task-clock +run = -e task-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} @@ -439,7 +439,7 @@ test_lock() base=BASE [session-size] -run = -e cpu-clock +run = -e cpu-clock -m 1 sleep 10 EOF sed -i -e "s|BASE|${base}|" ${config} -- cgit v1.2.3 From 84ea603650ec41273cc97d50eb01feed8e6baa2e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 1 Mar 2021 13:23:15 +0100 Subject: perf tools: Fix event's PMU name parsing Jin Yao reported parser error for software event: # perf stat -e software/r1a/ -a -- sleep 1 event syntax error: 'software/r1a/' \___ parser error This happens after commit 8c3b1ba0e7ea9a80 ("drm/i915/gt: Track the overall awake/busy time"), where new software-gt-awake-time event's non-pmu-event-style makes event parser conflict with software PMU. If we allow PE_PMU_EVENT_PRE to be parsed as PMU name, we fix the conflict and the following character '/' for PMU or '-' for non-pmu-event-style event allows parser to decide what even is specified. Fixes: 8c3b1ba0e7ea9a80 ("drm/i915/gt: Track the overall awake/busy time") Reported-by: Jin Yao Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Chris Wilson Cc: Ian Rogers Cc: Mark Rutland Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lore.kernel.org/lkml/20210301122315.63471-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/parse-events.y | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d5b6aff82f21..d57ac86ce7ca 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -89,6 +89,7 @@ static void inc_group_count(struct list_head *list, %type PE_EVENT_NAME %type PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE %type PE_DRV_CFG_TERM +%type event_pmu_name %destructor { free ($$); } %type event_term %destructor { parse_events_term__delete ($$); } @@ -272,8 +273,11 @@ event_def: event_pmu | event_legacy_raw sep_dc | event_bpf_file +event_pmu_name: +PE_NAME | PE_PMU_EVENT_PRE + event_pmu: -PE_NAME opt_pmu_config +event_pmu_name opt_pmu_config { struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; -- cgit v1.2.3 From b0faef924d21d0a4592ec81c4bc2b4badc35a343 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:03:59 +0900 Subject: perf test: Fix cpu and thread map leaks in basic mmap test The evlist has the maps with its own refcounts so we don't need to set the pointers to NULL. Otherwise following error was reported by Asan. # perf test -v 4 4: Read samples using the mmap interface : --- start --- test child forked, pid 139782 mmap size 528384B ================================================================= ==139782==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7f1f76daee8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x564ba21a0fea in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x564ba21a1a0f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x564ba21a21cf in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x564ba21a21cf in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x564ba1e48298 in test__basic_mmap tests/mmap-basic.c:55 #6 0x564ba1e278fb in run_test tests/builtin-test.c:428 #7 0x564ba1e278fb in test_and_print tests/builtin-test.c:458 #8 0x564ba1e29a53 in __cmd_test tests/builtin-test.c:679 #9 0x564ba1e29a53 in cmd_test tests/builtin-test.c:825 #10 0x564ba1e95cb4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x564ba1d1fa88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x564ba1d1fa88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x564ba1d1fa88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7f1f768e4d09 in __libc_start_main ../csu/libc-start.c:308 ... test child finished with 1 ---- end ---- Read samples using the mmap interface: FAILED! failed to open shell test directory: /home/namhyung/libexec/perf-core/tests/shell Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Mark Rutland Cc: Stephane Eranian Cc: Ian Rogers Cc: Peter Zijlstra Cc: Adrian Hunter Cc: Ingo Molnar Cc: Leo Yan Cc: Andi Kleen Cc: Alexander Shishkin Link: https://lore.kernel.org/r/20210301140409.184570-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/mmap-basic.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 57093aeacc6f..73ae8f7aa066 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -158,8 +158,6 @@ out_init: out_delete_evlist: evlist__delete(evlist); - cpus = NULL; - threads = NULL; out_free_cpus: perf_cpu_map__put(cpus); out_free_threads: -- cgit v1.2.3 From 09a61c8f86aee7b9c514c6906244a22ec37ef028 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:00 +0900 Subject: perf test: Fix a memory leak in attr test The get_argv_exec_path() returns a dynamic memory so it should be freed after use. $ perf test -v 17 ... ==141682==ERROR: LeakSanitizer: detected memory leaks Direct leak of 33 byte(s) in 1 object(s) allocated from: #0 0x7f09107d2e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x7f091035f6a7 in __vasprintf_internal libio/vasprintf.c:71 SUMMARY: AddressSanitizer: 33 byte(s) leaked in 1 allocation(s). Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-3-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index ec972e0892ab..dd39ce9b0277 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -182,14 +182,20 @@ int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused) struct stat st; char path_perf[PATH_MAX]; char path_dir[PATH_MAX]; + char *exec_path; /* First try development tree tests. */ if (!lstat("./tests", &st)) return run_dir("./tests", "./perf"); + exec_path = get_argv_exec_path(); + if (exec_path == NULL) + return -1; + /* Then installed path. */ - snprintf(path_dir, PATH_MAX, "%s/tests", get_argv_exec_path()); + snprintf(path_dir, PATH_MAX, "%s/tests", exec_path); snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR); + free(exec_path); if (!lstat(path_dir, &st) && !lstat(path_perf, &st)) -- cgit v1.2.3 From 83d25ccde591fe2356ba336e994b190361158b1e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:01 +0900 Subject: perf test: Fix cpu and thread map leaks in task_exit test The evlist has the maps with its own refcounts so we don't need to set the pointers to NULL. Otherwise following error was reported by Asan. Also change the goto label since it doesn't need to have two. # perf test -v 24 24: Number of exit events of a simple workload : --- start --- test child forked, pid 145915 mmap size 528384B ================================================================= ==145915==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7fc44e50d1f8 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164 #1 0x561cf50f4d2e in perf_thread_map__realloc /home/namhyung/project/linux/tools/lib/perf/threadmap.c:23 #2 0x561cf4eeb949 in thread_map__new_by_tid util/thread_map.c:63 #3 0x561cf4db7fd2 in test__task_exit tests/task-exit.c:74 #4 0x561cf4d798fb in run_test tests/builtin-test.c:428 #5 0x561cf4d798fb in test_and_print tests/builtin-test.c:458 #6 0x561cf4d7ba53 in __cmd_test tests/builtin-test.c:679 #7 0x561cf4d7ba53 in cmd_test tests/builtin-test.c:825 #8 0x561cf4de7d04 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #9 0x561cf4c71a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #10 0x561cf4c71a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #11 0x561cf4c71a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #12 0x7fc44e042d09 in __libc_start_main ../csu/libc-start.c:308 ... test child finished with 1 ---- end ---- Number of exit events of a simple workload: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-4-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/task-exit.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index bbf94e4aa145..4c2969db59b0 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -75,14 +75,11 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_free_maps; + goto out_delete_evlist; } perf_evlist__set_maps(&evlist->core, cpus, threads); - cpus = NULL; - threads = NULL; - err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal); if (err < 0) { pr_debug("Couldn't run the workload!\n"); @@ -137,7 +134,7 @@ out_init: if (retry_count++ > 1000) { pr_debug("Failed after retrying 1000 times\n"); err = -1; - goto out_free_maps; + goto out_delete_evlist; } goto retry; @@ -148,10 +145,9 @@ out_init: err = -1; } -out_free_maps: +out_delete_evlist: perf_cpu_map__put(cpus); perf_thread_map__put(threads); -out_delete_evlist: evlist__delete(evlist); return err; } -- cgit v1.2.3 From 97ab7c524fdcaf3098997f81bdf9d01157816f30 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:02 +0900 Subject: perf test: Fix cpu and thread map leaks in sw_clock_freq test The evlist has the maps with its own refcounts so we don't need to set the pointers to NULL. Otherwise following error was reported by Asan. Also change the goto label since it doesn't need to have two. # perf test -v 25 25: Software clock events period values : --- start --- test child forked, pid 149154 mmap size 528384B mmap size 528384B ================================================================= ==149154==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7fef5cd071f8 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164 #1 0x56260d5e8b8e in perf_thread_map__realloc /home/namhyung/project/linux/tools/lib/perf/threadmap.c:23 #2 0x56260d3df7a9 in thread_map__new_by_tid util/thread_map.c:63 #3 0x56260d2ac6b2 in __test__sw_clock_freq tests/sw-clock.c:65 #4 0x56260d26d8fb in run_test tests/builtin-test.c:428 #5 0x56260d26d8fb in test_and_print tests/builtin-test.c:458 #6 0x56260d26fa53 in __cmd_test tests/builtin-test.c:679 #7 0x56260d26fa53 in cmd_test tests/builtin-test.c:825 #8 0x56260d2dbb64 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #9 0x56260d165a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #10 0x56260d165a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #11 0x56260d165a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #12 0x7fef5c83cd09 in __libc_start_main ../csu/libc-start.c:308 ... test child finished with 1 ---- end ---- Software clock events period values : FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-5-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sw-clock.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index a49c9e23053b..74988846be1d 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -42,8 +42,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) .disabled = 1, .freq = 1, }; - struct perf_cpu_map *cpus; - struct perf_thread_map *threads; + struct perf_cpu_map *cpus = NULL; + struct perf_thread_map *threads = NULL; struct mmap *md; attr.sample_freq = 500; @@ -66,14 +66,11 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) if (!cpus || !threads) { err = -ENOMEM; pr_debug("Not enough memory to create thread/cpu maps\n"); - goto out_free_maps; + goto out_delete_evlist; } perf_evlist__set_maps(&evlist->core, cpus, threads); - cpus = NULL; - threads = NULL; - if (evlist__open(evlist)) { const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate"; @@ -129,10 +126,9 @@ out_init: err = -1; } -out_free_maps: +out_delete_evlist: perf_cpu_map__put(cpus); perf_thread_map__put(threads); -out_delete_evlist: evlist__delete(evlist); return err; } -- cgit v1.2.3 From e06c3ca4922ccf24bd36c007a87f193b235cee93 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:03 +0900 Subject: perf test: Fix cpu and thread map leaks in code_reading test The evlist and the cpu/thread maps should be released together. Otherwise following error was reported by Asan. Note that this test still has memory leaks in DSOs so it still fails even after this change. I'll take a look at that too. # perf test -v 26 26: Object code reading : --- start --- test child forked, pid 154184 Looking at the vmlinux_path (8 entries long) symsrc__init: build id mismatch for vmlinux. symsrc__init: cannot get elf header. Using /proc/kcore for kernel data Using /proc/kallsyms for symbols Parsing event 'cycles' mmap size 528384B ... ================================================================= ==154184==ERROR: LeakSanitizer: detected memory leaks Direct leak of 439 byte(s) in 1 object(s) allocated from: #0 0x7fcb66e77037 in __interceptor_calloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:154 #1 0x55ad9b7e821e in dso__new_id util/dso.c:1256 #2 0x55ad9b8cfd4a in __machine__addnew_vdso util/vdso.c:132 #3 0x55ad9b8cfd4a in machine__findnew_vdso util/vdso.c:347 #4 0x55ad9b845b7e in map__new util/map.c:176 #5 0x55ad9b8415a2 in machine__process_mmap2_event util/machine.c:1787 #6 0x55ad9b8fab16 in perf_tool__process_synth_event util/synthetic-events.c:64 #7 0x55ad9b8fab16 in perf_event__synthesize_mmap_events util/synthetic-events.c:499 #8 0x55ad9b8fbfdf in __event__synthesize_thread util/synthetic-events.c:741 #9 0x55ad9b8ff3e3 in perf_event__synthesize_thread_map util/synthetic-events.c:833 #10 0x55ad9b738585 in do_test_code_reading tests/code-reading.c:608 #11 0x55ad9b73b25d in test__code_reading tests/code-reading.c:722 #12 0x55ad9b6f28fb in run_test tests/builtin-test.c:428 #13 0x55ad9b6f28fb in test_and_print tests/builtin-test.c:458 #14 0x55ad9b6f4a53 in __cmd_test tests/builtin-test.c:679 #15 0x55ad9b6f4a53 in cmd_test tests/builtin-test.c:825 #16 0x55ad9b760cc4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #17 0x55ad9b5eaa88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #18 0x55ad9b5eaa88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #19 0x55ad9b5eaa88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #20 0x7fcb669acd09 in __libc_start_main ../csu/libc-start.c:308 ... SUMMARY: AddressSanitizer: 471 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Object code reading: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-6-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/code-reading.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 280f0348a09c..2fdc7b2f996e 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -706,13 +706,9 @@ static int do_test_code_reading(bool try_kcore) out_put: thread__put(thread); out_err: - - if (evlist) { - evlist__delete(evlist); - } else { - perf_cpu_map__put(cpus); - perf_thread_map__put(threads); - } + evlist__delete(evlist); + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); machine__delete_threads(machine); machine__delete(machine); -- cgit v1.2.3 From f2c3202ba0c7746c50c71c14d1ab977d929c0a27 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:04 +0900 Subject: perf test: Fix cpu and thread map leaks in keep_tracking test The evlist and the cpu/thread maps should be released together. Otherwise following error was reported by Asan. $ perf test -v 28 28: Use a dummy software event to keep tracking: --- start --- test child forked, pid 156810 mmap size 528384B ================================================================= ==156810==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7f637d2bce8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x55cc6295cffa in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x55cc6295da1f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x55cc6295e1df in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x55cc6295e1df in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x55cc626287cf in test__keep_tracking tests/keep-tracking.c:84 #6 0x55cc625e38fb in run_test tests/builtin-test.c:428 #7 0x55cc625e38fb in test_and_print tests/builtin-test.c:458 #8 0x55cc625e5a53 in __cmd_test tests/builtin-test.c:679 #9 0x55cc625e5a53 in cmd_test tests/builtin-test.c:825 #10 0x55cc62651cc4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x55cc624dba88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x55cc624dba88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x55cc624dba88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7f637cdf2d09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 72 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Use a dummy software event to keep tracking: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-7-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/keep-tracking.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index e6f1b2a38e03..a0438b0f0805 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -154,10 +154,9 @@ out_err: if (evlist) { evlist__disable(evlist); evlist__delete(evlist); - } else { - perf_cpu_map__put(cpus); - perf_thread_map__put(threads); } + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); return err; } -- cgit v1.2.3 From 953e7b5960f1cf0825da60dbdc762e19b127a94c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:05 +0900 Subject: perf test: Fix cpu and thread map leaks in switch_tracking test The evlist and cpu/thread maps should be released together. Otherwise the following error was reported by Asan. $ perf test -v 35 35: Track with sched_switch : --- start --- test child forked, pid 159287 Using CPUID GenuineIntel-6-8E-C mmap size 528384B 1295 events recorded ================================================================= ==159287==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7fa28d9a2e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x5652f5a5affa in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x5652f5a5ba1f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x5652f5a5c1df in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x5652f5a5c1df in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x5652f5723bbf in test__switch_tracking tests/switch-tracking.c:350 #6 0x5652f56e18fb in run_test tests/builtin-test.c:428 #7 0x5652f56e18fb in test_and_print tests/builtin-test.c:458 #8 0x5652f56e3a53 in __cmd_test tests/builtin-test.c:679 #9 0x5652f56e3a53 in cmd_test tests/builtin-test.c:825 #10 0x5652f574fcc4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x5652f55d9a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x5652f55d9a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x5652f55d9a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7fa28d4d8d09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 72 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Track with sched_switch: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-8-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/switch-tracking.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 15a2ab765d89..3ebaa758df77 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -574,10 +574,9 @@ out: if (evlist) { evlist__disable(evlist); evlist__delete(evlist); - } else { - perf_cpu_map__put(cpus); - perf_thread_map__put(threads); } + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); return err; -- cgit v1.2.3 From 4be42882e1f9c8a2d7d7bc066f420418f45b566c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:06 +0900 Subject: perf test: Fix a thread map leak in thread_map_synthesize test It missed to call perf_thread_map__put() after using the map. $ perf test -v 43 43: Synthesize thread map : --- start --- test child forked, pid 162640 ================================================================= ==162640==ERROR: LeakSanitizer: detected memory leaks Direct leak of 32 byte(s) in 1 object(s) allocated from: #0 0x7fd48cdaa1f8 in __interceptor_realloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:164 #1 0x563e6d5f8d0e in perf_thread_map__realloc /home/namhyung/project/linux/tools/lib/perf/threadmap.c:23 #2 0x563e6d3ef69a in thread_map__new_by_pid util/thread_map.c:46 #3 0x563e6d2cec90 in test__thread_map_synthesize tests/thread-map.c:97 #4 0x563e6d27d8fb in run_test tests/builtin-test.c:428 #5 0x563e6d27d8fb in test_and_print tests/builtin-test.c:458 #6 0x563e6d27fa53 in __cmd_test tests/builtin-test.c:679 #7 0x563e6d27fa53 in cmd_test tests/builtin-test.c:825 #8 0x563e6d2ebce4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #9 0x563e6d175a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #10 0x563e6d175a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #11 0x563e6d175a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #12 0x7fd48c8dfd09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 8224 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Synthesize thread map: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-9-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/thread-map.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 28f51c4bd373..9e1cf11149ef 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -102,6 +102,7 @@ int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __ TEST_ASSERT_VAL("failed to synthesize map", !perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL)); + perf_thread_map__put(threads); return 0; } -- cgit v1.2.3 From 641b6250337027311a09009e18264bb65c4d521c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:07 +0900 Subject: perf test: Fix a memory leak in thread_map_remove test The str should be freed after creating a thread map. Also change the open-coded thread map deletion to a call to perf_thread_map__put(). $ perf test -v 44 44: Remove thread map : --- start --- test child forked, pid 165536 2 threads: 165535, 165536 1 thread: 165536 0 thread: ================================================================= ==165536==ERROR: LeakSanitizer: detected memory leaks Direct leak of 14 byte(s) in 1 object(s) allocated from: #0 0x7f54453ffe8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x7f5444f8c6a7 in __vasprintf_internal libio/vasprintf.c:71 SUMMARY: AddressSanitizer: 14 byte(s) leaked in 1 allocation(s). test child finished with 1 ---- end ---- Remove thread map: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-10-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/thread-map.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index 9e1cf11149ef..d1e208b4a571 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -110,12 +110,12 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb { struct perf_thread_map *threads; char *str; - int i; TEST_ASSERT_VAL("failed to allocate map string", asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); threads = thread_map__new_str(str, NULL, 0, false); + free(str); TEST_ASSERT_VAL("failed to allocate thread_map", threads); @@ -142,9 +142,6 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb TEST_ASSERT_VAL("failed to not remove thread", thread_map__remove(threads, 0)); - for (i = 0; i < threads->nr; i++) - zfree(&threads->map[i].comm); - - free(threads); + perf_thread_map__put(threads); return 0; } -- cgit v1.2.3 From 690d91f5ec388448f6c2e9e3a8b3da856f400311 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:08 +0900 Subject: perf test: Fix cpu map leaks in cpu_map_print test It should be released after printing the map. $ perf test -v 52 52: Print cpu map : --- start --- test child forked, pid 172233 ================================================================= ==172233==ERROR: LeakSanitizer: detected memory leaks Direct leak of 156 byte(s) in 1 object(s) allocated from: #0 0x7fc472518e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x55e63b378f7a in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x55e63b37a05c in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:237 #3 0x55e63b056d16 in cpu_map_print tests/cpumap.c:102 #4 0x55e63b056d16 in test__cpu_map_print tests/cpumap.c:120 #5 0x55e63afff8fb in run_test tests/builtin-test.c:428 #6 0x55e63afff8fb in test_and_print tests/builtin-test.c:458 #7 0x55e63b001a53 in __cmd_test tests/builtin-test.c:679 #8 0x55e63b001a53 in cmd_test tests/builtin-test.c:825 #9 0x55e63b06dc44 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #10 0x55e63aef7a88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #11 0x55e63aef7a88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #12 0x55e63aef7a88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #13 0x7fc47204ed09 in __libc_start_main ../csu/libc-start.c:308 ... SUMMARY: AddressSanitizer: 448 byte(s) leaked in 7 allocation(s). test child finished with 1 ---- end ---- Print cpu map: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-11-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/cpumap.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 29c793ac7d10..0472b110fe65 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -106,6 +106,8 @@ static int cpu_map_print(const char *str) return -1; cpu_map__snprint(map, buf, sizeof(buf)); + perf_cpu_map__put(map); + return !strcmp(buf, str); } -- cgit v1.2.3 From 846580c235b3e2625ed494f654a28d235976d3b0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Mar 2021 23:04:09 +0900 Subject: perf test: Fix cpu and thread map leaks in perf_time_to_tsc test It should release the maps at the end. $ perf test -v 71 71: Convert perf time to TSC : --- start --- test child forked, pid 178744 mmap size 528384B 1st event perf time 59207256505278 tsc 13187166645142 rdtsc time 59207256542151 tsc 13187166723020 2nd event perf time 59207256543749 tsc 13187166726393 ================================================================= ==178744==ERROR: LeakSanitizer: detected memory leaks Direct leak of 40 byte(s) in 1 object(s) allocated from: #0 0x7faf601f9e8f in __interceptor_malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:145 #1 0x55b620cfc00a in cpu_map__trim_new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:79 #2 0x55b620cfca2f in perf_cpu_map__read /home/namhyung/project/linux/tools/lib/perf/cpumap.c:149 #3 0x55b620cfd1ef in cpu_map__read_all_cpu_map /home/namhyung/project/linux/tools/lib/perf/cpumap.c:166 #4 0x55b620cfd1ef in perf_cpu_map__new /home/namhyung/project/linux/tools/lib/perf/cpumap.c:181 #5 0x55b6209ef1b2 in test__perf_time_to_tsc tests/perf-time-to-tsc.c:73 #6 0x55b6209828fb in run_test tests/builtin-test.c:428 #7 0x55b6209828fb in test_and_print tests/builtin-test.c:458 #8 0x55b620984a53 in __cmd_test tests/builtin-test.c:679 #9 0x55b620984a53 in cmd_test tests/builtin-test.c:825 #10 0x55b6209f0cd4 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #11 0x55b62087aa88 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #12 0x55b62087aa88 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #13 0x55b62087aa88 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #14 0x7faf5fd2fd09 in __libc_start_main ../csu/libc-start.c:308 SUMMARY: AddressSanitizer: 72 byte(s) leaked in 2 allocation(s). test child finished with 1 ---- end ---- Convert perf time to TSC: FAILED! Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Ian Rogers Cc: Ingo Molnar Cc: Leo Yan Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210301140409.184570-12-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/perf-time-to-tsc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index 7cff02664d0e..680c3cffb128 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -167,6 +167,8 @@ next_event: out_err: evlist__delete(evlist); + perf_cpu_map__put(cpus); + perf_thread_map__put(threads); return err; } -- cgit v1.2.3 From 743108e1048ee73e0eda394597c1fc2ea46a599b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 16:44:14 -0300 Subject: tools headers: Update syscall.tbl files to support mount_setattr To pick the changes from: 9caccd41541a6f7d ("fs: introduce MOUNT_ATTR_IDMAP") This adds this new syscall to the tables used by tools such as 'perf trace', so that one can specify it by name and have it filtered, etc. Addressing these perf build warnings: Warning: Kernel ABI header at 'tools/perf/arch/x86/entry/syscalls/syscall_64.tbl' differs from latest version at 'arch/x86/entry/syscalls/syscall_64.tbl' diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl Warning: Kernel ABI header at 'tools/perf/arch/s390/entry/syscalls/syscall.tbl' differs from latest version at 'arch/s390/kernel/syscalls/syscall.tbl' diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl Cc: Adrian Hunter Cc: Christian Brauner Cc: Ian Rogers Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/YD6Wsxr9ByUbab/a@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/mount.h | 16 ++++++++++++++++ tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/s390/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/x86/entry/syscalls/syscall_64.tbl | 1 + 4 files changed, 19 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index dd8306ea336c..e6524ead2b7b 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -1,6 +1,8 @@ #ifndef _UAPI_LINUX_MOUNT_H #define _UAPI_LINUX_MOUNT_H +#include + /* * These are the fs-independent mount-flags: up to 32 flags are supported * @@ -117,5 +119,19 @@ enum fsconfig_command { #define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ #define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ #define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ + +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. */ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 96b2157f0371..0b2480cf3e47 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -521,3 +521,4 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index d443423495e5..3abef2144dac 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -444,3 +444,4 @@ 439 common faccessat2 sys_faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 compat_sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr sys_mount_setattr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 78672124d28b..7bf01cbe582f 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -363,6 +363,7 @@ 439 common faccessat2 sys_faccessat2 440 common process_madvise sys_process_madvise 441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr # # Due to a historical design error, certain syscalls are numbered differently -- cgit v1.2.3 From 6c0afc579aff90e84736d35ee35a1945ec0f279f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 17:10:52 -0300 Subject: tools headers UAPI: Update tools' copy of linux/coresight-pmu.h To get the changes in these commits: 88f11864cf1d1324 ("coresight: etm-perf: Support PID tracing for kernel at EL2") 53abf3fe83175626 ("coresight: etm-perf: Clarify comment on perf options") This will possibly be used in patches lined up for v5.13. And silence this perf build warning: Warning: Kernel ABI header at 'tools/include/linux/coresight-pmu.h' differs from latest version at 'include/linux/coresight-pmu.h' diff -u tools/include/linux/coresight-pmu.h include/linux/coresight-pmu.h Cc: Greg Kroah-Hartman Cc: Leo Yan Cc: Mathieu Poirier Cc: Mike Leach Cc: Suzuki K Poulose Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/coresight-pmu.h | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h index b0e35eec6499..4ac5c081af93 100644 --- a/tools/include/linux/coresight-pmu.h +++ b/tools/include/linux/coresight-pmu.h @@ -10,17 +10,27 @@ #define CORESIGHT_ETM_PMU_NAME "cs_etm" #define CORESIGHT_ETM_PMU_SEED 0x10 -/* ETMv3.5/PTM's ETMCR config bit */ -#define ETM_OPT_CYCACC 12 -#define ETM_OPT_CTXTID 14 -#define ETM_OPT_TS 28 -#define ETM_OPT_RETSTK 29 +/* + * Below are the definition of bit offsets for perf option, and works as + * arbitrary values for all ETM versions. + * + * Most of them are orignally from ETMv3.5/PTM's ETMCR config, therefore, + * ETMv3.5/PTM doesn't define ETMCR config bits with prefix "ETM3_" and + * directly use below macros as config bits. + */ +#define ETM_OPT_CYCACC 12 +#define ETM_OPT_CTXTID 14 +#define ETM_OPT_CTXTID2 15 +#define ETM_OPT_TS 28 +#define ETM_OPT_RETSTK 29 /* ETMv4 CONFIGR programming bits for the ETM OPTs */ #define ETM4_CFG_BIT_CYCACC 4 #define ETM4_CFG_BIT_CTXTID 6 +#define ETM4_CFG_BIT_VMID 7 #define ETM4_CFG_BIT_TS 11 #define ETM4_CFG_BIT_RETSTK 12 +#define ETM4_CFG_BIT_VMID_OPT 15 static inline int coresight_get_trace_id(int cpu) { -- cgit v1.2.3 From 1a9bcadd0058a3e81c1beca48e5e08dee9446a01 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 17:16:17 -0300 Subject: tools headers cpufeatures: Sync with the kernel sources To pick the changes from: 3b9c723ed7cfa4e1 ("KVM: SVM: Add support for SVM instruction address check change") b85a0425d8056f3b ("Enumerate AVX Vector Neural Network instructions") fb35d30fe5b06cc2 ("x86/cpufeatures: Assign dedicated feature word for CPUID_0x8000001F[EAX]") This only causes these perf files to be rebuilt: CC /tmp/build/perf/bench/mem-memcpy-x86-64-asm.o CC /tmp/build/perf/bench/mem-memset-x86-64-asm.o And addresses this perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/asm/cpufeatures.h' differs from latest version at 'arch/x86/include/asm/cpufeatures.h' diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h Cc: Borislav Petkov Cc: Kyung Min Park Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Wei Huang Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 84b887825f12..cc96e26d69f7 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 19 /* N 32-bit words worth of info */ +#define NCAPINTS 20 /* N 32-bit words worth of info */ #define NBUGINTS 1 /* N 32-bit bug flags */ /* @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ +/* FREE! ( 3*32+17) */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -201,7 +201,7 @@ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ +/* FREE! ( 7*32+10) */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ @@ -211,7 +211,7 @@ #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ +/* FREE! ( 7*32+20) */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ @@ -236,8 +236,6 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_VM_PAGE_FLUSH ( 8*32+21) /* "" VM Page Flush MSR is supported */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -294,6 +292,7 @@ #define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ @@ -337,6 +336,7 @@ #define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ #define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ #define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ @@ -385,6 +385,13 @@ #define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ +#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ + /* * BUG word(s) */ -- cgit v1.2.3 From 33dc525f93216bc83935ce98518644def04d6c54 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Mar 2021 17:20:08 -0300 Subject: tools headers UAPI: Sync KVM's kvm.h and vmx.h headers with the kernel sources To pick the changes in: fe6b6bc802b40081 ("KVM: VMX: Enable bus lock VM exit") That makes 'perf kvm-stat' aware of this new BUS_LOCK exit reason, thus addressing the following perf build warning: Warning: Kernel ABI header at 'tools/arch/x86/include/uapi/asm/vmx.h' differs from latest version at 'arch/x86/include/uapi/asm/vmx.h' diff -u tools/arch/x86/include/uapi/asm/vmx.h arch/x86/include/uapi/asm/vmx.h Cc: Chenyi Qiang Cc: Paolo Bonzini Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/uapi/asm/kvm.h | 1 + tools/arch/x86/include/uapi/asm/vmx.h | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 8e76d3701db3..5a3022c8af82 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -112,6 +112,7 @@ struct kvm_ioapic_state { #define KVM_NR_IRQCHIPS 3 #define KVM_RUN_X86_SMM (1 << 0) +#define KVM_RUN_X86_BUS_LOCK (1 << 1) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index ada955c5ebb6..b8e650a985e3 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -89,6 +89,7 @@ #define EXIT_REASON_XRSTORS 64 #define EXIT_REASON_UMWAIT 67 #define EXIT_REASON_TPAUSE 68 +#define EXIT_REASON_BUS_LOCK 74 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -150,7 +151,8 @@ { EXIT_REASON_XSAVES, "XSAVES" }, \ { EXIT_REASON_XRSTORS, "XRSTORS" }, \ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ - { EXIT_REASON_TPAUSE, "TPAUSE" } + { EXIT_REASON_TPAUSE, "TPAUSE" }, \ + { EXIT_REASON_BUS_LOCK, "BUS_LOCK" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } -- cgit v1.2.3 From 034f7ee130c19b7b04347238395cff1f402198c3 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 28 Jan 2021 09:34:17 +0800 Subject: perf stat: Fix wrong skipping for per-die aggregation Uncore becomes die-scope on Xeon Cascade Lake-AP and perf has supported --per-die aggregation yet. One issue is found in check_per_pkg() for uncore events running on AP system. On cascade Lake-AP, we have: S0-D0 S0-D1 S1-D0 S1-D1 But in check_per_pkg(), S0-D1 and S1-D1 are skipped because the mask bits for S0 and S1 have been set for S0-D0 and S1-D0. It doesn't check die_id. So the counting for S0-D1 and S1-D1 are set to zero. That's not correct. root@lkp-csl-2ap4 ~# ./perf stat -a -I 1000 -e llc_misses.mem_read --per-die -- sleep 5 1.001460963 S0-D0 1 1317376 Bytes llc_misses.mem_read 1.001460963 S0-D1 1 998016 Bytes llc_misses.mem_read 1.001460963 S1-D0 1 970496 Bytes llc_misses.mem_read 1.001460963 S1-D1 1 1291264 Bytes llc_misses.mem_read 2.003488021 S0-D0 1 1082048 Bytes llc_misses.mem_read 2.003488021 S0-D1 1 1919040 Bytes llc_misses.mem_read 2.003488021 S1-D0 1 890752 Bytes llc_misses.mem_read 2.003488021 S1-D1 1 2380800 Bytes llc_misses.mem_read 3.005613270 S0-D0 1 1126080 Bytes llc_misses.mem_read 3.005613270 S0-D1 1 2898176 Bytes llc_misses.mem_read 3.005613270 S1-D0 1 870912 Bytes llc_misses.mem_read 3.005613270 S1-D1 1 3388608 Bytes llc_misses.mem_read 4.007627598 S0-D0 1 1124608 Bytes llc_misses.mem_read 4.007627598 S0-D1 1 3884416 Bytes llc_misses.mem_read 4.007627598 S1-D0 1 921088 Bytes llc_misses.mem_read 4.007627598 S1-D1 1 4451840 Bytes llc_misses.mem_read 5.001479927 S0-D0 1 963328 Bytes llc_misses.mem_read 5.001479927 S0-D1 1 4831936 Bytes llc_misses.mem_read 5.001479927 S1-D0 1 895104 Bytes llc_misses.mem_read 5.001479927 S1-D1 1 5496640 Bytes llc_misses.mem_read From above output, we can see S0-D1 and S1-D1 don't report the interval values, they are continued to grow. That's because check_per_pkg() wrongly decides to use zero counts for S0-D1 and S1-D1. So in check_per_pkg(), we should use hashmap(socket,die) to decide if the cpu counts needs to skip. Only considering socket is not enough. Now with this patch, root@lkp-csl-2ap4 ~# ./perf stat -a -I 1000 -e llc_misses.mem_read --per-die -- sleep 5 1.001586691 S0-D0 1 1229440 Bytes llc_misses.mem_read 1.001586691 S0-D1 1 976832 Bytes llc_misses.mem_read 1.001586691 S1-D0 1 938304 Bytes llc_misses.mem_read 1.001586691 S1-D1 1 1227328 Bytes llc_misses.mem_read 2.003776312 S0-D0 1 1586752 Bytes llc_misses.mem_read 2.003776312 S0-D1 1 875392 Bytes llc_misses.mem_read 2.003776312 S1-D0 1 855616 Bytes llc_misses.mem_read 2.003776312 S1-D1 1 949376 Bytes llc_misses.mem_read 3.006512788 S0-D0 1 1338880 Bytes llc_misses.mem_read 3.006512788 S0-D1 1 920064 Bytes llc_misses.mem_read 3.006512788 S1-D0 1 877184 Bytes llc_misses.mem_read 3.006512788 S1-D1 1 1020736 Bytes llc_misses.mem_read 4.008895291 S0-D0 1 926592 Bytes llc_misses.mem_read 4.008895291 S0-D1 1 906368 Bytes llc_misses.mem_read 4.008895291 S1-D0 1 892224 Bytes llc_misses.mem_read 4.008895291 S1-D1 1 987712 Bytes llc_misses.mem_read 5.001590993 S0-D0 1 962624 Bytes llc_misses.mem_read 5.001590993 S0-D1 1 912512 Bytes llc_misses.mem_read 5.001590993 S1-D0 1 891200 Bytes llc_misses.mem_read 5.001590993 S1-D1 1 978432 Bytes llc_misses.mem_read On no-die system, die_id is 0, actually it's hashmap(socket,0), original behavior is not changed. Reported-by: Ying Huang Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jin Yao Cc: Kan Liang Cc: Peter Zijlstra Cc: Ying Huang Link: http://lore.kernel.org/lkml/20210128013417.25597-1-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 18 ++++++++++++++- tools/perf/util/evsel.h | 4 +++- tools/perf/util/python-ext-sources | 1 + tools/perf/util/stat.c | 47 ++++++++++++++++++++++++++++++-------- 4 files changed, 59 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1bf76864c4f2..7ecbc8e2fbfa 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -46,6 +46,7 @@ #include "string2.h" #include "memswap.h" #include "util.h" +#include "hashmap.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" #include @@ -1390,7 +1391,9 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->pmu_name); - zfree(&evsel->per_pkg_mask); + evsel__zero_per_pkg(evsel); + hashmap__free(evsel->per_pkg_mask); + evsel->per_pkg_mask = NULL; zfree(&evsel->metric_events); perf_evsel__object.fini(evsel); } @@ -2781,3 +2784,16 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist) return store_evsel_ids(evsel, evlist); } + +void evsel__zero_per_pkg(struct evsel *evsel) +{ + struct hashmap_entry *cur; + size_t bkt; + + if (evsel->per_pkg_mask) { + hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt) + free((char *)cur->key); + + hashmap__clear(evsel->per_pkg_mask); + } +} diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 4e8e49fb7e9d..6026487353dd 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -19,6 +19,7 @@ struct perf_stat_evsel; union perf_event; struct bpf_counter_ops; struct target; +struct hashmap; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); @@ -112,7 +113,7 @@ struct evsel { bool merged_stat; bool reset_group; bool errored; - unsigned long *per_pkg_mask; + struct hashmap *per_pkg_mask; struct evsel *leader; struct list_head config_terms; int err; @@ -433,4 +434,5 @@ struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); +void evsel__zero_per_pkg(struct evsel *evsel); #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 71b753523fac..845dd46e3c61 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -36,3 +36,4 @@ util/symbol_fprintf.c util/units.c util/affinity.c util/rwsem.c +util/hashmap.c diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 5d8af29447f4..c400f8dde017 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -13,6 +13,7 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" +#include "hashmap.h" #include void update_stats(struct stats *stats, u64 val) @@ -277,18 +278,29 @@ void evlist__save_aggr_prev_raw_counts(struct evlist *evlist) } } -static void zero_per_pkg(struct evsel *counter) +static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused) { - if (counter->per_pkg_mask) - memset(counter->per_pkg_mask, 0, cpu__max_cpu()); + uint64_t *key = (uint64_t *) __key; + + return *key & 0xffffffff; +} + +static bool pkg_id_equal(const void *__key1, const void *__key2, + void *ctx __maybe_unused) +{ + uint64_t *key1 = (uint64_t *) __key1; + uint64_t *key2 = (uint64_t *) __key2; + + return *key1 == *key2; } static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, int cpu, bool *skip) { - unsigned long *mask = counter->per_pkg_mask; + struct hashmap *mask = counter->per_pkg_mask; struct perf_cpu_map *cpus = evsel__cpus(counter); - int s; + int s, d, ret = 0; + uint64_t *key; *skip = false; @@ -299,7 +311,7 @@ static int check_per_pkg(struct evsel *counter, return 0; if (!mask) { - mask = zalloc(cpu__max_cpu()); + mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL); if (!mask) return -ENOMEM; @@ -321,8 +333,25 @@ static int check_per_pkg(struct evsel *counter, if (s < 0) return -1; - *skip = test_and_set_bit(s, mask) == 1; - return 0; + /* + * On multi-die system, die_id > 0. On no-die system, die_id = 0. + * We use hashmap(socket, die) to check the used socket+die pair. + */ + d = cpu_map__get_die(cpus, cpu, NULL).die; + if (d < 0) + return -1; + + key = malloc(sizeof(*key)); + if (!key) + return -ENOMEM; + + *key = (uint64_t)d << 32 | s; + if (hashmap__find(mask, (void *)key, NULL)) + *skip = true; + else + ret = hashmap__add(mask, (void *)key, (void *)1); + + return ret; } static int @@ -422,7 +451,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, } if (counter->per_pkg) - zero_per_pkg(counter); + evsel__zero_per_pkg(counter); ret = process_counter_maps(config, counter); if (ret) -- cgit v1.2.3 From e2a99c9a9aa02ddc7c08d5089ef140965879f8f4 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2021 12:51:47 +0900 Subject: libperf: Add perf_evlist__reset_id_hash() Add the perf_evlist__reset_id_hash() function as an internal function so that it can be called by perf to reset the hash table. This is necessary for 'perf stat' to run the workload multiple times. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210225035148.778569-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/perf/evlist.c | 13 +++++++++---- tools/lib/perf/include/internal/evlist.h | 2 ++ 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index 17465d454a0e..a0aaf385cbb5 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -26,13 +26,10 @@ void perf_evlist__init(struct perf_evlist *evlist) { - int i; - - for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) - INIT_HLIST_HEAD(&evlist->heads[i]); INIT_LIST_HEAD(&evlist->entries); evlist->nr_entries = 0; fdarray__init(&evlist->pollfd, 64); + perf_evlist__reset_id_hash(evlist); } static void __perf_evlist__propagate_maps(struct perf_evlist *evlist, @@ -237,6 +234,14 @@ static void perf_evlist__id_hash(struct perf_evlist *evlist, hlist_add_head(&sid->node, &evlist->heads[hash]); } +void perf_evlist__reset_id_hash(struct perf_evlist *evlist) +{ + int i; + + for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i) + INIT_HLIST_HEAD(&evlist->heads[i]); +} + void perf_evlist__id_add(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, u64 id) diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index 2d0fa02b036f..212c29063ad4 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -124,4 +124,6 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist, struct perf_evsel *evsel, int cpu, int thread, int fd); +void perf_evlist__reset_id_hash(struct perf_evlist *evlist); + #endif /* __LIBPERF_INTERNAL_EVLIST_H */ -- cgit v1.2.3 From 513068f2b1fe39a60d89f6f8afbdd79c2534889c Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Thu, 25 Feb 2021 12:51:48 +0900 Subject: perf stat: Fix use-after-free when -r option is used I got a segfault when using -r option with event groups. The option makes it run the workload multiple times and it will reuse the evlist and evsel for each run. While most of resources are allocated and freed properly, the id hash in the evlist was not and it resulted in the bug. You can see it with the address sanitizer like below: $ perf stat -r 100 -e '{cycles,instructions}' true ================================================================= ==693052==ERROR: AddressSanitizer: heap-use-after-free on address 0x6080000003d0 at pc 0x558c57732835 bp 0x7fff1526adb0 sp 0x7fff1526ada8 WRITE of size 8 at 0x6080000003d0 thread T0 #0 0x558c57732834 in hlist_add_head /home/namhyung/project/linux/tools/include/linux/list.h:644 #1 0x558c57732834 in perf_evlist__id_hash /home/namhyung/project/linux/tools/lib/perf/evlist.c:237 #2 0x558c57732834 in perf_evlist__id_add /home/namhyung/project/linux/tools/lib/perf/evlist.c:244 #3 0x558c57732834 in perf_evlist__id_add_fd /home/namhyung/project/linux/tools/lib/perf/evlist.c:285 #4 0x558c5747733e in store_evsel_ids util/evsel.c:2765 #5 0x558c5747733e in evsel__store_ids util/evsel.c:2782 #6 0x558c5730b717 in __run_perf_stat /home/namhyung/project/linux/tools/perf/builtin-stat.c:895 #7 0x558c5730b717 in run_perf_stat /home/namhyung/project/linux/tools/perf/builtin-stat.c:1014 #8 0x558c5730b717 in cmd_stat /home/namhyung/project/linux/tools/perf/builtin-stat.c:2446 #9 0x558c57427c24 in run_builtin /home/namhyung/project/linux/tools/perf/perf.c:313 #10 0x558c572b1a48 in handle_internal_command /home/namhyung/project/linux/tools/perf/perf.c:365 #11 0x558c572b1a48 in run_argv /home/namhyung/project/linux/tools/perf/perf.c:409 #12 0x558c572b1a48 in main /home/namhyung/project/linux/tools/perf/perf.c:539 #13 0x7fcadb9f7d09 in __libc_start_main ../csu/libc-start.c:308 #14 0x558c572b60f9 in _start (/home/namhyung/project/linux/tools/perf/perf+0x45d0f9) Actually the nodes in the hash table are struct perf_stream_id and they were freed in the previous run. Fix it by resetting the hash. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210225035148.778569-2-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 5121b4db66fe..882cd1f721d9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1306,6 +1306,7 @@ void evlist__close(struct evlist *evlist) perf_evsel__free_fd(&evsel->core); perf_evsel__free_id(&evsel->core); } + perf_evlist__reset_id_hash(&evlist->core); } static int evlist__create_syswide_maps(struct evlist *evlist) -- cgit v1.2.3 From bd57a9f33abc0adede5bafa06b2f1af3de03190d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 24 Feb 2021 16:14:38 +0900 Subject: perf daemon: Fix compile error with Asan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm seeing a build failure when build with address sanitizer. It seems we could write to the name[100] if the var is longer. $ make EXTRA_CFLAGS=-fsanitize=address ... CC builtin-daemon.o In function ‘get_session_name’, inlined from ‘session_config’ at builtin-daemon.c:164:6, inlined from ‘server_config’ at builtin-daemon.c:223:10: builtin-daemon.c:155:11: error: writing 1 byte into a region of size 0 [-Werror=stringop-overflow=] 155 | *session = 0; | ~~~~~~~~~^~~ builtin-daemon.c: In function ‘server_config’: builtin-daemon.c:162:7: note: at offset 100 to object ‘name’ with size 100 declared here 162 | char name[100]; | ^~~~ Fixes: c0666261ff38 ("perf daemon: Add config file support") Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Peter Zijlstra Cc: Stephane Eranian Link: https://lore.kernel.org/r/20210224071438.686677-1-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 8f0ed2e59280..ace8772a4f03 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -161,7 +161,7 @@ static int session_config(struct daemon *daemon, const char *var, const char *va struct daemon_session *session; char name[100]; - if (get_session_name(var, name, sizeof(name))) + if (get_session_name(var, name, sizeof(name) - 1)) return -EINVAL; var = strchr(var, '.'); -- cgit v1.2.3 From ec4d0a7680c793ef68d47507fcec245019ee6f33 Mon Sep 17 00:00:00 2001 From: Nicholas Fraser Date: Fri, 19 Feb 2021 11:09:32 -0500 Subject: perf archive: Fix filtering of empty build-ids A non-existent build-id used to be treated as all-zero SHA-1 hash. Build-ids are now variable width. A non-existent build-id is an empty string and "perf buildid-list" pads this with spaces. This is true even when using old perf.data files recorded from older versions of perf; "perf buildid-list" never reports an all-zero hash anymore. This fixes "perf-archive" to skip missing build-ids by skipping lines that start with a padding space rather than with zeroes. Signed-off-by: Nicholas Fraser Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Huw Davies Cc: Ian Rogers Cc: Ingo Molnar Cc: Mark Rutland Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ulrich Czekalla Link: https://lore.kernel.org/r/442bffc7-ac5c-0975-b876-a549efce2413@codeweavers.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-archive.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/perf-archive.sh b/tools/perf/perf-archive.sh index 0cfb3e2cefef..133f0eddbcc4 100644 --- a/tools/perf/perf-archive.sh +++ b/tools/perf/perf-archive.sh @@ -20,9 +20,8 @@ else fi BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX) -NOBUILDID=0000000000000000000000000000000000000000 -perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS +perf buildid-list -i $PERF_DATA --with-hits | grep -v "^ " > $BUILDIDS if [ ! -s $BUILDIDS ] ; then echo "perf archive: no build-ids found" rm $BUILDIDS || true -- cgit v1.2.3 From a8146d66ab0184ad1728eaeb59cfdf256f4b8fbf Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 3 Mar 2021 08:01:24 -0800 Subject: perf test: Fix sample-parsing failure on non-x86 platforms Executing 'perf test 27' fails on s390: [root@t35lp46 perf]# ./perf test -Fv 27 27: Sample parsing --- start --- ---- end ---- Sample parsing: FAILED! [root@t35lp46 perf]# The commit fbefe9c2f87fd392 ("perf tools: Support arch specific PERF_SAMPLE_WEIGHT_STRUCT processing") changes the ins_lat to a model-specific variable only for X86, but perf test still verify the variable in the generic test. Remove the ins_lat check in the generic test. The following patch will add it in the X86 specific test. Fixes: fbefe9c2f87fd392 ("perf tools: Support arch specific PERF_SAMPLE_WEIGHT_STRUCT processing") Reported-by: Thomas Richter Signed-off-by: Kan Liang Tested-by: Thomas Richter Cc: Athira Jajeev Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/1614787285-104151-1-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sample-parsing.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 0dbe3aa99853..8fd8a4ef97da 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -129,9 +129,6 @@ static bool samples_same(const struct perf_sample *s1, if (type & PERF_SAMPLE_WEIGHT) COMP(weight); - if (type & PERF_SAMPLE_WEIGHT_STRUCT) - COMP(ins_lat); - if (type & PERF_SAMPLE_DATA_SRC) COMP(data_src); @@ -245,7 +242,6 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) .cgroup = 114, .data_page_size = 115, .code_page_size = 116, - .ins_lat = 117, .aux_sample = { .size = sizeof(aux_data), .data = (void *)aux_data, -- cgit v1.2.3 From 7d9d4c6edba93cd96899affe2fc60c3341df152c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Wed, 3 Mar 2021 08:01:25 -0800 Subject: perf test: Support the ins_lat check in the X86 specific test The ins_lat of PERF_SAMPLE_WEIGHT_STRUCT stands for the instruction latency, which is only available for X86. Add a X86 specific test for the ins_lat and PERF_SAMPLE_WEIGHT_STRUCT type. The test__x86_sample_parsing() uses the same way as the test__sample_parsing() to verify a sample type. Since the ins_lat and PERF_SAMPLE_WEIGHT_STRUCT are the only X86 specific sample type for now, the test__x86_sample_parsing() only verify the PERF_SAMPLE_WEIGHT_STRUCT type. Other sample types are still verified in the generic test. $ perf test 77 -v 77: x86 Sample parsing : --- start --- test child forked, pid 102370 test child finished with 0 ---- end ---- x86 Sample parsing: Ok Signed-off-by: Kan Liang Cc: Athira Jajeev Cc: Heiko Carstens Cc: Sumanth Korikkar Cc: Sven Schnelle Cc: Thomas Richter Cc: Vasily Gorbik Link: http://lore.kernel.org/lkml/1614787285-104151-2-git-send-email-kan.liang@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/include/arch-tests.h | 1 + tools/perf/arch/x86/tests/Build | 1 + tools/perf/arch/x86/tests/arch-tests.c | 4 + tools/perf/arch/x86/tests/sample-parsing.c | 121 +++++++++++++++++++++++++++++ 4 files changed, 127 insertions(+) create mode 100644 tools/perf/arch/x86/tests/sample-parsing.c (limited to 'tools') diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 6a54b94f1c25..0e20f3dc69f3 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -10,6 +10,7 @@ int test__rdpmc(struct test *test __maybe_unused, int subtest); int test__insn_x86(struct test *test __maybe_unused, int subtest); int test__intel_pt_pkt_decoder(struct test *test, int subtest); int test__bp_modify(struct test *test, int subtest); +int test__x86_sample_parsing(struct test *test, int subtest); #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 36d4f248b51d..28d793390198 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -3,5 +3,6 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += arch-tests.o perf-y += rdpmc.o +perf-y += sample-parsing.o perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index bc25d727b4e9..71aa67367ad6 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -30,6 +30,10 @@ struct test arch_tests[] = { .func = test__bp_modify, }, #endif + { + .desc = "x86 Sample parsing", + .func = test__x86_sample_parsing, + }, { .func = NULL, }, diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c new file mode 100644 index 000000000000..c92db87e4479 --- /dev/null +++ b/tools/perf/arch/x86/tests/sample-parsing.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include + +#include "event.h" +#include "evsel.h" +#include "debug.h" +#include "util/synthetic-events.h" + +#include "tests/tests.h" +#include "arch-tests.h" + +#define COMP(m) do { \ + if (s1->m != s2->m) { \ + pr_debug("Samples differ at '"#m"'\n"); \ + return false; \ + } \ +} while (0) + +static bool samples_same(const struct perf_sample *s1, + const struct perf_sample *s2, + u64 type) +{ + if (type & PERF_SAMPLE_WEIGHT_STRUCT) + COMP(ins_lat); + + return true; +} + +static int do_test(u64 sample_type) +{ + struct evsel evsel = { + .needs_swap = false, + .core = { + . attr = { + .sample_type = sample_type, + .read_format = 0, + }, + }, + }; + union perf_event *event; + struct perf_sample sample = { + .weight = 101, + .ins_lat = 102, + }; + struct perf_sample sample_out; + size_t i, sz, bufsz; + int err, ret = -1; + + sz = perf_event__sample_event_size(&sample, sample_type, 0); + bufsz = sz + 4096; /* Add a bit for overrun checking */ + event = malloc(bufsz); + if (!event) { + pr_debug("malloc failed\n"); + return -1; + } + + memset(event, 0xff, bufsz); + event->header.type = PERF_RECORD_SAMPLE; + event->header.misc = 0; + event->header.size = sz; + + err = perf_event__synthesize_sample(event, sample_type, 0, &sample); + if (err) { + pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", + "perf_event__synthesize_sample", sample_type, err); + goto out_free; + } + + /* The data does not contain 0xff so we use that to check the size */ + for (i = bufsz; i > 0; i--) { + if (*(i - 1 + (u8 *)event) != 0xff) + break; + } + if (i != sz) { + pr_debug("Event size mismatch: actual %zu vs expected %zu\n", + i, sz); + goto out_free; + } + + evsel.sample_size = __evsel__sample_size(sample_type); + + err = evsel__parse_sample(&evsel, event, &sample_out); + if (err) { + pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", + "evsel__parse_sample", sample_type, err); + goto out_free; + } + + if (!samples_same(&sample, &sample_out, sample_type)) { + pr_debug("parsing failed for sample_type %#"PRIx64"\n", + sample_type); + goto out_free; + } + + ret = 0; +out_free: + free(event); + + return ret; +} + +/** + * test__x86_sample_parsing - test X86 specific sample parsing + * + * This function implements a test that synthesizes a sample event, parses it + * and then checks that the parsed sample matches the original sample. If the + * test passes %0 is returned, otherwise %-1 is returned. + * + * For now, the PERF_SAMPLE_WEIGHT_STRUCT is the only X86 specific sample type. + * The test only checks the PERF_SAMPLE_WEIGHT_STRUCT type. + */ +int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused) +{ + return do_test(PERF_SAMPLE_WEIGHT_STRUCT); +} -- cgit v1.2.3 From c1f272df510c6b1db68ca6597724d17b557d1407 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 4 Mar 2021 09:51:52 -0300 Subject: perf tests x86: Move insn.h include to make sure it finds stddef.h In some versions of alpine Linux the perf build is broken since commit 1d509f2a6ebca1ae ("x86/insn: Support big endian cross-compiles"): In file included from /usr/include/linux/byteorder/little_endian.h:13, from /usr/include/asm/byteorder.h:5, from arch/x86/util/../../../../arch/x86/include/asm/insn.h:10, from arch/x86/util/archinsn.c:2: /usr/include/linux/swab.h:161:8: error: unknown type name '__always_inline' static __always_inline __u16 __swab16p(const __u16 *p) So move the inclusion of arch/x86/include/asm/insn.h to later in the places where linux/stddef.h (that conditionally defines __always_inline) to workaround this problem on Alpine Linux 3.9 to 3.11, 3.12 onwards works. Cc: Josh Poimboeuf Cc: Martin Schwidefsky Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/insn-x86.c | 2 +- tools/perf/arch/x86/util/archinsn.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c index f782ef8c5982..4f75ae990140 100644 --- a/tools/perf/arch/x86/tests/insn-x86.c +++ b/tools/perf/arch/x86/tests/insn-x86.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include "../../../../arch/x86/include/asm/insn.h" #include #include "debug.h" #include "tests/tests.h" #include "arch-tests.h" +#include "../../../../arch/x86/include/asm/insn.h" #include "intel-pt-decoder/intel-pt-insn-decoder.h" diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c index 3e6791531ca5..34d600c51044 100644 --- a/tools/perf/arch/x86/util/archinsn.c +++ b/tools/perf/arch/x86/util/archinsn.c @@ -1,10 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../../../../arch/x86/include/asm/insn.h" #include "archinsn.h" #include "event.h" #include "machine.h" #include "thread.h" #include "symbol.h" +#include "../../../../arch/x86/include/asm/insn.h" void arch_fetch_insn(struct perf_sample *sample, struct thread *thread, -- cgit v1.2.3 From 6740a4e70e5d1b9d8e7fe41fd46dd5656d65dadf Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Thu, 4 Mar 2021 11:59:58 +0530 Subject: perf report: Fix -F for branch & mem modes perf report fails to add valid additional fields with -F when used with branch or mem modes. Fix it. Before patch: $ perf record -b $ perf report -b -F +srcline_from --stdio Error: Invalid --fields key: `srcline_from' After patch: $ perf report -b -F +srcline_from --stdio # Samples: 8K of event 'cycles' # Event count (approx.): 8784 ... Committer notes: There was an inversion: when looking at branch stack dimensions (keys) it was checking if the sort mode was 'mem', not 'branch'. Fixes: aa6b3c99236b ("perf report: Make -F more strict like -s") Reported-by: Athira Jajeev Signed-off-by: Ravi Bangoria Reviewed-by: Athira Jajeev Tested-by: Arnaldo Carvalho de Melo Tested-by: Athira Jajeev Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lore.kernel.org/lkml/20210304062958.85465-1-ravi.bangoria@linux.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 0d5ad42812b9..552b590485bf 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -3140,7 +3140,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__MEMORY) + if (sort__mode != SORT_MODE__BRANCH) return -EINVAL; return __sort_dimension__add_output(list, sd); @@ -3152,7 +3152,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok) if (strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__BRANCH) + if (sort__mode != SORT_MODE__MEMORY) return -EINVAL; return __sort_dimension__add_output(list, sd); -- cgit v1.2.3 From 77d02bd00cea9f1a87afe58113fa75b983d6c23a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 5 Mar 2021 10:02:09 -0300 Subject: perf map: Tighten snprintf() string precision to pass gcc check on some 32-bit arches Noticed on a debian:experimental mips and mipsel cross build build environment: perfbuilder@ec265a086e9b:~$ mips-linux-gnu-gcc --version | head -1 mips-linux-gnu-gcc (Debian 10.2.1-3) 10.2.1 20201224 perfbuilder@ec265a086e9b:~$ CC /tmp/build/perf/util/map.o util/map.c: In function 'map__new': util/map.c:109:5: error: '%s' directive output may be truncated writing between 1 and 2147483645 bytes into a region of size 4096 [-Werror=format-truncation=] 109 | "%s/platforms/%s/arch-%s/usr/lib/%s", | ^~ In file included from /usr/mips-linux-gnu/include/stdio.h:867, from util/symbol.h:11, from util/map.c:2: /usr/mips-linux-gnu/include/bits/stdio2.h:67:10: note: '__builtin___snprintf_chk' output 32 or more bytes (assuming 4294967321) into a destination of size 4096 67 | return __builtin___snprintf_chk (__s, __n, __USE_FORTIFY_LEVEL - 1, | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 68 | __bos (__s), __fmt, __va_arg_pack ()); | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ cc1: all warnings being treated as errors Since we have the lenghts for what lands in that place, use it to give the compiler more info and make it happy. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/map.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 692e56dc832e..fbc40a2c17d4 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -77,8 +77,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) if (strstarts(filename, "/system/lib/")) { char *ndk, *app; const char *arch; - size_t ndk_length; - size_t app_length; + int ndk_length, app_length; ndk = getenv("NDK_ROOT"); app = getenv("APP_PLATFORM"); @@ -106,8 +105,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename) if (new_length > PATH_MAX) return false; snprintf(newfilename, new_length, - "%s/platforms/%s/arch-%s/usr/lib/%s", - ndk, app, arch, libname); + "%.*s/platforms/%.*s/arch-%s/usr/lib/%s", + ndk_length, ndk, app_length, app, arch, libname); return true; } -- cgit v1.2.3 From 86a19008af5d88d5d523dbfe9b6ede11473e9a7f Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 2 Mar 2021 15:41:20 +0100 Subject: perf trace: Fix race in signal handling Since a lot of stuff happens before the SIGINT signal handler is registered (scanning /proc/*, etc.), on bigger systems, such as Cavium Sabre CN99xx, it may happen that first interrupt signal is lost and perf isn't correctly terminated. The reproduction code might look like the following: perf trace -a & PERF_PID=$! sleep 4 kill -INT $PERF_PID The issue has been found on a CN99xx machine with RHEL-8 and the patch fixes it by registering the signal handlers earlier in the init stage. Suggested-by: Jiri Olsa Signed-off-by: Michael Petlan Tested-by: Michael Petlan Cc: Jiri Olsa Link: https://lore.kernel.org/lkml/YEJnaMzH2ctp3PPx@kernel.org/ Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 85b6a46e85b6..7ec18ff57fc4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -3964,9 +3964,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) evlist__config(evlist, &trace->opts, &callchain_param); - signal(SIGCHLD, sig_handler); - signal(SIGINT, sig_handler); - if (forks) { err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL); if (err < 0) { @@ -4827,6 +4824,8 @@ int cmd_trace(int argc, const char **argv) signal(SIGSEGV, sighandler_dump_stack); signal(SIGFPE, sighandler_dump_stack); + signal(SIGCHLD, sig_handler); + signal(SIGINT, sig_handler); trace.evlist = evlist__new(); trace.sctbl = syscalltbl__new(); -- cgit v1.2.3 From 6fc5baf5471700fd613f0b4e52ab4563f1942b78 Mon Sep 17 00:00:00 2001 From: Suzuki K Poulose Date: Sat, 6 Feb 2021 23:08:29 +0800 Subject: perf cs-etm: Fix bitmap for option When set option with macros ETM_OPT_CTXTID and ETM_OPT_TS, it wrongly takes these two values (14 and 28 prespectively) as bit masks, but actually both are the offset for bits. But this doesn't lead to further failure due to the AND logic operation will be always true for ETM_OPT_CTXTID / ETM_OPT_TS. This patch defines new independent macros (rather than using the "config" bits) for requesting the "contextid" and "timestamp" for cs_etm_set_option(). Signed-off-by: Suzuki Poulouse Reviewed-by: Mike Leach Cc: Al Grant Cc: Daniel Kiss Cc: Denis Nikitin Cc: Jiri Olsa Cc: John Garry Cc: Jonathan Corbet Cc: Leo Yan Cc: Mark Rutland Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: Will Deacon Cc: coresight@lists.linaro.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-doc@vger.kernel.org Link: http://lore.kernel.org/lkml/20210206150833.42120-5-leo.yan@linaro.org [ Extract the change as a separate patch for easier review ] Signed-off-by: Leo Yan Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/cs-etm.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index bd446aba64f7..c25c878fd06c 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -156,6 +156,10 @@ out: return err; } +#define ETM_SET_OPT_CTXTID (1 << 0) +#define ETM_SET_OPT_TS (1 << 1) +#define ETM_SET_OPT_MASK (ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS) + static int cs_etm_set_option(struct auxtrace_record *itr, struct evsel *evsel, u32 option) { @@ -169,17 +173,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr, !cpu_map__has(online_cpus, i)) continue; - if (option & ETM_OPT_CTXTID) { + if (option & ETM_SET_OPT_CTXTID) { err = cs_etm_set_context_id(itr, evsel, i); if (err) goto out; } - if (option & ETM_OPT_TS) { + if (option & ETM_SET_OPT_TS) { err = cs_etm_set_timestamp(itr, evsel, i); if (err) goto out; } - if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS)) + if (option & ~(ETM_SET_OPT_MASK)) /* Nothing else is currently supported */ goto out; } @@ -406,7 +410,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr, evsel__set_sample_bit(cs_etm_evsel, CPU); err = cs_etm_set_option(itr, cs_etm_evsel, - ETM_OPT_CTXTID | ETM_OPT_TS); + ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS); if (err) goto out; } -- cgit v1.2.3