diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-09 10:19:13 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-07-09 10:19:13 -0700 |
commit | dcf3c935dd9e8e76c9922e88672fa4ad6a8a4df8 (patch) | |
tree | f8ce3ab321c70b666e14ed145faacc8b3c0ea82c /arch/um/kernel | |
parent | 7a400bf28334fc7734639db3566394e1fc80670c (diff) | |
parent | 1aee020155f364ef538370d3392969f1077b9bae (diff) | |
download | linux-dcf3c935dd9e8e76c9922e88672fa4ad6a8a4df8.tar.bz2 |
Merge tag 'for-linus-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
Pull UML updates from Richard Weinberger:
- Support for optimized routines based on the host CPU
- Support for PCI via virtio
- Various fixes
* tag 'for-linus-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml:
um: remove unneeded semicolon in um_arch.c
um: Remove the repeated declaration
um: fix error return code in winch_tramp()
um: fix error return code in slip_open()
um: Fix stack pointer alignment
um: implement flush_cache_vmap/flush_cache_vunmap
um: add a UML specific futex implementation
um: enable the use of optimized xor routines in UML
um: Add support for host CPU flags and alignment
um: allow not setting extra rpaths in the linux binary
um: virtio/pci: enable suspend/resume
um: add PCI over virtio emulation driver
um: irqs: allow invoking time-travel handler multiple times
um: time-travel/signals: fix ndelay() in interrupt
um: expose time-travel mode to userspace side
um: export signals_enabled directly
um: remove unused smp_sigio_handler() declaration
lib: add iomem emulation (logic_iomem)
um: allow disabling NO_IOMEM
Diffstat (limited to 'arch/um/kernel')
-rw-r--r-- | arch/um/kernel/Makefile | 14 | ||||
-rw-r--r-- | arch/um/kernel/ioport.c | 13 | ||||
-rw-r--r-- | arch/um/kernel/irq.c | 52 | ||||
-rw-r--r-- | arch/um/kernel/ksyms.c | 2 | ||||
-rw-r--r-- | arch/um/kernel/skas/clone.c | 2 | ||||
-rw-r--r-- | arch/um/kernel/skas/uaccess.c | 136 | ||||
-rw-r--r-- | arch/um/kernel/time.c | 35 | ||||
-rw-r--r-- | arch/um/kernel/um_arch.c | 48 |
8 files changed, 258 insertions, 44 deletions
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile index e698e0c7dbdc..1d18e4e46989 100644 --- a/arch/um/kernel/Makefile +++ b/arch/um/kernel/Makefile @@ -17,18 +17,19 @@ extra-y := vmlinux.lds obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \ physmem.o process.o ptrace.o reboot.o sigio.o \ signal.o syscall.o sysrq.o time.o tlb.o trap.o \ - um_arch.o umid.o maccess.o kmsg_dump.o skas/ + um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/ obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o obj-$(CONFIG_GPROF) += gprof_syms.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_STACKTRACE) += stacktrace.o +obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o USER_OBJS := config.o include arch/um/scripts/Makefile.rules -targets := config.c config.tmp +targets := config.c config.tmp capflags.c # Be careful with the below Sed code - sed is pitfall-rich! # We use sed to lower build requirements, for "embedded" builders for instance. @@ -43,6 +44,15 @@ quiet_cmd_quote1 = QUOTE $@ $(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE $(call if_changed,quote2) +quiet_cmd_mkcapflags = MKCAP $@ + cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/../../x86/kernel/cpu/mkcapflags.sh $@ $^ + +cpufeature = $(src)/../../x86/include/asm/cpufeatures.h +vmxfeature = $(src)/../../x86/include/asm/vmxfeatures.h + +$(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/../../x86/kernel/cpu/mkcapflags.sh FORCE + $(call if_changed,mkcapflags) + quiet_cmd_quote2 = QUOTE $@ cmd_quote2 = sed -e '/CONFIG/{' \ -e 's/"CONFIG"//' \ diff --git a/arch/um/kernel/ioport.c b/arch/um/kernel/ioport.c new file mode 100644 index 000000000000..7220615b3beb --- /dev/null +++ b/arch/um/kernel/ioport.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2021 Intel Corporation + * Author: Johannes Berg <johannes@sipsolutions.net> + */ +#include <asm/iomap.h> +#include <asm-generic/pci_iomap.h> + +void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port, + unsigned int nr) +{ + return NULL; +} diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c index 82af5191e73d..a8873d9bc28b 100644 --- a/arch/um/kernel/irq.c +++ b/arch/um/kernel/irq.c @@ -56,7 +56,7 @@ struct irq_entry { static DEFINE_SPINLOCK(irq_lock); static LIST_HEAD(active_fds); -static DECLARE_BITMAP(irqs_allocated, NR_IRQS); +static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ); static bool irqs_suspended; static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs) @@ -101,10 +101,12 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry, if (!reg->timetravel_handler) return false; - /* prevent nesting - we'll get it again later when we SIGIO ourselves */ - if (reg->pending_on_resume) - return true; - + /* + * Handle all messages - we might get multiple even while + * interrupts are already suspended, due to suspend order + * etc. Note that time_travel_add_irq_event() will not add + * an event twice, if it's pending already "first wins". + */ reg->timetravel_handler(reg->irq, entry->fd, reg->id, ®->event); if (!reg->event.pending) @@ -123,7 +125,8 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry, #endif static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t, - struct uml_pt_regs *regs) + struct uml_pt_regs *regs, + bool timetravel_handlers_only) { struct irq_reg *reg = &entry->reg[t]; @@ -136,18 +139,29 @@ static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type if (irq_do_timetravel_handler(entry, t)) return; - if (irqs_suspended) + /* + * If we're called to only run time-travel handlers then don't + * actually proceed but mark sigio as pending (if applicable). + * For suspend/resume, timetravel_handlers_only may be true + * despite time-travel not being configured and used. + */ + if (timetravel_handlers_only) { +#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT + mark_sigio_pending(); +#endif return; + } irq_io_loop(reg, regs); } -void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +static void _sigio_handler(struct uml_pt_regs *regs, + bool timetravel_handlers_only) { struct irq_entry *irq_entry; int n, i; - if (irqs_suspended && !um_irq_timetravel_handler_used()) + if (timetravel_handlers_only && !um_irq_timetravel_handler_used()) return; while (1) { @@ -172,14 +186,20 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) irq_entry = os_epoll_get_data_pointer(i); for (t = 0; t < NUM_IRQ_TYPES; t++) - sigio_reg_handler(i, irq_entry, t, regs); + sigio_reg_handler(i, irq_entry, t, regs, + timetravel_handlers_only); } } - if (!irqs_suspended) + if (!timetravel_handlers_only) free_irqs(); } +void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) +{ + _sigio_handler(regs, irqs_suspended); +} + static struct irq_entry *get_irq_entry_by_fd(int fd) { struct irq_entry *walk; @@ -399,7 +419,8 @@ unsigned int do_IRQ(int irq, struct uml_pt_regs *regs) void um_free_irq(int irq, void *dev) { - if (WARN(irq < 0 || irq > NR_IRQS, "freeing invalid irq %d", irq)) + if (WARN(irq < 0 || irq > UM_LAST_SIGNAL_IRQ, + "freeing invalid irq %d", irq)) return; free_irq_by_irq_and_dev(irq, dev); @@ -467,6 +488,11 @@ int um_request_irq_tt(int irq, int fd, enum um_irq_type type, devname, dev_id, timetravel_handler); } EXPORT_SYMBOL(um_request_irq_tt); + +void sigio_run_timetravel_handlers(void) +{ + _sigio_handler(NULL, true); +} #endif #ifdef CONFIG_PM_SLEEP @@ -623,7 +649,7 @@ void __init init_IRQ(void) irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq); - for (i = 1; i < NR_IRQS; i++) + for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++) irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq); /* Initialize EPOLL Loop */ os_setup_epoll(); diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c index 8ade54a86a7e..b1e5634398d0 100644 --- a/arch/um/kernel/ksyms.c +++ b/arch/um/kernel/ksyms.c @@ -7,7 +7,7 @@ #include <os.h> EXPORT_SYMBOL(set_signals); -EXPORT_SYMBOL(get_signals); +EXPORT_SYMBOL(signals_enabled); EXPORT_SYMBOL(os_stat_fd); EXPORT_SYMBOL(os_stat_file); diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c index 592cdb138441..5afac0fef24e 100644 --- a/arch/um/kernel/skas/clone.c +++ b/arch/um/kernel/skas/clone.c @@ -29,7 +29,7 @@ stub_clone_handler(void) long err; err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD, - (unsigned long)data + UM_KERN_PAGE_SIZE / 2 - sizeof(void *)); + (unsigned long)data + UM_KERN_PAGE_SIZE / 2); if (err) { data->parent_err = err; goto done; diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c index 2dec915abe6f..6c76df96e858 100644 --- a/arch/um/kernel/skas/uaccess.c +++ b/arch/um/kernel/skas/uaccess.c @@ -11,6 +11,7 @@ #include <asm/current.h> #include <asm/page.h> #include <kern_util.h> +#include <asm/futex.h> #include <os.h> pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr) @@ -248,3 +249,138 @@ long __strnlen_user(const void __user *str, long len) return 0; } EXPORT_SYMBOL(__strnlen_user); + +/** + * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant + * argument and comparison of the previous + * futex value with another constant. + * + * @encoded_op: encoded operation to execute + * @uaddr: pointer to user space address + * + * Return: + * 0 - On success + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Operation not supported + */ + +int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr) +{ + int oldval, ret; + struct page *page; + unsigned long addr = (unsigned long) uaddr; + pte_t *pte; + + ret = -EFAULT; + if (!access_ok(uaddr, sizeof(*uaddr))) + return -EFAULT; + preempt_disable(); + pte = maybe_map(addr, 1); + if (pte == NULL) + goto out_inuser; + + page = pte_page(*pte); +#ifdef CONFIG_64BIT + pagefault_disable(); + addr = (unsigned long) page_address(page) + + (((unsigned long) addr) & ~PAGE_MASK); +#else + addr = (unsigned long) kmap_atomic(page) + + ((unsigned long) addr & ~PAGE_MASK); +#endif + uaddr = (u32 *) addr; + oldval = *uaddr; + + ret = 0; + + switch (op) { + case FUTEX_OP_SET: + *uaddr = oparg; + break; + case FUTEX_OP_ADD: + *uaddr += oparg; + break; + case FUTEX_OP_OR: + *uaddr |= oparg; + break; + case FUTEX_OP_ANDN: + *uaddr &= ~oparg; + break; + case FUTEX_OP_XOR: + *uaddr ^= oparg; + break; + default: + ret = -ENOSYS; + } +#ifdef CONFIG_64BIT + pagefault_enable(); +#else + kunmap_atomic((void *)addr); +#endif + +out_inuser: + preempt_enable(); + + if (ret == 0) + *oval = oldval; + + return ret; +} +EXPORT_SYMBOL(arch_futex_atomic_op_inuser); + +/** + * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the + * uaddr with newval if the current value is + * oldval. + * @uval: pointer to store content of @uaddr + * @uaddr: pointer to user space address + * @oldval: old value + * @newval: new value to store to @uaddr + * + * Return: + * 0 - On success + * -EFAULT - User access resulted in a page fault + * -EAGAIN - Atomic operation was unable to complete due to contention + * -ENOSYS - Function not implemented (only if !HAVE_FUTEX_CMPXCHG) + */ + +int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + struct page *page; + pte_t *pte; + int ret = -EFAULT; + + if (!access_ok(uaddr, sizeof(*uaddr))) + return -EFAULT; + + preempt_disable(); + pte = maybe_map((unsigned long) uaddr, 1); + if (pte == NULL) + goto out_inatomic; + + page = pte_page(*pte); +#ifdef CONFIG_64BIT + pagefault_disable(); + uaddr = page_address(page) + (((unsigned long) uaddr) & ~PAGE_MASK); +#else + uaddr = kmap_atomic(page) + ((unsigned long) uaddr & ~PAGE_MASK); +#endif + + *uval = *uaddr; + + ret = cmpxchg(uaddr, oldval, newval); + +#ifdef CONFIG_64BIT + pagefault_enable(); +#else + kunmap_atomic(uaddr); +#endif + ret = 0; + +out_inatomic: + preempt_enable(); + return ret; +} +EXPORT_SYMBOL(futex_atomic_cmpxchg_inatomic); diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c index e0cdb9694fb8..fddd1dec27e6 100644 --- a/arch/um/kernel/time.c +++ b/arch/um/kernel/time.c @@ -68,23 +68,15 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg, int ret; /* - * Poll outside the locked section (if we're not called to only read - * the response) so we can get interrupts for e.g. virtio while we're - * here, but then we need to lock to not get interrupted between the - * read of the message and write of the ACK. + * We can't unlock here, but interrupt signals with a timetravel_handler + * (see um_request_irq_tt) get to the timetravel_handler anyway. */ if (mode != TTMH_READ) { - bool disabled = irqs_disabled(); + BUG_ON(mode == TTMH_IDLE && !irqs_disabled()); - BUG_ON(mode == TTMH_IDLE && !disabled); - - if (disabled) - local_irq_enable(); while (os_poll(1, &time_travel_ext_fd) != 0) { /* nothing */ } - if (disabled) - local_irq_disable(); } ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg)); @@ -123,15 +115,15 @@ static u64 time_travel_ext_req(u32 op, u64 time) .time = time, .seq = mseq, }; - unsigned long flags; /* - * We need to save interrupts here and only restore when we - * got the ACK - otherwise we can get interrupted and send - * another request while we're still waiting for an ACK, but - * the peer doesn't know we got interrupted and will send - * the ACKs in the same order as the message, but we'd need - * to see them in the opposite order ... + * We need to block even the timetravel handlers of SIGIO here and + * only restore their use when we got the ACK - otherwise we may + * (will) get interrupted by that, try to queue the IRQ for future + * processing and thus send another request while we're still waiting + * for an ACK, but the peer doesn't know we got interrupted and will + * send the ACKs in the same order as the message, but we'd need to + * see them in the opposite order ... * * This wouldn't matter *too* much, but some ACKs carry the * current time (for UM_TIMETRAVEL_GET) and getting another @@ -140,7 +132,7 @@ static u64 time_travel_ext_req(u32 op, u64 time) * The sequence number assignment that happens here lets us * debug such message handling issues more easily. */ - local_irq_save(flags); + block_signals_hard(); os_write_file(time_travel_ext_fd, &msg, sizeof(msg)); while (msg.op != UM_TIMETRAVEL_ACK) @@ -152,7 +144,7 @@ static u64 time_travel_ext_req(u32 op, u64 time) if (op == UM_TIMETRAVEL_GET) time_travel_set_time(msg.time); - local_irq_restore(flags); + unblock_signals_hard(); return msg.time; } @@ -352,9 +344,6 @@ void deliver_time_travel_irqs(void) while ((e = list_first_entry_or_null(&time_travel_irqs, struct time_travel_event, list))) { - WARN(e->time != time_travel_time, - "time moved from %lld to %lld before IRQ delivery\n", - time_travel_time, e->time); list_del(&e->list); e->pending = false; e->fn(e); diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 9512253947d5..a149a5e9a16a 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -6,6 +6,7 @@ #include <linux/delay.h> #include <linux/init.h> #include <linux/mm.h> +#include <linux/ctype.h> #include <linux/module.h> #include <linux/panic_notifier.h> #include <linux/seq_file.h> @@ -17,6 +18,7 @@ #include <linux/suspend.h> #include <asm/processor.h> +#include <asm/cpufeature.h> #include <asm/sections.h> #include <asm/setup.h> #include <as-layout.h> @@ -51,9 +53,13 @@ static void __init add_arg(char *arg) */ struct cpuinfo_um boot_cpu_data = { .loops_per_jiffy = 0, - .ipi_pipe = { -1, -1 } + .ipi_pipe = { -1, -1 }, + .cache_alignment = L1_CACHE_BYTES, + .x86_capability = { 0 } }; +EXPORT_SYMBOL(boot_cpu_data); + union thread_union cpu0_irqstack __section(".data..init_irqstack") = { .thread_info = INIT_THREAD_INFO(init_task) }; @@ -63,17 +69,25 @@ static char host_info[(__NEW_UTS_LEN + 1) * 5]; static int show_cpuinfo(struct seq_file *m, void *v) { - int index = 0; + int i = 0; - seq_printf(m, "processor\t: %d\n", index); + seq_printf(m, "processor\t: %d\n", i); seq_printf(m, "vendor_id\t: User Mode Linux\n"); seq_printf(m, "model name\t: UML\n"); seq_printf(m, "mode\t\t: skas\n"); seq_printf(m, "host\t\t: %s\n", host_info); - seq_printf(m, "bogomips\t: %lu.%02lu\n\n", + seq_printf(m, "fpu\t\t: %s\n", cpu_has(&boot_cpu_data, X86_FEATURE_FPU) ? "yes" : "no"); + seq_printf(m, "flags\t\t:"); + for (i = 0; i < 32*NCAPINTS; i++) + if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL)) + seq_printf(m, " %s", x86_cap_flags[i]); + seq_printf(m, "\n"); + seq_printf(m, "cache_alignment\t: %d\n", boot_cpu_data.cache_alignment); + seq_printf(m, "bogomips\t: %lu.%02lu\n", loops_per_jiffy/(500000/HZ), (loops_per_jiffy/(5000/HZ)) % 100); + return 0; } @@ -262,6 +276,30 @@ EXPORT_SYMBOL(end_iomem); #define MIN_VMALLOC (32 * 1024 * 1024) +static void parse_host_cpu_flags(char *line) +{ + int i; + for (i = 0; i < 32*NCAPINTS; i++) { + if ((x86_cap_flags[i] != NULL) && strstr(line, x86_cap_flags[i])) + set_cpu_cap(&boot_cpu_data, i); + } +} +static void parse_cache_line(char *line) +{ + long res; + char *to_parse = strstr(line, ":"); + if (to_parse) { + to_parse++; + while (*to_parse != 0 && isspace(*to_parse)) { + to_parse++; + } + if (kstrtoul(to_parse, 10, &res) == 0 && is_power_of_2(res)) + boot_cpu_data.cache_alignment = res; + else + boot_cpu_data.cache_alignment = L1_CACHE_BYTES; + } +} + int __init linux_main(int argc, char **argv) { unsigned long avail, diff; @@ -298,6 +336,8 @@ int __init linux_main(int argc, char **argv) /* OS sanity checks that need to happen before the kernel runs */ os_early_checks(); + get_host_cpu_features(parse_host_cpu_flags, parse_cache_line); + brk_start = (unsigned long) sbrk(0); /* |