summaryrefslogtreecommitdiffstats
path: root/arch/um/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-07-09 10:19:13 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-07-09 10:19:13 -0700
commitdcf3c935dd9e8e76c9922e88672fa4ad6a8a4df8 (patch)
treef8ce3ab321c70b666e14ed145faacc8b3c0ea82c /arch/um/kernel
parent7a400bf28334fc7734639db3566394e1fc80670c (diff)
parent1aee020155f364ef538370d3392969f1077b9bae (diff)
downloadlinux-dcf3c935dd9e8e76c9922e88672fa4ad6a8a4df8.tar.bz2
Merge tag 'for-linus-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml
Pull UML updates from Richard Weinberger: - Support for optimized routines based on the host CPU - Support for PCI via virtio - Various fixes * tag 'for-linus-5.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rw/uml: um: remove unneeded semicolon in um_arch.c um: Remove the repeated declaration um: fix error return code in winch_tramp() um: fix error return code in slip_open() um: Fix stack pointer alignment um: implement flush_cache_vmap/flush_cache_vunmap um: add a UML specific futex implementation um: enable the use of optimized xor routines in UML um: Add support for host CPU flags and alignment um: allow not setting extra rpaths in the linux binary um: virtio/pci: enable suspend/resume um: add PCI over virtio emulation driver um: irqs: allow invoking time-travel handler multiple times um: time-travel/signals: fix ndelay() in interrupt um: expose time-travel mode to userspace side um: export signals_enabled directly um: remove unused smp_sigio_handler() declaration lib: add iomem emulation (logic_iomem) um: allow disabling NO_IOMEM
Diffstat (limited to 'arch/um/kernel')
-rw-r--r--arch/um/kernel/Makefile14
-rw-r--r--arch/um/kernel/ioport.c13
-rw-r--r--arch/um/kernel/irq.c52
-rw-r--r--arch/um/kernel/ksyms.c2
-rw-r--r--arch/um/kernel/skas/clone.c2
-rw-r--r--arch/um/kernel/skas/uaccess.c136
-rw-r--r--arch/um/kernel/time.c35
-rw-r--r--arch/um/kernel/um_arch.c48
8 files changed, 258 insertions, 44 deletions
diff --git a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
index e698e0c7dbdc..1d18e4e46989 100644
--- a/arch/um/kernel/Makefile
+++ b/arch/um/kernel/Makefile
@@ -17,18 +17,19 @@ extra-y := vmlinux.lds
obj-y = config.o exec.o exitcode.o irq.o ksyms.o mem.o \
physmem.o process.o ptrace.o reboot.o sigio.o \
signal.o syscall.o sysrq.o time.o tlb.o trap.o \
- um_arch.o umid.o maccess.o kmsg_dump.o skas/
+ um_arch.o umid.o maccess.o kmsg_dump.o capflags.o skas/
obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
obj-$(CONFIG_GPROF) += gprof_syms.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
+obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
USER_OBJS := config.o
include arch/um/scripts/Makefile.rules
-targets := config.c config.tmp
+targets := config.c config.tmp capflags.c
# Be careful with the below Sed code - sed is pitfall-rich!
# We use sed to lower build requirements, for "embedded" builders for instance.
@@ -43,6 +44,15 @@ quiet_cmd_quote1 = QUOTE $@
$(obj)/config.c: $(src)/config.c.in $(obj)/config.tmp FORCE
$(call if_changed,quote2)
+quiet_cmd_mkcapflags = MKCAP $@
+ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/../../x86/kernel/cpu/mkcapflags.sh $@ $^
+
+cpufeature = $(src)/../../x86/include/asm/cpufeatures.h
+vmxfeature = $(src)/../../x86/include/asm/vmxfeatures.h
+
+$(obj)/capflags.c: $(cpufeature) $(vmxfeature) $(src)/../../x86/kernel/cpu/mkcapflags.sh FORCE
+ $(call if_changed,mkcapflags)
+
quiet_cmd_quote2 = QUOTE $@
cmd_quote2 = sed -e '/CONFIG/{' \
-e 's/"CONFIG"//' \
diff --git a/arch/um/kernel/ioport.c b/arch/um/kernel/ioport.c
new file mode 100644
index 000000000000..7220615b3beb
--- /dev/null
+++ b/arch/um/kernel/ioport.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Author: Johannes Berg <johannes@sipsolutions.net>
+ */
+#include <asm/iomap.h>
+#include <asm-generic/pci_iomap.h>
+
+void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port,
+ unsigned int nr)
+{
+ return NULL; /* stub: dev, port and nr are ignored and no mapping is ever returned */
+}
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 82af5191e73d..a8873d9bc28b 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -56,7 +56,7 @@ struct irq_entry {
static DEFINE_SPINLOCK(irq_lock);
static LIST_HEAD(active_fds);
-static DECLARE_BITMAP(irqs_allocated, NR_IRQS);
+static DECLARE_BITMAP(irqs_allocated, UM_LAST_SIGNAL_IRQ);
static bool irqs_suspended;
static void irq_io_loop(struct irq_reg *irq, struct uml_pt_regs *regs)
@@ -101,10 +101,12 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry,
if (!reg->timetravel_handler)
return false;
- /* prevent nesting - we'll get it again later when we SIGIO ourselves */
- if (reg->pending_on_resume)
- return true;
-
+ /*
+ * Handle all messages - we might get multiple even while
+ * interrupts are already suspended, due to suspend order
+ * etc. Note that time_travel_add_irq_event() will not add
+ * an event twice, if it's pending already "first wins".
+ */
reg->timetravel_handler(reg->irq, entry->fd, reg->id, &reg->event);
if (!reg->event.pending)
@@ -123,7 +125,8 @@ static bool irq_do_timetravel_handler(struct irq_entry *entry,
#endif
static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type t,
- struct uml_pt_regs *regs)
+ struct uml_pt_regs *regs,
+ bool timetravel_handlers_only)
{
struct irq_reg *reg = &entry->reg[t];
@@ -136,18 +139,29 @@ static void sigio_reg_handler(int idx, struct irq_entry *entry, enum um_irq_type
if (irq_do_timetravel_handler(entry, t))
return;
- if (irqs_suspended)
+ /*
+ * If we're called to only run time-travel handlers then don't
+ * actually proceed but mark sigio as pending (if applicable).
+ * For suspend/resume, timetravel_handlers_only may be true
+ * despite time-travel not being configured and used.
+ */
+ if (timetravel_handlers_only) {
+#ifdef CONFIG_UML_TIME_TRAVEL_SUPPORT
+ mark_sigio_pending();
+#endif
return;
+ }
irq_io_loop(reg, regs);
}
-void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+static void _sigio_handler(struct uml_pt_regs *regs,
+ bool timetravel_handlers_only)
{
struct irq_entry *irq_entry;
int n, i;
- if (irqs_suspended && !um_irq_timetravel_handler_used())
+ if (timetravel_handlers_only && !um_irq_timetravel_handler_used())
return;
while (1) {
@@ -172,14 +186,20 @@ void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
irq_entry = os_epoll_get_data_pointer(i);
for (t = 0; t < NUM_IRQ_TYPES; t++)
- sigio_reg_handler(i, irq_entry, t, regs);
+ sigio_reg_handler(i, irq_entry, t, regs,
+ timetravel_handlers_only);
}
}
- if (!irqs_suspended)
+ if (!timetravel_handlers_only)
free_irqs();
}
+void sigio_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
+{
+ _sigio_handler(regs, irqs_suspended); /* irqs_suspended is passed as timetravel_handlers_only; sig and unused_si are not used */
+}
+
static struct irq_entry *get_irq_entry_by_fd(int fd)
{
struct irq_entry *walk;
@@ -399,7 +419,8 @@ unsigned int do_IRQ(int irq, struct uml_pt_regs *regs)
void um_free_irq(int irq, void *dev)
{
- if (WARN(irq < 0 || irq > NR_IRQS, "freeing invalid irq %d", irq))
+ if (WARN(irq < 0 || irq > UM_LAST_SIGNAL_IRQ,
+ "freeing invalid irq %d", irq))
return;
free_irq_by_irq_and_dev(irq, dev);
@@ -467,6 +488,11 @@ int um_request_irq_tt(int irq, int fd, enum um_irq_type type,
devname, dev_id, timetravel_handler);
}
EXPORT_SYMBOL(um_request_irq_tt);
+
+void sigio_run_timetravel_handlers(void)
+{
+ _sigio_handler(NULL, true); /* regs = NULL, timetravel_handlers_only = true */
+}
#endif
#ifdef CONFIG_PM_SLEEP
@@ -623,7 +649,7 @@ void __init init_IRQ(void)
irq_set_chip_and_handler(TIMER_IRQ, &alarm_irq_type, handle_edge_irq);
- for (i = 1; i < NR_IRQS; i++)
+ for (i = 1; i < UM_LAST_SIGNAL_IRQ; i++)
irq_set_chip_and_handler(i, &normal_irq_type, handle_edge_irq);
/* Initialize EPOLL Loop */
os_setup_epoll();
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index 8ade54a86a7e..b1e5634398d0 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -7,7 +7,7 @@
#include <os.h>
EXPORT_SYMBOL(set_signals);
-EXPORT_SYMBOL(get_signals);
+EXPORT_SYMBOL(signals_enabled);
EXPORT_SYMBOL(os_stat_fd);
EXPORT_SYMBOL(os_stat_file);
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
index 592cdb138441..5afac0fef24e 100644
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
@@ -29,7 +29,7 @@ stub_clone_handler(void)
long err;
err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
- (unsigned long)data + UM_KERN_PAGE_SIZE / 2 - sizeof(void *));
+ (unsigned long)data + UM_KERN_PAGE_SIZE / 2);
if (err) {
data->parent_err = err;
goto done;
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index 2dec915abe6f..6c76df96e858 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -11,6 +11,7 @@
#include <asm/current.h>
#include <asm/page.h>
#include <kern_util.h>
+#include <asm/futex.h>
#include <os.h>
pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
@@ -248,3 +249,138 @@ long __strnlen_user(const void __user *str, long len)
return 0;
}
EXPORT_SYMBOL(__strnlen_user);
+
+/**
+ * arch_futex_atomic_op_inuser() - Atomic arithmetic operation with constant
+ * argument and comparison of the previous
+ * futex value with another constant.
+ *
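+ * @op: operation to execute (one of the FUTEX_OP_* values)
+ * @oparg: argument to the operation
+ * @oval: pointer to store the value read from @uaddr before the operation
+ * @uaddr: pointer to user space address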
+ *
+ * Return:
+ * 0 - On success
+ * -EFAULT - User access resulted in a page fault
+ * -EAGAIN - Atomic operation was unable to complete due to contention
+ * -ENOSYS - Operation not supported
+ */
+
+int arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
+{
+ int oldval, ret; /* oldval is only read at the end when ret == 0 */
+ struct page *page;
+ unsigned long addr = (unsigned long) uaddr;
+ pte_t *pte;
+
+ ret = -EFAULT; /* value returned via out_inuser when maybe_map() yields no PTE */
+ if (!access_ok(uaddr, sizeof(*uaddr)))
+ return -EFAULT;
+ preempt_disable();
+ pte = maybe_map(addr, 1);
+ if (pte == NULL)
+ goto out_inuser;
+
+ page = pte_page(*pte);
+#ifdef CONFIG_64BIT
+ pagefault_disable();
+ addr = (unsigned long) page_address(page) +
+ (((unsigned long) addr) & ~PAGE_MASK);
+#else
+ addr = (unsigned long) kmap_atomic(page) +
+ ((unsigned long) addr & ~PAGE_MASK);
+#endif
+ uaddr = (u32 *) addr; /* from here on uaddr is the address computed above, not the caller's pointer */
+ oldval = *uaddr; /* snapshot taken before the operation is applied */
+
+ ret = 0;
+
+ switch (op) {
+ case FUTEX_OP_SET:
+ *uaddr = oparg;
+ break;
+ case FUTEX_OP_ADD:
+ *uaddr += oparg;
+ break;
+ case FUTEX_OP_OR:
+ *uaddr |= oparg;
+ break;
+ case FUTEX_OP_ANDN:
+ *uaddr &= ~oparg;
+ break;
+ case FUTEX_OP_XOR:
+ *uaddr ^= oparg;
+ break;
+ default:
+ ret = -ENOSYS; /* unknown op: *uaddr is left unmodified */
+ }
+#ifdef CONFIG_64BIT
+ pagefault_enable();
+#else
+ kunmap_atomic((void *)addr);
+#endif
+
+out_inuser:
+ preempt_enable();
+
+ if (ret == 0) /* *oval is written only on success */
+ *oval = oldval;
+
+ return ret;
+}
+EXPORT_SYMBOL(arch_futex_atomic_op_inuser);
+
+/**
+ * futex_atomic_cmpxchg_inatomic() - Compare and exchange the content of the
+ *				uaddr with newval if the current value is
+ *				oldval.
+ * @uval:	pointer to store the value found at @uaddr
+ * @uaddr:	pointer to user space address
+ * @oldval:	old value
+ * @newval:	new value to store to @uaddr
+ *
+ * *@uval is written only when 0 is returned. The caller has to compare it
+ * with @oldval to find out whether the exchange actually took place.
+ *
+ * Return:
+ * 0 - The user word was accessed (whether or not it was exchanged)
+ * -EFAULT - access_ok() rejected @uaddr or maybe_map() found no mapping
+ */
+
+int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+			      u32 oldval, u32 newval)
+{
+	struct page *page;
+	pte_t *pte;
+	int ret = -EFAULT;
+
+	if (!access_ok(uaddr, sizeof(*uaddr)))
+		return -EFAULT;
+
+	preempt_disable();
+	pte = maybe_map((unsigned long) uaddr, 1);
+	if (pte == NULL)
+		goto out_inatomic;
+
+	page = pte_page(*pte);
+	/* From here on uaddr is the address derived from the page, not the caller's pointer. */
+#ifdef CONFIG_64BIT
+	pagefault_disable();
+	uaddr = page_address(page) + (((unsigned long) uaddr) & ~PAGE_MASK);
+#else
+	uaddr = kmap_atomic(page) + ((unsigned long) uaddr & ~PAGE_MASK);
+#endif
+
+	/*
+	 * cmpxchg() hands back the value it found at uaddr, which is exactly
+	 * what the caller expects in *uval; no separate read is needed and
+	 * the result is not parked in ret only to be overwritten.
+	 */
+	*uval = cmpxchg(uaddr, oldval, newval);
+
+#ifdef CONFIG_64BIT
+	pagefault_enable();
+#else
+	kunmap_atomic(uaddr);
+#endif
+	ret = 0;
+
+out_inatomic:
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL(futex_atomic_cmpxchg_inatomic);
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index e0cdb9694fb8..fddd1dec27e6 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -68,23 +68,15 @@ static void time_travel_handle_message(struct um_timetravel_msg *msg,
int ret;
/*
- * Poll outside the locked section (if we're not called to only read
- * the response) so we can get interrupts for e.g. virtio while we're
- * here, but then we need to lock to not get interrupted between the
- * read of the message and write of the ACK.
+ * We can't unlock here, but interrupt signals with a timetravel_handler
+ * (see um_request_irq_tt) get to the timetravel_handler anyway.
*/
if (mode != TTMH_READ) {
- bool disabled = irqs_disabled();
+ BUG_ON(mode == TTMH_IDLE && !irqs_disabled());
- BUG_ON(mode == TTMH_IDLE && !disabled);
-
- if (disabled)
- local_irq_enable();
while (os_poll(1, &time_travel_ext_fd) != 0) {
/* nothing */
}
- if (disabled)
- local_irq_disable();
}
ret = os_read_file(time_travel_ext_fd, msg, sizeof(*msg));
@@ -123,15 +115,15 @@ static u64 time_travel_ext_req(u32 op, u64 time)
.time = time,
.seq = mseq,
};
- unsigned long flags;
/*
- * We need to save interrupts here and only restore when we
- * got the ACK - otherwise we can get interrupted and send
- * another request while we're still waiting for an ACK, but
- * the peer doesn't know we got interrupted and will send
- * the ACKs in the same order as the message, but we'd need
- * to see them in the opposite order ...
+ * We need to block even the timetravel handlers of SIGIO here and
+ * only restore their use when we got the ACK - otherwise we may
+ * (will) get interrupted by that, try to queue the IRQ for future
+ * processing and thus send another request while we're still waiting
+ * for an ACK, but the peer doesn't know we got interrupted and will
+ * send the ACKs in the same order as the message, but we'd need to
+ * see them in the opposite order ...
*
* This wouldn't matter *too* much, but some ACKs carry the
* current time (for UM_TIMETRAVEL_GET) and getting another
@@ -140,7 +132,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)
* The sequence number assignment that happens here lets us
* debug such message handling issues more easily.
*/
- local_irq_save(flags);
+ block_signals_hard();
os_write_file(time_travel_ext_fd, &msg, sizeof(msg));
while (msg.op != UM_TIMETRAVEL_ACK)
@@ -152,7 +144,7 @@ static u64 time_travel_ext_req(u32 op, u64 time)
if (op == UM_TIMETRAVEL_GET)
time_travel_set_time(msg.time);
- local_irq_restore(flags);
+ unblock_signals_hard();
return msg.time;
}
@@ -352,9 +344,6 @@ void deliver_time_travel_irqs(void)
while ((e = list_first_entry_or_null(&time_travel_irqs,
struct time_travel_event,
list))) {
- WARN(e->time != time_travel_time,
- "time moved from %lld to %lld before IRQ delivery\n",
- time_travel_time, e->time);
list_del(&e->list);
e->pending = false;
e->fn(e);
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 9512253947d5..a149a5e9a16a 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -6,6 +6,7 @@
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/mm.h>
+#include <linux/ctype.h>
#include <linux/module.h>
#include <linux/panic_notifier.h>
#include <linux/seq_file.h>
@@ -17,6 +18,7 @@
#include <linux/suspend.h>
#include <asm/processor.h>
+#include <asm/cpufeature.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <as-layout.h>
@@ -51,9 +53,13 @@ static void __init add_arg(char *arg)
*/
struct cpuinfo_um boot_cpu_data = {
.loops_per_jiffy = 0,
- .ipi_pipe = { -1, -1 }
+ .ipi_pipe = { -1, -1 },
+ .cache_alignment = L1_CACHE_BYTES,
+ .x86_capability = { 0 }
};
+EXPORT_SYMBOL(boot_cpu_data);
+
union thread_union cpu0_irqstack
__section(".data..init_irqstack") =
{ .thread_info = INIT_THREAD_INFO(init_task) };
@@ -63,17 +69,25 @@ static char host_info[(__NEW_UTS_LEN + 1) * 5];
static int show_cpuinfo(struct seq_file *m, void *v)
{
- int index = 0;
+ int i = 0;
- seq_printf(m, "processor\t: %d\n", index);
+ seq_printf(m, "processor\t: %d\n", i);
seq_printf(m, "vendor_id\t: User Mode Linux\n");
seq_printf(m, "model name\t: UML\n");
seq_printf(m, "mode\t\t: skas\n");
seq_printf(m, "host\t\t: %s\n", host_info);
- seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
+ seq_printf(m, "fpu\t\t: %s\n", cpu_has(&boot_cpu_data, X86_FEATURE_FPU) ? "yes" : "no");
+ seq_printf(m, "flags\t\t:");
+ for (i = 0; i < 32*NCAPINTS; i++)
+ if (cpu_has(&boot_cpu_data, i) && (x86_cap_flags[i] != NULL))
+ seq_printf(m, " %s", x86_cap_flags[i]);
+ seq_printf(m, "\n");
+ seq_printf(m, "cache_alignment\t: %d\n", boot_cpu_data.cache_alignment);
+ seq_printf(m, "bogomips\t: %lu.%02lu\n",
loops_per_jiffy/(500000/HZ),
(loops_per_jiffy/(5000/HZ)) % 100);
+
return 0;
}
@@ -262,6 +276,30 @@ EXPORT_SYMBOL(end_iomem);
#define MIN_VMALLOC (32 * 1024 * 1024)
+/*
+ * Return true if @flag occurs in @line as a complete word: preceded by the
+ * start of the line, whitespace or ':' and followed by whitespace or the end
+ * of the string. An empty @flag never matches.
+ */
+static bool line_has_cpu_flag(const char *line, const char *flag)
+{
+	size_t len = strlen(flag);
+	const char *hit = line;
+
+	if (len == 0)
+		return false;
+
+	while ((hit = strstr(hit, flag)) != NULL) {
+		bool left_ok = hit == line || isspace(hit[-1]) || hit[-1] == ':';
+		bool right_ok = hit[len] == '\0' || isspace(hit[len]);
+
+		if (left_ok && right_ok)
+			return true;
+		hit++;
+	}
+
+	return false;
+}
+
+/*
+ * Set in boot_cpu_data every capability whose x86_cap_flags[] name appears
+ * in @line.
+ *
+ * Names are matched as whole words. A plain substring search would also set
+ * a capability whenever its name is merely contained in a longer flag, e.g.
+ * "3dnow" in "3dnowprefetch" or "sse" in "sse2".
+ */
+static void parse_host_cpu_flags(char *line)
+{
+	int i;
+
+	for (i = 0; i < 32*NCAPINTS; i++) {
+		if (x86_cap_flags[i] == NULL)
+			continue;
+		if (line_has_cpu_flag(line, x86_cap_flags[i]))
+			set_cpu_cap(&boot_cpu_data, i);
+	}
+}
+/*
+ * Parse the text after the first ':' in @line as a decimal number and use it
+ * as boot_cpu_data.cache_alignment when it is a power of two; any other value
+ * (or a parse failure) selects L1_CACHE_BYTES instead. A line without ':'
+ * leaves the current setting untouched.
+ */
+static void parse_cache_line(char *line)
+{
+	unsigned long res;	/* must match the unsigned long * taken by kstrtoul() */
+	char *to_parse = strstr(line, ":");
+
+	if (!to_parse)
+		return;
+
+	/* Skip the ':' and any whitespace in front of the number. */
+	to_parse++;
+	while (*to_parse != 0 && isspace(*to_parse))
+		to_parse++;
+
+	if (kstrtoul(to_parse, 10, &res) == 0 && is_power_of_2(res))
+		boot_cpu_data.cache_alignment = res;
+	else
+		boot_cpu_data.cache_alignment = L1_CACHE_BYTES;
+}
+
int __init linux_main(int argc, char **argv)
{
unsigned long avail, diff;
@@ -298,6 +336,8 @@ int __init linux_main(int argc, char **argv)
/* OS sanity checks that need to happen before the kernel runs */
os_early_checks();
+ get_host_cpu_features(parse_host_cpu_flags, parse_cache_line);
+
brk_start = (unsigned long) sbrk(0);
/*