diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-18 09:24:01 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-18 09:24:01 -0800 |
commit | 53861af9a17022898619a2ae4ead0dfc601b7c13 (patch) | |
tree | dc11088d9e86fa1d8d8479974864153a8f976897 /drivers/lguest/x86 | |
parent | 5c2770079fb9b8c5bfb7113d9e76de66e77a0e24 (diff) | |
parent | 5b40a7daf51812b35cf05d1601a779a7043f8414 (diff) | |
download | linux-53861af9a17022898619a2ae4ead0dfc601b7c13.tar.bz2 |
Merge tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux
Pull virtio updates from Rusty Russell:
"OK, this has the big virtio 1.0 implementation, as specified by OASIS.
On top of that is the major rework of lguest, to use PCI and virtio
1.0, to double-check the implementation.
Then come the inevitable fixes and cleanups from that work"
* tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (80 commits)
virtio: don't set VIRTIO_CONFIG_S_DRIVER_OK twice.
virtio_net: unconditionally define struct virtio_net_hdr_v1.
tools/lguest: don't use legacy definitions for net device in example launcher.
virtio: Don't expose legacy net features when VIRTIO_NET_NO_LEGACY defined.
tools/lguest: use common error macros in the example launcher.
tools/lguest: give virtqueues names for better error messages
tools/lguest: more documentation and checking of virtio 1.0 compliance.
lguest: don't look in console features to find emerg_wr.
tools/lguest: don't start devices until DRIVER_OK status set.
tools/lguest: handle indirect partway through chain.
tools/lguest: insert driver references from the 1.0 spec (4.1 Virtio Over PCI)
tools/lguest: insert device references from the 1.0 spec (4.1 Virtio Over PCI)
tools/lguest: rename virtio_pci_cfg_cap field to match spec.
tools/lguest: fix features_accepted logic in example launcher.
tools/lguest: handle device reset correctly in example launcher.
virtual: Documentation: simplify and generalize paravirt_ops.txt
lguest: remove NOTIFY call and eventfd facility.
lguest: remove NOTIFY facility from demonstration launcher.
lguest: use the PCI console device's emerg_wr for early boot messages.
lguest: always put console in PCI slot #1.
...
Diffstat (limited to 'drivers/lguest/x86')
-rw-r--r-- | drivers/lguest/x86/core.c | 198 |
1 files changed, 107 insertions, 91 deletions
diff --git a/drivers/lguest/x86/core.c b/drivers/lguest/x86/core.c index 6adfd7ba4c97..30f2aef69d78 100644 --- a/drivers/lguest/x86/core.c +++ b/drivers/lguest/x86/core.c @@ -182,6 +182,52 @@ static void run_guest_once(struct lg_cpu *cpu, struct lguest_pages *pages) } /*:*/ +unsigned long *lguest_arch_regptr(struct lg_cpu *cpu, size_t reg_off, bool any) +{ + switch (reg_off) { + case offsetof(struct pt_regs, bx): + return &cpu->regs->ebx; + case offsetof(struct pt_regs, cx): + return &cpu->regs->ecx; + case offsetof(struct pt_regs, dx): + return &cpu->regs->edx; + case offsetof(struct pt_regs, si): + return &cpu->regs->esi; + case offsetof(struct pt_regs, di): + return &cpu->regs->edi; + case offsetof(struct pt_regs, bp): + return &cpu->regs->ebp; + case offsetof(struct pt_regs, ax): + return &cpu->regs->eax; + case offsetof(struct pt_regs, ip): + return &cpu->regs->eip; + case offsetof(struct pt_regs, sp): + return &cpu->regs->esp; + } + + /* Launcher can read these, but we don't allow any setting. */ + if (any) { + switch (reg_off) { + case offsetof(struct pt_regs, ds): + return &cpu->regs->ds; + case offsetof(struct pt_regs, es): + return &cpu->regs->es; + case offsetof(struct pt_regs, fs): + return &cpu->regs->fs; + case offsetof(struct pt_regs, gs): + return &cpu->regs->gs; + case offsetof(struct pt_regs, cs): + return &cpu->regs->cs; + case offsetof(struct pt_regs, flags): + return &cpu->regs->eflags; + case offsetof(struct pt_regs, ss): + return &cpu->regs->ss; + } + } + + return NULL; +} + /*M:002 * There are hooks in the scheduler which we can register to tell when we * get kicked off the CPU (preempt_notifier_register()). This would allow us @@ -269,110 +315,73 @@ void lguest_arch_run_guest(struct lg_cpu *cpu) * usually attached to a PC. * * When the Guest uses one of these instructions, we get a trap (General - * Protection Fault) and come here. We see if it's one of those troublesome - * instructions and skip over it. We return true if we did. 
+ * Protection Fault) and come here. We queue this to be sent out to the + * Launcher to handle. */ -static int emulate_insn(struct lg_cpu *cpu) -{ - u8 insn; - unsigned int insnlen = 0, in = 0, small_operand = 0; - /* - * The eip contains the *virtual* address of the Guest's instruction: - * walk the Guest's page tables to find the "physical" address. - */ - unsigned long physaddr = guest_pa(cpu, cpu->regs->eip); - - /* - * This must be the Guest kernel trying to do something, not userspace! - * The bottom two bits of the CS segment register are the privilege - * level. - */ - if ((cpu->regs->cs & 3) != GUEST_PL) - return 0; - - /* Decoding x86 instructions is icky. */ - insn = lgread(cpu, physaddr, u8); - /* - * Around 2.6.33, the kernel started using an emulation for the - * cmpxchg8b instruction in early boot on many configurations. This - * code isn't paravirtualized, and it tries to disable interrupts. - * Ignore it, which will Mostly Work. - */ - if (insn == 0xfa) { - /* "cli", or Clear Interrupt Enable instruction. Skip it. */ - cpu->regs->eip++; - return 1; +/* + * The eip contains the *virtual* address of the Guest's instruction: + * we copy the instruction here so the Launcher doesn't have to walk + * the page tables to decode it. We handle the case (eg. in a kernel + * module) where the instruction is over two pages, and the pages are + * virtually but not physically contiguous. + * + * The longest possible x86 instruction is 15 bytes, but we don't handle + * anything that strange. + */ +static void copy_from_guest(struct lg_cpu *cpu, + void *dst, unsigned long vaddr, size_t len) +{ + size_t to_page_end = PAGE_SIZE - (vaddr % PAGE_SIZE); + unsigned long paddr; + + BUG_ON(len > PAGE_SIZE); + + /* If it goes over a page, copy in two parts. */ + if (len > to_page_end) { + /* But make sure the next page is mapped! 
*/ + if (__guest_pa(cpu, vaddr + to_page_end, &paddr)) + copy_from_guest(cpu, dst + to_page_end, + vaddr + to_page_end, + len - to_page_end); + else + /* Otherwise fill with zeroes. */ + memset(dst + to_page_end, 0, len - to_page_end); + len = to_page_end; } - /* - * 0x66 is an "operand prefix". It means a 16, not 32 bit in/out. - */ - if (insn == 0x66) { - small_operand = 1; - /* The instruction is 1 byte so far, read the next byte. */ - insnlen = 1; - insn = lgread(cpu, physaddr + insnlen, u8); - } + /* This will kill the guest if it isn't mapped, but that + * shouldn't happen. */ + __lgread(cpu, dst, guest_pa(cpu, vaddr), len); +} - /* - * We can ignore the lower bit for the moment and decode the 4 opcodes - * we need to emulate. - */ - switch (insn & 0xFE) { - case 0xE4: /* in <next byte>,%al */ - insnlen += 2; - in = 1; - break; - case 0xEC: /* in (%dx),%al */ - insnlen += 1; - in = 1; - break; - case 0xE6: /* out %al,<next byte> */ - insnlen += 2; - break; - case 0xEE: /* out %al,(%dx) */ - insnlen += 1; - break; - default: - /* OK, we don't know what this is, can't emulate. */ - return 0; - } - /* - * If it was an "IN" instruction, they expect the result to be read - * into %eax, so we change %eax. We always return all-ones, which - * traditionally means "there's nothing there". - */ - if (in) { - /* Lower bit tells means it's a 32/16 bit access */ - if (insn & 0x1) { - if (small_operand) - cpu->regs->eax |= 0xFFFF; - else - cpu->regs->eax = 0xFFFFFFFF; - } else - cpu->regs->eax |= 0xFF; - } - /* Finally, we've "done" the instruction, so move past it. */ - cpu->regs->eip += insnlen; - /* Success! 
*/ - return 1; +static void setup_emulate_insn(struct lg_cpu *cpu) +{ + cpu->pending.trap = 13; + copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip, + sizeof(cpu->pending.insn)); +} + +static void setup_iomem_insn(struct lg_cpu *cpu, unsigned long iomem_addr) +{ + cpu->pending.trap = 14; + cpu->pending.addr = iomem_addr; + copy_from_guest(cpu, cpu->pending.insn, cpu->regs->eip, + sizeof(cpu->pending.insn)); } /*H:050 Once we've re-enabled interrupts, we look at why the Guest exited. */ void lguest_arch_handle_trap(struct lg_cpu *cpu) { + unsigned long iomem_addr; + switch (cpu->regs->trapnum) { case 13: /* We've intercepted a General Protection Fault. */ - /* - * Check if this was one of those annoying IN or OUT - * instructions which we need to emulate. If so, we just go - * back into the Guest after we've done it. - */ + /* Hand to Launcher to emulate those pesky IN and OUT insns */ if (cpu->regs->errcode == 0) { - if (emulate_insn(cpu)) - return; + setup_emulate_insn(cpu); + return; } break; case 14: /* We've intercepted a Page Fault. */ @@ -387,9 +396,16 @@ void lguest_arch_handle_trap(struct lg_cpu *cpu) * whether kernel or userspace code. */ if (demand_page(cpu, cpu->arch.last_pagefault, - cpu->regs->errcode)) + cpu->regs->errcode, &iomem_addr)) return; + /* Was this an access to memory mapped IO? */ + if (iomem_addr) { + /* Tell Launcher, let it handle it. */ + setup_iomem_insn(cpu, iomem_addr); + return; + } + /* * OK, it's really not there (or not OK): the Guest needs to * know. We write out the cr2 value so it knows where the |