diff options
Diffstat (limited to 'tools')
-rw-r--r-- | tools/lguest/lguest.c | 355 |
1 files changed, 22 insertions, 333 deletions
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c index 663166aff1f5..b5ac73525f6d 100644 --- a/tools/lguest/lguest.c +++ b/tools/lguest/lguest.c @@ -117,14 +117,6 @@ struct device_list { /* Counter to print out convenient device numbers. */ unsigned int device_num; - /* The descriptor page for the devices. */ - u8 *descpage; - - /* A single linked list of devices. */ - struct device *dev; - /* And a pointer to the last device for easy append. */ - struct device *lastdev; - /* PCI devices. */ struct device *pci[MAX_PCI_DEVICES]; }; @@ -170,16 +162,6 @@ struct pci_config { /* The device structure describes a single device. */ struct device { - /* The linked-list pointer. */ - struct device *next; - - /* The device's descriptor, as mapped into the Guest. */ - struct lguest_device_desc *desc; - - /* We can't trust desc values once Guest has booted: we use these. */ - unsigned int feature_len; - unsigned int num_vq; - /* The name of this device, for --verbose. */ const char *name; @@ -216,9 +198,6 @@ struct virtqueue { /* Which device owns me. */ struct device *dev; - /* The configuration for this queue. */ - struct lguest_vqconfig config; - /* The actual ring of buffers. */ struct vring vring; @@ -301,13 +280,6 @@ static void iov_consume(struct iovec iov[], unsigned num_iov, errx(1, "iovec too short!"); } -/* The device virtqueue descriptors are followed by feature bitmasks. */ -static u8 *get_feature_bits(struct device *dev) -{ - return (u8 *)(dev->desc + 1) - + dev->num_vq * sizeof(struct lguest_vqconfig); -} - /*L:100 * The Launcher code itself takes us out into userspace, that scary place where * pointers run wild and free! Unfortunately, like most userspace programs, @@ -378,17 +350,6 @@ static void *map_zeroed_pages(unsigned int num) return addr + getpagesize(); } -/* Get some more pages for a device. */ -static void *get_pages(unsigned int num) -{ - void *addr = from_guest_phys(guest_limit); - - guest_limit += num * getpagesize(); - if (guest_limit > guest_max) - errx(1, "Not enough memory for devices"); - return addr; -} - /* Get some bytes which won't be mapped into the guest. */ static unsigned long get_mmio_region(size_t size) { @@ -701,7 +662,7 @@ static unsigned next_desc(struct vring_desc *desc, */ static void trigger_irq(struct virtqueue *vq) { - unsigned long buf[] = { LHREQ_IRQ, vq->config.irq }; + unsigned long buf[] = { LHREQ_IRQ, vq->dev->config.irq_line }; /* Don't inform them if nothing used. */ if (!vq->pending_used) @@ -713,13 +674,12 @@ static void trigger_irq(struct virtqueue *vq) return; } - /* For a PCI device, set isr to 1 (queue interrupt pending) */ - if (vq->dev->mmio) - vq->dev->mmio->isr = 0x1; + /* Set isr to 1 (queue interrupt pending) */ + vq->dev->mmio->isr = 0x1; /* Send the Guest an interrupt tell them we used something up. */ if (write(lguest_fd, buf, sizeof(buf)) != 0) - err(1, "Triggering irq %i", vq->config.irq); + err(1, "Triggering irq %i", vq->dev->config.irq_line); } /* @@ -1085,21 +1045,18 @@ static void reset_device(struct device *dev) verbose("Resetting device %s\n", dev->name); /* Clear any features they've acked. */ - memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len); + dev->features_accepted = 0; /* We're going to be explicitly killing threads, so ignore them. */ signal(SIGCHLD, SIG_IGN); - /* Zero out the virtqueues, get rid of their threads */ + /* Get rid of the virtqueue threads */ for (vq = dev->vq; vq; vq = vq->next) { if (vq->thread != (pid_t)-1) { kill(vq->thread, SIGTERM); waitpid(vq->thread, NULL, 0); vq->thread = (pid_t)-1; } - memset(vq->vring.desc, 0, - vring_size(vq->config.num, LGUEST_VRING_ALIGN)); - lg_last_avail(vq) = 0; } dev->running = false; @@ -1107,122 +1064,27 @@ static void reset_device(struct device *dev) signal(SIGCHLD, (void *)kill_launcher); } -/*L:216 - * This actually creates the thread which services the virtqueue for a device. - */ -static void create_thread(struct virtqueue *vq) -{ - /* - * Create stack for thread. Since the stack grows upwards, we point - * the stack pointer to the end of this region. - */ - char *stack = malloc(32768); - unsigned long args[] = { LHREQ_EVENTFD, - vq->config.pfn*getpagesize(), 0 }; - - /* Create a zero-initialized eventfd. */ - vq->eventfd = eventfd(0, 0); - if (vq->eventfd < 0) - err(1, "Creating eventfd"); - args[2] = vq->eventfd; - - /* - * Attach an eventfd to this virtqueue: it will go off when the Guest - * does an LHCALL_NOTIFY for this vq. - */ - if (write(lguest_fd, &args, sizeof(args)) != 0) - err(1, "Attaching eventfd"); - - /* - * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so - * we get a signal if it dies. - */ - vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq); - if (vq->thread == (pid_t)-1) - err(1, "Creating clone"); - - /* We close our local copy now the child has it. */ - close(vq->eventfd); -} - -static void start_device(struct device *dev) +static void cleanup_devices(void) { unsigned int i; - struct virtqueue *vq; - - verbose("Device %s OK: offered", dev->name); - for (i = 0; i < dev->feature_len; i++) - verbose(" %02x", get_feature_bits(dev)[i]); - verbose(", accepted"); - for (i = 0; i < dev->feature_len; i++) - verbose(" %02x", get_feature_bits(dev) - [dev->feature_len+i]); - for (vq = dev->vq; vq; vq = vq->next) { - if (vq->service) - create_thread(vq); + for (i = 1; i < MAX_PCI_DEVICES; i++) { + struct device *d = devices.pci[i]; + if (!d) + continue; + reset_device(d); } - dev->running = true; -} - -static void cleanup_devices(void) -{ - struct device *dev; - - for (dev = devices.dev; dev; dev = dev->next) - reset_device(dev); /* If we saved off the original terminal settings, restore them now. */ if (orig_term.c_lflag & (ISIG|ICANON|ECHO)) tcsetattr(STDIN_FILENO, TCSANOW, &orig_term); } -/* When the Guest tells us they updated the status field, we handle it. */ -static void update_device_status(struct device *dev) -{ - /* A zero status is a reset, otherwise it's a set of flags. */ - if (dev->desc->status == 0) - reset_device(dev); - else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) { - warnx("Device %s configuration FAILED", dev->name); - if (dev->running) - reset_device(dev); - } else { - if (dev->running) - err(1, "Device %s features finalized twice", dev->name); - start_device(dev); - } -} - /*L:215 - * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In - * particular, it's used to notify us of device status changes during boot. + * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. */ static void handle_output(unsigned long addr) { - struct device *i; - - /* Check each device. */ - for (i = devices.dev; i; i = i->next) { - struct virtqueue *vq; - - /* - * Notifications to device descriptors mean they updated the - * device status. - */ - if (from_guest_phys(addr) == i->desc) { - update_device_status(i); - return; - } - - /* Devices should not be used before features are finalized. */ - for (vq = i->vq; vq; vq = vq->next) { - if (addr != vq->config.pfn*getpagesize()) - continue; - errx(1, "Notification on %s before setup!", i->name); - } - } - /* * Early console write is done using notify on a nul-terminated string * in Guest memory. It's also great for hacking debugging messages @@ -1736,11 +1598,6 @@ static void enable_virtqueue(struct device *d, struct virtqueue *vq) err(1, "Creating clone"); } -static void reset_pci_device(struct device *dev) -{ - /* FIXME */ -} - static void emulate_mmio_write(struct device *d, u32 off, u32 val, u32 mask) { struct virtqueue *vq; @@ -1775,7 +1632,7 @@ static void emulate_mmio_write(struct device *d, u32 off, u32 val, u32 mask) case offsetof(struct virtio_pci_mmio, cfg.device_status): verbose("%s: device status -> %#x\n", d->name, val); if (val == 0) - reset_pci_device(d); + reset_device(d); goto write_through8; case offsetof(struct virtio_pci_mmio, cfg.queue_select): vq = vq_by_num(d, val); @@ -1986,102 +1843,6 @@ static void emulate_mmio(unsigned long paddr, const u8 *insn) * device" so the Launcher can keep track of it. We have common helper * routines to allocate and manage them. */ - -/* - * The layout of the device page is a "struct lguest_device_desc" followed by a - * number of virtqueue descriptors, then two sets of feature bits, then an - * array of configuration bytes. This routine returns the configuration - * pointer. - */ -static u8 *device_config(const struct device *dev) -{ - return (void *)(dev->desc + 1) - + dev->num_vq * sizeof(struct lguest_vqconfig) - + dev->feature_len * 2; -} - -/* - * This routine allocates a new "struct lguest_device_desc" from descriptor - * table page just above the Guest's normal memory. It returns a pointer to - * that descriptor. - */ -static struct lguest_device_desc *new_dev_desc(u16 type) -{ - struct lguest_device_desc d = { .type = type }; - void *p; - - /* Figure out where the next device config is, based on the last one. */ - if (devices.lastdev) - p = device_config(devices.lastdev) - + devices.lastdev->desc->config_len; - else - p = devices.descpage; - - /* We only have one page for all the descriptors. */ - if (p + sizeof(d) > (void *)devices.descpage + getpagesize()) - errx(1, "Too many devices"); - - /* p might not be aligned, so we memcpy in. */ - return memcpy(p, &d, sizeof(d)); -} - -/* - * Each device descriptor is followed by the description of its virtqueues. We - * specify how many descriptors the virtqueue is to have. - */ -static void add_virtqueue(struct device *dev, unsigned int num_descs, - void (*service)(struct virtqueue *)) -{ - unsigned int pages; - struct virtqueue **i, *vq = malloc(sizeof(*vq)); - void *p; - - /* First we need some memory for this virtqueue. */ - pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1) - / getpagesize(); - p = get_pages(pages); - - /* Initialize the virtqueue */ - vq->next = NULL; - vq->last_avail_idx = 0; - vq->dev = dev; - - /* - * This is the routine the service thread will run, and its Process ID - * once it's running. - */ - vq->service = service; - vq->thread = (pid_t)-1; - - /* Initialize the configuration. */ - vq->config.num = num_descs; - vq->config.irq = devices.next_irq++; - vq->config.pfn = to_guest_phys(p) / getpagesize(); - - /* Initialize the vring. */ - vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN); - - /* - * Append virtqueue to this device's descriptor. We use - * device_config() to get the end of the device's current virtqueues; - * we check that we haven't added any config or feature information - * yet, otherwise we'd be overwriting them. - */ - assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0); - memcpy(device_config(dev), &vq->config, sizeof(vq->config)); - dev->num_vq++; - dev->desc->num_vq++; - - verbose("Virtqueue page %#lx\n", to_guest_phys(p)); - - /* - * Add to tail of list, so dev->vq is first vq, dev->vq->next is - * second. - */ - for (i = &dev->vq; *i; i = &(*i)->next); - *i = vq; -} - static void add_pci_virtqueue(struct device *dev, void (*service)(struct virtqueue *)) { @@ -2107,9 +1868,6 @@ static void add_pci_virtqueue(struct device *dev, /* Add one to the number of queues */ vq->dev->mmio->cfg.num_queues++; - /* FIXME: Do irq per virtqueue, not per device. */ - vq->config.irq = vq->dev->config.irq_line; - /* * Add to tail of list, so dev->vq is first vq, dev->vq->next is * second. @@ -2118,47 +1876,12 @@ static void add_pci_virtqueue(struct device *dev, *i = vq; } -/* - * The first half of the feature bitmask is for us to advertise features. The - * second half is for the Guest to accept features. - */ -static void add_feature(struct device *dev, unsigned bit) -{ - u8 *features = get_feature_bits(dev); - - /* We can't extend the feature bits once we've added config bytes */ - if (dev->desc->feature_len <= bit / CHAR_BIT) { - assert(dev->desc->config_len == 0); - dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1; - } - - features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT)); -} - +/* The Guest accesses the feature bits via the PCI common config MMIO region */ static void add_pci_feature(struct device *dev, unsigned bit) { dev->features |= (1ULL << bit); } -/* - * This routine sets the configuration fields for an existing device's - * descriptor. It only works for the last device, but that's OK because that's - * how we use it. - */ -static void set_config(struct device *dev, unsigned len, const void *conf) -{ - /* Check we haven't overflowed our single page. */ - if (device_config(dev) + len > devices.descpage + getpagesize()) - errx(1, "Too many devices"); - - /* Copy in the config information, and store the length. */ - memcpy(device_config(dev), conf, len); - dev->desc->config_len = len; - - /* Size must fit in config_len field (8 bits)! */ - assert(dev->desc->config_len == len); -} - /* For devices with no config. */ static void no_device_config(struct device *dev) { @@ -2287,59 +2010,28 @@ static void init_pci_config(struct pci_config *pci, u16 type, } /* - * This routine does all the creation and setup of a new device, including - * calling new_dev_desc() to allocate the descriptor and device memory. We - * don't actually start the service threads until later. + * This routine does all the creation and setup of a new device, but we don't + * actually place the MMIO region until we know the size (if any) of the + * device-specific config. And we don't actually start the service threads + * until later. * * See what I mean about userspace being boring? */ -static struct device *new_device(const char *name, u16 type) -{ - struct device *dev = malloc(sizeof(*dev)); - - /* Now we populate the fields one at a time. */ - dev->desc = new_dev_desc(type); - dev->name = name; - dev->vq = NULL; - dev->feature_len = 0; - dev->num_vq = 0; - dev->running = false; - dev->next = NULL; - - /* - * Append to device list. Prepending to a single-linked list is - * easier, but the user expects the devices to be arranged on the bus - * in command-line order. The first network device on the command line - * is eth0, the first block device /dev/vda, etc. - */ - if (devices.lastdev) - devices.lastdev->next = dev; - else - devices.dev = dev; - devices.lastdev = dev; - - return dev; -} - static struct device *new_pci_device(const char *name, u16 type, u8 class, u8 subclass) { struct device *dev = malloc(sizeof(*dev)); /* Now we populate the fields one at a time. */ - dev->desc = NULL; dev->name = name; dev->vq = NULL; - dev->feature_len = 0; - dev->num_vq = 0; dev->running = false; - dev->next = NULL; dev->mmio_size = sizeof(struct virtio_pci_mmio); dev->mmio = calloc(1, dev->mmio_size); dev->features = (u64)1 << VIRTIO_F_VERSION_1; dev->features_accepted = 0; - if (devices.device_num + 1 >= 32) + if (devices.device_num + 1 >= MAX_PCI_DEVICES) errx(1, "Can only handle 31 PCI devices"); init_pci_config(&dev->config, type, class, subclass); @@ -2940,11 +2632,9 @@ int main(int argc, char *argv[]) main_args = argv; /* - * First we initialize the device list. We keep a pointer to the last - * device, and the next interrupt number to use for devices (1: - * remember that 0 is used by the timer). + * First we initialize the device list. We remember next interrupt + * number to use for devices (1: remember that 0 is used by the timer). */ - devices.lastdev = NULL; devices.next_irq = 1; /* We're CPU 0. In fact, that's the only CPU possible right now. */ @@ -2969,7 +2659,6 @@ int main(int argc, char *argv[]) + DEVICE_PAGES); guest_limit = mem; guest_max = guest_mmio = mem + DEVICE_PAGES*getpagesize(); - devices.descpage = get_pages(1); break; } } |