From 6725f21157b4b6a9fe689cdf07b040d21ea536dd Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:47 +0100 Subject: virtio-mem: determine nid only once using memory_add_physaddr_to_nid() Let's determine the target nid only once in case we have none specified - usually, we'll end up with node 0 either way. Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-2-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 181e2f18beae..a37fd73588da 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -70,7 +70,7 @@ struct virtio_mem { /* The device block size (for communicating with the device). */ uint64_t device_block_size; - /* The translated node id. NUMA_NO_NODE in case not specified. */ + /* The determined node id for all memory of the device. */ int nid; /* Physical start address of the memory region. 
*/ uint64_t addr; @@ -406,10 +406,6 @@ static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm) static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - int nid = vm->nid; - - if (nid == NUMA_NO_NODE) - nid = memory_add_physaddr_to_nid(addr); /* * When force-unloading the driver and we still have memory added to @@ -423,7 +419,8 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) } dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id); - return add_memory_driver_managed(nid, addr, memory_block_size_bytes(), + return add_memory_driver_managed(vm->nid, addr, + memory_block_size_bytes(), vm->resource_name, MEMHP_MERGE_RESOURCE); } @@ -440,13 +437,9 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - int nid = vm->nid; - - if (nid == NUMA_NO_NODE) - nid = memory_add_physaddr_to_nid(addr); dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id); - return remove_memory(nid, addr, memory_block_size_bytes()); + return remove_memory(vm->nid, addr, memory_block_size_bytes()); } /* @@ -461,14 +454,11 @@ static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - int nid = vm->nid; - - if (nid == NUMA_NO_NODE) - nid = memory_add_physaddr_to_nid(addr); dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n", mb_id); - return offline_and_remove_memory(nid, addr, memory_block_size_bytes()); + return offline_and_remove_memory(vm->nid, addr, + memory_block_size_bytes()); } /* @@ -1659,6 +1649,10 @@ static int virtio_mem_init(struct virtio_mem *vm) virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, &vm->region_size); + /* Determine the nid for the device based on the lowest address. 
*/ + if (vm->nid == NUMA_NO_NODE) + vm->nid = memory_add_physaddr_to_nid(vm->addr); + /* * We always hotplug memory in memory block granularity. This way, * we have to wait for exactly one memory block to online. @@ -1707,7 +1701,7 @@ static int virtio_mem_init(struct virtio_mem *vm) memory_block_size_bytes()); dev_info(&vm->vdev->dev, "subblock size: 0x%llx", (unsigned long long)vm->subblock_size); - if (vm->nid != NUMA_NO_NODE) + if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); return 0; -- cgit v1.2.3 From 347202dc04a110bdab8d4e1c38ceccd7758fe13e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:48 +0100 Subject: virtio-mem: more precise calculation in virtio_mem_mb_state_prepare_next_mb() We actually need one byte less (next_mb_id is exclusive, first_mb_id is inclusive). While at it, compact the code. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-3-david@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_mem.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index a37fd73588da..dee46865bae2 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -257,10 +257,8 @@ static enum virtio_mem_mb_state virtio_mem_mb_get_state(struct virtio_mem *vm, */ static int virtio_mem_mb_state_prepare_next_mb(struct virtio_mem *vm) { - unsigned long old_bytes = vm->next_mb_id - vm->first_mb_id + 1; - unsigned long new_bytes = vm->next_mb_id - vm->first_mb_id + 2; - int old_pages = PFN_UP(old_bytes); - int new_pages = PFN_UP(new_bytes); + int old_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id); + int new_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id + 1); uint8_t *new_mb_state; if (vm->mb_state && old_pages == new_pages) -- cgit v1.2.3 From 20b9150225c8e9599999b4e161192d8a8d56a4cb Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:49 +0100 Subject: virtio-mem: simplify MAX_ORDER - 1 / pageblock_order handling Let's use pageblock_nr_pages and MAX_ORDER_NR_PAGES instead where possible to simplify. Add a comment why we have that restriction for now. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-4-david@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_mem.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index dee46865bae2..0f9d854e8e42 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -753,14 +753,15 @@ static void virtio_mem_clear_fake_offline(unsigned long pfn, */ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages) { - const int order = MAX_ORDER - 1; + const unsigned long max_nr_pages = MAX_ORDER_NR_PAGES; int i; /* - * We are always called with subblock granularity, which is at least - * aligned to MAX_ORDER - 1. + * We are always called at least with MAX_ORDER_NR_PAGES + * granularity/alignment (e.g., the way subblocks work). All pages + * inside such a block are alike. */ - for (i = 0; i < nr_pages; i += 1 << order) { + for (i = 0; i < nr_pages; i += max_nr_pages) { struct page *page = pfn_to_page(pfn + i); /* @@ -770,14 +771,14 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages) * alike. */ if (PageDirty(page)) { - virtio_mem_clear_fake_offline(pfn + i, 1 << order, + virtio_mem_clear_fake_offline(pfn + i, max_nr_pages, false); - generic_online_page(page, order); + generic_online_page(page, MAX_ORDER - 1); } else { - virtio_mem_clear_fake_offline(pfn + i, 1 << order, + virtio_mem_clear_fake_offline(pfn + i, max_nr_pages, true); - free_contig_range(pfn + i, 1 << order); - adjust_managed_page_count(page, 1 << order); + free_contig_range(pfn + i, max_nr_pages); + adjust_managed_page_count(page, max_nr_pages); } } } @@ -790,7 +791,7 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order) int sb_id; /* - * We exploit here that subblocks have at least MAX_ORDER - 1 + * We exploit here that subblocks have at least MAX_ORDER_NR_PAGES. * size/alignment and that this callback is is called with such a * size/alignment. 
So we cannot cross subblocks and therefore * also not memory blocks. @@ -1673,13 +1674,15 @@ static int virtio_mem_init(struct virtio_mem *vm) "Some memory is not addressable. This can make some memory unusable.\n"); /* - * Calculate the subblock size: - * - At least MAX_ORDER - 1 / pageblock_order. - * - At least the device block size. - * In the worst case, a single subblock per memory block. + * We want subblocks to span at least MAX_ORDER_NR_PAGES and + * pageblock_nr_pages pages. This: + * - Simplifies our page onlining code (virtio_mem_online_page_cb) + * and fake page onlining code (virtio_mem_fake_online). + * - Is required for now for alloc_contig_range() to work reliably - + * it doesn't properly handle smaller granularity on ZONE_NORMAL. */ - vm->subblock_size = PAGE_SIZE * 1ul << max_t(uint32_t, MAX_ORDER - 1, - pageblock_order); + vm->subblock_size = max_t(uint64_t, MAX_ORDER_NR_PAGES, + pageblock_nr_pages) * PAGE_SIZE; vm->subblock_size = max_t(uint64_t, vm->device_block_size, vm->subblock_size); vm->nb_sb_per_mb = memory_block_size_bytes() / vm->subblock_size; -- cgit v1.2.3 From d76944f80d5f500c8be74feb7938edddf68ee931 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:50 +0100 Subject: virtio-mem: drop rc2 in virtio_mem_mb_plug_and_add() We can drop rc2, we don't actually need the value. Reviewed-by: Pankaj Gupta Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-5-david@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_mem.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 0f9d854e8e42..4f18d9855a0e 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1070,7 +1070,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, uint64_t *nb_sb) { const int count = min_t(int, *nb_sb, vm->nb_sb_per_mb); - int rc, rc2; + int rc; if (WARN_ON_ONCE(!count)) return -EINVAL; @@ -1101,13 +1101,12 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, dev_err(&vm->vdev->dev, "adding memory block %lu failed with %d\n", mb_id, rc); - rc2 = virtio_mem_mb_unplug_sb(vm, mb_id, 0, count); /* * TODO: Linux MM does not properly clean up yet in all cases * where adding of memory failed - especially on -ENOMEM. */ - if (rc2) + if (virtio_mem_mb_unplug_sb(vm, mb_id, 0, count)) new_state = VIRTIO_MEM_MB_STATE_PLUGGED; virtio_mem_mb_set_state(vm, mb_id, new_state); return rc; -- cgit v1.2.3 From 2a6285114bc543b70612e2bc0fcf13d2dd6ce5b9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:51 +0100 Subject: virtio-mem: use "unsigned long" for nr_pages when fake onlining/offlining No harm done, but let's be consistent. Reviewed-by: Pankaj Gupta Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-6-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 4f18d9855a0e..94451b401fba 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -717,7 +717,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, * (via generic_online_page()) using PageDirty(). 
*/ static void virtio_mem_set_fake_offline(unsigned long pfn, - unsigned int nr_pages, bool onlined) + unsigned long nr_pages, bool onlined) { for (; nr_pages--; pfn++) { struct page *page = pfn_to_page(pfn); @@ -736,7 +736,7 @@ static void virtio_mem_set_fake_offline(unsigned long pfn, * (via generic_online_page()), clear PageDirty(). */ static void virtio_mem_clear_fake_offline(unsigned long pfn, - unsigned int nr_pages, bool onlined) + unsigned long nr_pages, bool onlined) { for (; nr_pages--; pfn++) { struct page *page = pfn_to_page(pfn); @@ -751,10 +751,10 @@ static void virtio_mem_clear_fake_offline(unsigned long pfn, * Release a range of fake-offline pages to the buddy, effectively * fake-onlining them. */ -static void virtio_mem_fake_online(unsigned long pfn, unsigned int nr_pages) +static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) { const unsigned long max_nr_pages = MAX_ORDER_NR_PAGES; - int i; + unsigned long i; /* * We are always called at least with MAX_ORDER_NR_PAGES -- cgit v1.2.3 From 41e6215c6d29a7bbcee599411cdf0911fde1f09b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:52 +0100 Subject: virtio-mem: factor out calculation of the bit number within the subblock bitmap The calculation is already complicated enough, let's limit it to one location. Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-7-david@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_mem.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 94451b401fba..30b4d07f5263 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -290,6 +290,16 @@ static int virtio_mem_mb_state_prepare_next_mb(struct virtio_mem *vm) _mb_id--) \ if (virtio_mem_mb_get_state(_vm, _mb_id) == _state) +/* + * Calculate the bit number in the subblock bitmap for the given subblock + * inside the given memory block. + */ +static int virtio_mem_sb_bitmap_bit_nr(struct virtio_mem *vm, + unsigned long mb_id, int sb_id) +{ + return (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; +} + /* * Mark all selected subblocks plugged. * @@ -299,7 +309,7 @@ static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); __bitmap_set(vm->sb_bitmap, bit, count); } @@ -313,7 +323,7 @@ static void virtio_mem_mb_set_sb_unplugged(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); __bitmap_clear(vm->sb_bitmap, bit, count); } @@ -325,7 +335,7 @@ static bool virtio_mem_mb_test_sb_plugged(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); if (count == 1) return test_bit(bit, vm->sb_bitmap); @@ -342,7 +352,7 @@ static bool virtio_mem_mb_test_sb_unplugged(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); /* TODO: 
Helper similar to bitmap_set() */ return find_next_bit(vm->sb_bitmap, bit + count, bit) >= bit + count; @@ -355,7 +365,7 @@ static bool virtio_mem_mb_test_sb_unplugged(struct virtio_mem *vm, static int virtio_mem_mb_first_unplugged_sb(struct virtio_mem *vm, unsigned long mb_id) { - const int bit = (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb; + const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, 0); return find_next_zero_bit(vm->sb_bitmap, bit + vm->nb_sb_per_mb, bit) - bit; -- cgit v1.2.3 From 6beb3a9421fd81d36bd4d87a6b307fc744ea9dd2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:53 +0100 Subject: virtio-mem: print debug messages from virtio_mem_send_*_request() Let's move the existing dev_dbg() into the functions, print if something went wrong, and also print for virtio_mem_send_unplug_all_request(). Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-8-david@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_mem.c | 50 +++++++++++++++++++++++++++++++-------------- 1 file changed, 35 insertions(+), 15 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 30b4d07f5263..4742497feff0 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -869,23 +869,33 @@ static int virtio_mem_send_plug_request(struct virtio_mem *vm, uint64_t addr, .u.plug.addr = cpu_to_virtio64(vm->vdev, addr), .u.plug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), }; + int rc = -ENOMEM; if (atomic_read(&vm->config_changed)) return -EAGAIN; + dev_dbg(&vm->vdev->dev, "plugging memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + switch (virtio_mem_send_request(vm, &req)) { case VIRTIO_MEM_RESP_ACK: vm->plugged_size += size; return 0; case VIRTIO_MEM_RESP_NACK: - return -EAGAIN; + rc = -EAGAIN; + break; case VIRTIO_MEM_RESP_BUSY: - return -ETXTBSY; + rc = -ETXTBSY; + break; case VIRTIO_MEM_RESP_ERROR: - return -EINVAL; + rc = -EINVAL; + break; default: - return -ENOMEM; + break; } + + dev_dbg(&vm->vdev->dev, "plugging memory failed: %d\n", rc); + return rc; } static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr, @@ -897,21 +907,30 @@ static int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr, .u.unplug.addr = cpu_to_virtio64(vm->vdev, addr), .u.unplug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), }; + int rc = -ENOMEM; if (atomic_read(&vm->config_changed)) return -EAGAIN; + dev_dbg(&vm->vdev->dev, "unplugging memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + switch (virtio_mem_send_request(vm, &req)) { case VIRTIO_MEM_RESP_ACK: vm->plugged_size -= size; return 0; case VIRTIO_MEM_RESP_BUSY: - return -ETXTBSY; + rc = -ETXTBSY; + break; case VIRTIO_MEM_RESP_ERROR: - return -EINVAL; + rc = -EINVAL; + break; default: - return -ENOMEM; + break; } + + dev_dbg(&vm->vdev->dev, "unplugging memory failed: %d\n", rc); + return rc; } static int 
virtio_mem_send_unplug_all_request(struct virtio_mem *vm) @@ -919,6 +938,9 @@ static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) const struct virtio_mem_req req = { .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG_ALL), }; + int rc = -ENOMEM; + + dev_dbg(&vm->vdev->dev, "unplugging all memory"); switch (virtio_mem_send_request(vm, &req)) { case VIRTIO_MEM_RESP_ACK: @@ -928,10 +950,14 @@ static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) atomic_set(&vm->config_changed, 1); return 0; case VIRTIO_MEM_RESP_BUSY: - return -ETXTBSY; + rc = -ETXTBSY; + break; default: - return -ENOMEM; + break; } + + dev_dbg(&vm->vdev->dev, "unplugging all memory failed: %d\n", rc); + return rc; } /* @@ -946,9 +972,6 @@ static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, const uint64_t size = count * vm->subblock_size; int rc; - dev_dbg(&vm->vdev->dev, "plugging memory block: %lu : %i - %i\n", mb_id, - sb_id, sb_id + count - 1); - rc = virtio_mem_send_plug_request(vm, addr, size); if (!rc) virtio_mem_mb_set_sb_plugged(vm, mb_id, sb_id, count); @@ -967,9 +990,6 @@ static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, const uint64_t size = count * vm->subblock_size; int rc; - dev_dbg(&vm->vdev->dev, "unplugging memory block: %lu : %i - %i\n", - mb_id, sb_id, sb_id + count - 1); - rc = virtio_mem_send_unplug_request(vm, addr, size); if (!rc) virtio_mem_mb_set_sb_unplugged(vm, mb_id, sb_id, count); -- cgit v1.2.3 From 89c486c47f2a450d7f064b4927b7f0ab911569a4 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:54 +0100 Subject: virtio-mem: factor out fake-offlining into virtio_mem_fake_offline() ... which now matches virtio_mem_fake_online(). We'll reuse this functionality soon. Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. 
Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-9-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 4742497feff0..fedfea27967e 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -793,6 +793,27 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) } } +/* + * Try to allocate a range, marking pages fake-offline, effectively + * fake-offlining them. + */ +static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) +{ + int rc; + + rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, + GFP_KERNEL); + if (rc == -ENOMEM) + /* whoops, out of memory */ + return rc; + if (rc) + return -EBUSY; + + virtio_mem_set_fake_offline(pfn, nr_pages, true); + adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); + return 0; +} + static void virtio_mem_online_page_cb(struct page *page, unsigned int order) { const unsigned long addr = page_to_phys(page); @@ -1328,17 +1349,10 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->subblock_size); - rc = alloc_contig_range(start_pfn, start_pfn + nr_pages, - MIGRATE_MOVABLE, GFP_KERNEL); - if (rc == -ENOMEM) - /* whoops, out of memory */ - return rc; - if (rc) - return -EBUSY; - /* Mark it as fake-offline before unplugging it */ - virtio_mem_set_fake_offline(start_pfn, nr_pages, true); - adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages); + rc = virtio_mem_fake_offline(start_pfn, nr_pages); + if (rc) + return rc; /* Try to unplug the allocated memory */ rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count); -- cgit v1.2.3 From 7a34c77dab7e0c7ecb58da8bf600b7aadb4d878c Mon Sep 17 00:00:00 2001 From: 
David Hildenbrand Date: Thu, 12 Nov 2020 14:37:55 +0100 Subject: virtio-mem: factor out handling of fake-offline pages in memory notifier Let's factor out the core pieces and place the implementation next to virtio_mem_fake_offline(). We'll reuse this functionality soon. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-10-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 73 +++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 23 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index fedfea27967e..c24055248f9d 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -160,6 +160,10 @@ static DEFINE_MUTEX(virtio_mem_mutex); static LIST_HEAD(virtio_mem_devices); static void virtio_mem_online_page_cb(struct page *page, unsigned int order); +static void virtio_mem_fake_offline_going_offline(unsigned long pfn, + unsigned long nr_pages); +static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, + unsigned long nr_pages); /* * Register a virtio-mem device so it will be considered for the online_page @@ -586,27 +590,15 @@ static void virtio_mem_notify_going_offline(struct virtio_mem *vm, unsigned long mb_id) { const unsigned long nr_pages = PFN_DOWN(vm->subblock_size); - struct page *page; unsigned long pfn; - int sb_id, i; + int sb_id; for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; - /* - * Drop our reference to the pages so the memory can get - * offlined and add the unplugged pages to the managed - * page counters (so offlining code can correctly subtract - * them again). 
- */ pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->subblock_size); - adjust_managed_page_count(pfn_to_page(pfn), nr_pages); - for (i = 0; i < nr_pages; i++) { - page = pfn_to_page(pfn + i); - if (WARN_ON(!page_ref_dec_and_test(page))) - dump_page(page, "unplugged page referenced"); - } + virtio_mem_fake_offline_going_offline(pfn, nr_pages); } } @@ -615,21 +607,14 @@ static void virtio_mem_notify_cancel_offline(struct virtio_mem *vm, { const unsigned long nr_pages = PFN_DOWN(vm->subblock_size); unsigned long pfn; - int sb_id, i; + int sb_id; for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; - /* - * Get the reference we dropped when going offline and - * subtract the unplugged pages from the managed page - * counters. - */ pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->subblock_size); - adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); - for (i = 0; i < nr_pages; i++) - page_ref_inc(pfn_to_page(pfn + i)); + virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); } } @@ -814,6 +799,48 @@ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) return 0; } +/* + * Handle fake-offline pages when memory is going offline - such that the + * pages can be skipped by mm-core when offlining. + */ +static void virtio_mem_fake_offline_going_offline(unsigned long pfn, + unsigned long nr_pages) +{ + struct page *page; + unsigned long i; + + /* + * Drop our reference to the pages so the memory can get offlined + * and add the unplugged pages to the managed page counters (so + * offlining code can correctly subtract them again). + */ + adjust_managed_page_count(pfn_to_page(pfn), nr_pages); + /* Drop our reference to the pages so the memory can get offlined. 
*/ + for (i = 0; i < nr_pages; i++) { + page = pfn_to_page(pfn + i); + if (WARN_ON(!page_ref_dec_and_test(page))) + dump_page(page, "fake-offline page referenced"); + } +} + +/* + * Handle fake-offline pages when memory offlining is canceled - to undo + * what we did in virtio_mem_fake_offline_going_offline(). + */ +static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, + unsigned long nr_pages) +{ + unsigned long i; + + /* + * Get the reference we dropped when going offline and subtract the + * unplugged pages from the managed page counters. + */ + adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); + for (i = 0; i < nr_pages; i++) + page_ref_inc(pfn_to_page(pfn + i)); +} + static void virtio_mem_online_page_cb(struct page *page, unsigned int order) { const unsigned long addr = page_to_phys(page); -- cgit v1.2.3 From f2d799d591359685a3a74d28c2989c56f4bb9898 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:56 +0100 Subject: virtio-mem: retry fake-offlining via alloc_contig_range() on ZONE_MOVABLE ZONE_MOVABLE is supposed to give some guarantees, yet, alloc_contig_range() isn't prepared to properly deal with some racy cases (e.g., temporary page pinning when exiting processes, PCP). Retry 5 times for now. There is certainly room for improvement in the future. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-11-david@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_mem.c | 37 ++++++++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 11 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index c24055248f9d..2f1ce4d4781b 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -784,19 +784,34 @@ static void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages) */ static int virtio_mem_fake_offline(unsigned long pfn, unsigned long nr_pages) { - int rc; + const bool is_movable = zone_idx(page_zone(pfn_to_page(pfn))) == + ZONE_MOVABLE; + int rc, retry_count; - rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, - GFP_KERNEL); - if (rc == -ENOMEM) - /* whoops, out of memory */ - return rc; - if (rc) - return -EBUSY; + /* + * TODO: We want an alloc_contig_range() mode that tries to allocate + * harder (e.g., dealing with temporarily pinned pages, PCP), especially + * with ZONE_MOVABLE. So for now, retry a couple of times with + * ZONE_MOVABLE before giving up - because that zone is supposed to give + * some guarantees. + */ + for (retry_count = 0; retry_count < 5; retry_count++) { + rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE, + GFP_KERNEL); + if (rc == -ENOMEM) + /* whoops, out of memory */ + return rc; + else if (rc && !is_movable) + break; + else if (rc) + continue; - virtio_mem_set_fake_offline(pfn, nr_pages, true); - adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); - return 0; + virtio_mem_set_fake_offline(pfn, nr_pages, true); + adjust_managed_page_count(pfn_to_page(pfn), -nr_pages); + return 0; + } + + return -EBUSY; } /* -- cgit v1.2.3 From 989ff82527074b79bc89ba1c390be1eda01784a5 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:57 +0100 Subject: virtio-mem: generalize check for added memory Let's check by traversing busy system RAM resources instead, to avoid relying on memory block states. 
Don't use walk_system_ram_range(), as that works on pages and we want to use the bare addresses we have easily at hand. This is a preparation for Big Block Mode (BBM), which won't have memory block states. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-12-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 2f1ce4d4781b..3731097cd9e8 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1833,6 +1833,20 @@ static void virtio_mem_delete_resource(struct virtio_mem *vm) vm->parent_resource = NULL; } +static int virtio_mem_range_has_system_ram(struct resource *res, void *arg) +{ + return 1; +} + +static bool virtio_mem_has_memory_added(struct virtio_mem *vm) +{ + const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + return walk_iomem_res_desc(IORES_DESC_NONE, flags, vm->addr, + vm->addr + vm->region_size, NULL, + virtio_mem_range_has_system_ram) == 1; +} + static int virtio_mem_probe(struct virtio_device *vdev) { struct virtio_mem *vm; @@ -1954,10 +1968,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) * the system. And there is no way to stop the driver/device from going * away. Warn at least. 
*/ - if (vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] || - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL] || - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE] || - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL]) { + if (virtio_mem_has_memory_added(vm)) { dev_warn(&vdev->dev, "device still has system memory added\n"); } else { virtio_mem_delete_resource(vm); -- cgit v1.2.3 From 8464e3bdf208e86410e369601ca363b2a81683e3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:58 +0100 Subject: virtio-mem: generalize virtio_mem_owned_mb() Avoid using memory block ids. Rename it to virtio_mem_contains_range(). This is a preparation for Big Block Mode (BBM). Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-13-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 3731097cd9e8..2193c5172195 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -512,12 +512,13 @@ static bool virtio_mem_overlaps_range(struct virtio_mem *vm, } /* - * Test if a virtio-mem device owns a memory block. Can be called from + * Test if a virtio-mem device contains a given range. Can be called from * (notifier) callbacks lockless. 
*/ -static bool virtio_mem_owned_mb(struct virtio_mem *vm, unsigned long mb_id) +static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start, + uint64_t size) { - return mb_id >= vm->first_mb_id && mb_id <= vm->last_mb_id; + return start >= vm->addr && start + size <= vm->addr + vm->region_size; } static int virtio_mem_notify_going_online(struct virtio_mem *vm, @@ -871,7 +872,7 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order) */ rcu_read_lock(); list_for_each_entry_rcu(vm, &virtio_mem_devices, next) { - if (!virtio_mem_owned_mb(vm, mb_id)) + if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order))) continue; sb_id = virtio_mem_phys_to_sb_id(vm, addr); -- cgit v1.2.3 From 835491c554fbdbc18452f4b1546df21879b8b26d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:37:59 +0100 Subject: virtio-mem: generalize virtio_mem_overlaps_range() Avoid using memory block ids. While at it, use uint64_t for address/size. This is a preparation for Big Block Mode (BBM). Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-14-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 2193c5172195..bd76aa79a82e 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -501,14 +501,10 @@ static int virtio_mem_translate_node_id(struct virtio_mem *vm, uint16_t node_id) * Test if a virtio-mem device overlaps with the given range. Can be called * from (notifier) callbacks lockless. 
*/ -static bool virtio_mem_overlaps_range(struct virtio_mem *vm, - unsigned long start, unsigned long size) +static bool virtio_mem_overlaps_range(struct virtio_mem *vm, uint64_t start, + uint64_t size) { - unsigned long dev_start = virtio_mem_mb_id_to_phys(vm->first_mb_id); - unsigned long dev_end = virtio_mem_mb_id_to_phys(vm->last_mb_id) + - memory_block_size_bytes(); - - return start < dev_end && dev_start < start + size; + return start < vm->addr + vm->region_size && vm->addr < start + size; } /* -- cgit v1.2.3 From 420066829bb614826115892e81f85b8c4341ee95 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:00 +0100 Subject: virtio-mem: drop last_mb_id No longer used, let's drop it. Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-15-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index bd76aa79a82e..a7beac5942e0 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -84,8 +84,6 @@ struct virtio_mem { /* Id of the first memory block of this device. */ unsigned long first_mb_id; - /* Id of the last memory block of this device. */ - unsigned long last_mb_id; /* Id of the last usable memory block of this device. */ unsigned long last_usable_mb_id; /* Id of the next memory bock to prepare when needed. 
*/ @@ -1773,8 +1771,6 @@ static int virtio_mem_init(struct virtio_mem *vm) vm->first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 + memory_block_size_bytes()); vm->next_mb_id = vm->first_mb_id; - vm->last_mb_id = virtio_mem_phys_to_mb_id(vm->addr + - vm->region_size) - 1; dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); -- cgit v1.2.3 From 1d33c2caa8cbdc0f093a8cdad5a4c153ef9cbe8f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:01 +0100 Subject: virtio-mem: don't always trigger the workqueue when offlining memory Let's trigger from offlining code only when we're not allowed to unplug online memory. Handle the other case (memmap possibly freeing up another memory block) when actually removing memory. We now also properly handle the case when removing already offline memory blocks via virtio_mem_mb_remove(). When removing via virtio_mem_remove(), when unloading the driver, virtio_mem_retry() is a NOP and safe to use. While at it, move retry handling when offlining out of virtio_mem_notify_offline(), to share it with Big Block Mode (BBM) soon. This is a preparation for Big Block Mode (BBM), whereby we can see some temporary offlining of memory blocks without actually making progress. Imagine you have a Big Block that spans two Linux memory blocks. Assume the first Linux memory block has no unmovable data on it. When we would call offline_and_remove_memory() on the big block, we would 1. Try to offline the first block. Works, notifiers triggered. virtio_mem_retry() called. 2. Try to offline the second block. Does not work. 3. Re-online first block. 4. Exit to main loop, exit workqueue. 5. Retry immediately (due to virtio_mem_retry()), go to 1. The result is endless retries. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-16-david@redhat.com Signed-off-by: Michael S.
Tsirkin --- drivers/virtio/virtio_mem.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index a7beac5942e0..f86654af8b6b 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -162,6 +162,7 @@ static void virtio_mem_fake_offline_going_offline(unsigned long pfn, unsigned long nr_pages); static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, unsigned long nr_pages); +static void virtio_mem_retry(struct virtio_mem *vm); /* * Register a virtio-mem device so it will be considered for the online_page @@ -447,9 +448,17 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + int rc; dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id); - return remove_memory(vm->nid, addr, memory_block_size_bytes()); + rc = remove_memory(vm->nid, addr, memory_block_size_bytes()); + if (!rc) + /* + * We might have freed up memory we can now unplug, retry + * immediately instead of waiting. + */ + virtio_mem_retry(vm); + return rc; } /* @@ -464,11 +473,19 @@ static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + int rc; dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n", mb_id); - return offline_and_remove_memory(vm->nid, addr, - memory_block_size_bytes()); + rc = offline_and_remove_memory(vm->nid, addr, + memory_block_size_bytes()); + if (!rc) + /* + * We might have freed up memory we can now unplug, retry + * immediately instead of waiting. + */ + virtio_mem_retry(vm); + return rc; } /* @@ -546,15 +563,6 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm, BUG(); break; } - - /* - * Trigger the workqueue, maybe we can now unplug memory. 
Also, - * when we offline and remove a memory block, this will re-trigger - * us immediately - which is often nice because the removal of - * the memory block (e.g., memmap) might have freed up memory - * on other memory blocks we manage. - */ - virtio_mem_retry(vm); } static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) @@ -672,6 +680,14 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; case MEM_OFFLINE: virtio_mem_notify_offline(vm, mb_id); + + /* + * Trigger the workqueue. Now that we have some offline memory, + * maybe we can handle pending unplug requests. + */ + if (!unplug_online) + virtio_mem_retry(vm); + vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; -- cgit v1.2.3 From 98ff9f9411860073f952f1e62a05afb9f6a9e77e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:02 +0100 Subject: virtio-mem: generalize handling when memory is getting onlined deferred We don't want to add too much memory when it's not getting onlined immediately, to avoid running OOM. Generalize the handling, to avoid making use of memory block states. Use a threshold of 1 GiB for now. Properly adjust the offline size when adding/removing memory. As we are not always protected by a lock when touching the offline size, use an atomic64_t. We don't care about races (e.g., someone offlining memory while we are adding more), only about consistent values. (1 GiB needs a memmap of ~16MiB - which sounds reasonable even for setups with little boot memory and (possibly) one virtio-mem device per node) We don't want to retrigger when onlining is caused immediately by our action (e.g., adding memory which immediately gets onlined), so use a flag to indicate if the workqueue is active and use that as an indicator whether to trigger a retry. This will also be especially relevant for Big Block Mode (BBM), whereby we might re-online memory in case offlining of another memory block failed. Cc: "Michael S. 
Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-17-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 95 ++++++++++++++++++++++++++++++--------------- 1 file changed, 63 insertions(+), 32 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index f86654af8b6b..cbd0aa5eb95c 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -51,6 +51,7 @@ struct virtio_mem { /* Workqueue that processes the plug/unplug requests. */ struct work_struct wq; + atomic_t wq_active; atomic_t config_changed; /* Virtqueue for guest->host requests. */ @@ -99,7 +100,15 @@ struct virtio_mem { /* Summary of all memory block states. */ unsigned long nb_mb_state[VIRTIO_MEM_MB_STATE_COUNT]; -#define VIRTIO_MEM_NB_OFFLINE_THRESHOLD 10 + + /* + * We don't want to add too much memory if it's not getting onlined, + * to avoid running OOM. Besides this threshold, we allow to have at + * least two offline blocks at a time (whatever is bigger). + */ +#define VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD (1024 * 1024 * 1024) + atomic64_t offline_size; + uint64_t offline_threshold; /* * One byte state per memory block. @@ -405,6 +414,18 @@ static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm) return 0; } +/* + * Test if we could add memory without creating too much offline memory - + * to avoid running OOM if memory is getting onlined deferred. + */ +static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size) +{ + if (WARN_ON_ONCE(size > vm->offline_threshold)) + return false; + + return atomic64_read(&vm->offline_size) + size <= vm->offline_threshold; +} + /* * Try to add a memory block to Linux. This will usually only fail * if out of memory. 
@@ -417,6 +438,8 @@ static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm) static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); + int rc; /* * When force-unloading the driver and we still have memory added to @@ -430,10 +453,13 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) } dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id); - return add_memory_driver_managed(vm->nid, addr, - memory_block_size_bytes(), - vm->resource_name, - MEMHP_MERGE_RESOURCE); + /* Memory might get onlined immediately. */ + atomic64_add(size, &vm->offline_size); + rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name, + MEMHP_MERGE_RESOURCE); + if (rc) + atomic64_sub(size, &vm->offline_size); + return rc; } /* @@ -448,16 +474,19 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); int rc; dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id); - rc = remove_memory(vm->nid, addr, memory_block_size_bytes()); - if (!rc) + rc = remove_memory(vm->nid, addr, size); + if (!rc) { + atomic64_sub(size, &vm->offline_size); /* * We might have freed up memory we can now unplug, retry * immediately instead of waiting. 
*/ virtio_mem_retry(vm); + } return rc; } @@ -473,18 +502,20 @@ static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm, unsigned long mb_id) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); int rc; dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n", mb_id); - rc = offline_and_remove_memory(vm->nid, addr, - memory_block_size_bytes()); - if (!rc) + rc = offline_and_remove_memory(vm->nid, addr, size); + if (!rc) { + atomic64_sub(size, &vm->offline_size); /* * We might have freed up memory we can now unplug, retry * immediately instead of waiting. */ virtio_mem_retry(vm); + } return rc; } @@ -567,8 +598,6 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm, static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) { - unsigned long nb_offline; - switch (virtio_mem_mb_get_state(vm, mb_id)) { case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: virtio_mem_mb_set_state(vm, mb_id, @@ -581,12 +610,6 @@ static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) BUG(); break; } - nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] + - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL]; - - /* see if we can add new blocks now that we onlined one block */ - if (nb_offline == VIRTIO_MEM_NB_OFFLINE_THRESHOLD - 1) - virtio_mem_retry(vm); } static void virtio_mem_notify_going_offline(struct virtio_mem *vm, @@ -681,6 +704,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, case MEM_OFFLINE: virtio_mem_notify_offline(vm, mb_id); + atomic64_add(size, &vm->offline_size); /* * Trigger the workqueue. Now that we have some offline memory, * maybe we can handle pending unplug requests. 
@@ -693,6 +717,18 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; case MEM_ONLINE: virtio_mem_notify_online(vm, mb_id); + + atomic64_sub(size, &vm->offline_size); + /* + * Start adding more memory once we onlined half of our + * threshold. Don't trigger if it's possibly due to our action + * (e.g., us adding memory which gets onlined immediately from + * the core). + */ + if (!atomic_read(&vm->wq_active) && + virtio_mem_could_add_memory(vm, vm->offline_threshold / 2)) + virtio_mem_retry(vm); + vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; @@ -1151,18 +1187,6 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, return 0; } -/* - * Don't add too many blocks that are not onlined yet to avoid running OOM. - */ -static bool virtio_mem_too_many_mb_offline(struct virtio_mem *vm) -{ - unsigned long nb_offline; - - nb_offline = vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE] + - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL]; - return nb_offline >= VIRTIO_MEM_NB_OFFLINE_THRESHOLD; -} - /* * Try to plug the desired number of subblocks and add the memory block * to Linux.
@@ -1316,7 +1340,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) /* Try to plug and add unused blocks */ virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED) { - if (virtio_mem_too_many_mb_offline(vm)) + if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb); @@ -1327,7 +1351,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) /* Try to prepare, plug and add new blocks */ while (nb_sb) { - if (virtio_mem_too_many_mb_offline(vm)) + if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; rc = virtio_mem_prepare_next_mb(vm, &mb_id); @@ -1620,6 +1644,7 @@ static void virtio_mem_run_wq(struct work_struct *work) if (vm->broken) return; + atomic_set(&vm->wq_active, 1); retry: rc = 0; @@ -1680,6 +1705,8 @@ retry: "unknown error, marking device broken: %d\n", rc); vm->broken = true; } + + atomic_set(&vm->wq_active, 0); } static enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer) @@ -1788,6 +1815,10 @@ static int virtio_mem_init(struct virtio_mem *vm) memory_block_size_bytes()); vm->next_mb_id = vm->first_mb_id; + /* Prepare the offline threshold - make sure we can add two blocks. */ + vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(), + VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD); + dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); dev_info(&vm->vdev->dev, "device block size: 0x%llx", -- cgit v1.2.3 From d5614944254cf288b8fd46fda8c86d916346131d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:03 +0100 Subject: virito-mem: document Sub Block Mode (SBM) Let's add some documentation for the current mode - Sub Block Mode (SBM) - to prepare for a new mode - Big Block Mode (BBM). 
Follow-up patches will properly factor out the existing Sub Block Mode (SBM) and implement Big Block Mode (BBM). Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-18-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index cbd0aa5eb95c..4234bfc0cf52 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -27,6 +27,21 @@ static bool unplug_online = true; module_param(unplug_online, bool, 0644); MODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); +/* + * virtio-mem currently supports the following modes of operation: + * + * * Sub Block Mode (SBM): A Linux memory block spans 1..X subblocks (SB). The + * size of a Sub Block (SB) is determined based on the device block size, the + * pageblock size, and the maximum allocation granularity of the buddy. + * Subblocks within a Linux memory block might either be plugged or unplugged. + * Memory is added/removed to Linux MM in Linux memory block granularity. + * + * User space / core MM (auto onlining) is responsible for onlining added + * Linux memory blocks - and for selecting a zone. Linux Memory Blocks are + * always onlined separately, and all memory within a Linux memory block is + * onlined to the same zone - virtio-mem relies on this behavior. + */ + enum virtio_mem_mb_state { /* Unplugged, not added to Linux. Can be reused later. 
*/ VIRTIO_MEM_MB_STATE_UNUSED = 0, -- cgit v1.2.3 From 99f0b55ea6c3a2ed29776ca0dd549d523ae8d6d3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:04 +0100 Subject: virtio-mem: memory block states are specific to Sub Block Mode (SBM) Let's use a new "sbm" sub-struct to hold SBM-specific state and rename + move applicable definitions, functions, and variables (related to memory block states). While at it: - Drop the "_STATE" part from memory block states - Rename "nb_mb_state" to "mb_count" - Use "set_mb_state" / "get_mb_state" instead of "mb_set_state" / "mb_get_state" - Don't use lengthy "enum virtio_mem_sbm_mb_state", simply use "uint8_t" Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-19-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 215 ++++++++++++++++++++++---------------------- 1 file changed, 109 insertions(+), 106 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 4234bfc0cf52..c6cc301c78e1 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -42,20 +42,23 @@ MODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); * onlined to the same zone - virtio-mem relies on this behavior. */ -enum virtio_mem_mb_state { +/* + * State of a Linux memory block in SBM. + */ +enum virtio_mem_sbm_mb_state { /* Unplugged, not added to Linux. Can be reused later. */ - VIRTIO_MEM_MB_STATE_UNUSED = 0, + VIRTIO_MEM_SBM_MB_UNUSED = 0, /* (Partially) plugged, not added to Linux. Error on add_memory(). */ - VIRTIO_MEM_MB_STATE_PLUGGED, + VIRTIO_MEM_SBM_MB_PLUGGED, /* Fully plugged, fully added to Linux, offline. */ - VIRTIO_MEM_MB_STATE_OFFLINE, + VIRTIO_MEM_SBM_MB_OFFLINE, /* Partially plugged, fully added to Linux, offline.
*/ - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL, /* Fully plugged, fully added to Linux, online. */ - VIRTIO_MEM_MB_STATE_ONLINE, + VIRTIO_MEM_SBM_MB_ONLINE, /* Partially plugged, fully added to Linux, online. */ - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL, - VIRTIO_MEM_MB_STATE_COUNT + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL, + VIRTIO_MEM_SBM_MB_COUNT }; struct virtio_mem { @@ -113,9 +116,6 @@ struct virtio_mem { */ const char *resource_name; - /* Summary of all memory block states. */ - unsigned long nb_mb_state[VIRTIO_MEM_MB_STATE_COUNT]; - /* * We don't want to add too much memory if it's not getting onlined, * to avoid running OOM. Besides this threshold, we allow to have at @@ -125,27 +125,29 @@ struct virtio_mem { atomic64_t offline_size; uint64_t offline_threshold; - /* - * One byte state per memory block. - * - * Allocated via vmalloc(). When preparing new blocks, resized - * (alloc+copy+free) when needed (crossing pages with the next mb). - * (when crossing pages). - * - * With 128MB memory blocks, we have states for 512GB of memory in one - * page. - */ - uint8_t *mb_state; + struct { + /* Summary of all memory block states. */ + unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; + + /* + * One byte state per memory block. Allocated via vmalloc(). + * Resized (alloc+copy+free) on demand. + * + * With 128 MiB memory blocks, we have states for 512 GiB of + * memory in one 4 KiB page. + */ + uint8_t *mb_states; + } sbm; /* - * $nb_sb_per_mb bit per memory block. Handled similar to mb_state. + * $nb_sb_per_mb bit per memory block. Handled similar to sbm.mb_states. * * With 4MB subblocks, we manage 128GB of memory in one page. */ unsigned long *sb_bitmap; /* - * Mutex that protects the nb_mb_state, mb_state, and sb_bitmap. + * Mutex that protects the sbm.mb_count, sbm.mb_states, and sb_bitmap. 
* * When this lock is held the pointers can't change, ONLINE and * OFFLINE blocks can't change the state and no subblocks will get @@ -254,68 +256,68 @@ static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, /* * Set the state of a memory block, taking care of the state counter. */ -static void virtio_mem_mb_set_state(struct virtio_mem *vm, unsigned long mb_id, - enum virtio_mem_mb_state state) +static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm, + unsigned long mb_id, uint8_t state) { const unsigned long idx = mb_id - vm->first_mb_id; - enum virtio_mem_mb_state old_state; + uint8_t old_state; - old_state = vm->mb_state[idx]; - vm->mb_state[idx] = state; + old_state = vm->sbm.mb_states[idx]; + vm->sbm.mb_states[idx] = state; - BUG_ON(vm->nb_mb_state[old_state] == 0); - vm->nb_mb_state[old_state]--; - vm->nb_mb_state[state]++; + BUG_ON(vm->sbm.mb_count[old_state] == 0); + vm->sbm.mb_count[old_state]--; + vm->sbm.mb_count[state]++; } /* * Get the state of a memory block. */ -static enum virtio_mem_mb_state virtio_mem_mb_get_state(struct virtio_mem *vm, - unsigned long mb_id) +static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm, + unsigned long mb_id) { const unsigned long idx = mb_id - vm->first_mb_id; - return vm->mb_state[idx]; + return vm->sbm.mb_states[idx]; } /* * Prepare the state array for the next memory block. 
*/ -static int virtio_mem_mb_state_prepare_next_mb(struct virtio_mem *vm) +static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) { int old_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id); int new_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id + 1); - uint8_t *new_mb_state; + uint8_t *new_array; - if (vm->mb_state && old_pages == new_pages) + if (vm->sbm.mb_states && old_pages == new_pages) return 0; - new_mb_state = vzalloc(new_pages * PAGE_SIZE); - if (!new_mb_state) + new_array = vzalloc(new_pages * PAGE_SIZE); + if (!new_array) return -ENOMEM; mutex_lock(&vm->hotplug_mutex); - if (vm->mb_state) - memcpy(new_mb_state, vm->mb_state, old_pages * PAGE_SIZE); - vfree(vm->mb_state); - vm->mb_state = new_mb_state; + if (vm->sbm.mb_states) + memcpy(new_array, vm->sbm.mb_states, old_pages * PAGE_SIZE); + vfree(vm->sbm.mb_states); + vm->sbm.mb_states = new_array; mutex_unlock(&vm->hotplug_mutex); return 0; } -#define virtio_mem_for_each_mb_state(_vm, _mb_id, _state) \ +#define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state) \ for (_mb_id = _vm->first_mb_id; \ - _mb_id < _vm->next_mb_id && _vm->nb_mb_state[_state]; \ + _mb_id < _vm->next_mb_id && _vm->sbm.mb_count[_state]; \ _mb_id++) \ - if (virtio_mem_mb_get_state(_vm, _mb_id) == _state) + if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) -#define virtio_mem_for_each_mb_state_rev(_vm, _mb_id, _state) \ +#define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state) \ for (_mb_id = _vm->next_mb_id - 1; \ - _mb_id >= _vm->first_mb_id && _vm->nb_mb_state[_state]; \ + _mb_id >= _vm->first_mb_id && _vm->sbm.mb_count[_state]; \ _mb_id--) \ - if (virtio_mem_mb_get_state(_vm, _mb_id) == _state) + if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) /* * Calculate the bit number in the subblock bitmap for the given subblock @@ -581,9 +583,9 @@ static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start, static int virtio_mem_notify_going_online(struct virtio_mem *vm, unsigned long 
mb_id) { - switch (virtio_mem_mb_get_state(vm, mb_id)) { - case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: - case VIRTIO_MEM_MB_STATE_OFFLINE: + switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { + case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: + case VIRTIO_MEM_SBM_MB_OFFLINE: return NOTIFY_OK; default: break; @@ -596,14 +598,14 @@ static int virtio_mem_notify_going_online(struct virtio_mem *vm, static void virtio_mem_notify_offline(struct virtio_mem *vm, unsigned long mb_id) { - switch (virtio_mem_mb_get_state(vm, mb_id)) { - case VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL: - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL); + switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { + case VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL: + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); break; - case VIRTIO_MEM_MB_STATE_ONLINE: - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE); + case VIRTIO_MEM_SBM_MB_ONLINE: + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE); break; default: BUG(); @@ -613,13 +615,14 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm, static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) { - switch (virtio_mem_mb_get_state(vm, mb_id)) { - case VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL: - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL); + switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { + case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL); break; - case VIRTIO_MEM_MB_STATE_OFFLINE: - virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_ONLINE); + case VIRTIO_MEM_SBM_MB_OFFLINE: + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE); break; default: BUG(); @@ -1188,7 +1191,7 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, return -ENOSPC; /* Resize the state array if required. 
*/ - rc = virtio_mem_mb_state_prepare_next_mb(vm); + rc = virtio_mem_sbm_mb_states_prepare_next_mb(vm); if (rc) return rc; @@ -1197,7 +1200,7 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, if (rc) return rc; - vm->nb_mb_state[VIRTIO_MEM_MB_STATE_UNUSED]++; + vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++; *mb_id = vm->next_mb_id++; return 0; } @@ -1231,16 +1234,16 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * so the memory notifiers will find the block in the right state. */ if (count == vm->nb_sb_per_mb) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE); else - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); /* Add the memory block to linux - if that fails, try to unplug. */ rc = virtio_mem_mb_add(vm, mb_id); if (rc) { - enum virtio_mem_mb_state new_state = VIRTIO_MEM_MB_STATE_UNUSED; + int new_state = VIRTIO_MEM_SBM_MB_UNUSED; dev_err(&vm->vdev->dev, "adding memory block %lu failed with %d\n", mb_id, rc); @@ -1250,8 +1253,8 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * where adding of memory failed - especially on -ENOMEM. 
*/ if (virtio_mem_mb_unplug_sb(vm, mb_id, 0, count)) - new_state = VIRTIO_MEM_MB_STATE_PLUGGED; - virtio_mem_mb_set_state(vm, mb_id, new_state); + new_state = VIRTIO_MEM_SBM_MB_PLUGGED; + virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); return rc; } @@ -1304,11 +1307,11 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, if (virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { if (online) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE); else - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE); } return 0; @@ -1330,8 +1333,8 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) mutex_lock(&vm->hotplug_mutex); /* Try to plug subblocks of partially plugged online blocks. */ - virtio_mem_for_each_mb_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) { + virtio_mem_sbm_for_each_mb(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) { rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, true); if (rc || !nb_sb) goto out_unlock; @@ -1339,8 +1342,8 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) } /* Try to plug subblocks of partially plugged offline blocks. 
*/ - virtio_mem_for_each_mb_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) { + virtio_mem_sbm_for_each_mb(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, false); if (rc || !nb_sb) goto out_unlock; @@ -1354,7 +1357,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) mutex_unlock(&vm->hotplug_mutex); /* Try to plug and add unused blocks */ - virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED) { + virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED) { if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; @@ -1403,8 +1406,8 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, /* some subblocks might have been unplugged even on failure */ if (!virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); if (rc) return rc; @@ -1415,8 +1418,8 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, * unplugged. Temporarily drop the mutex, so * any pending GOING_ONLINE requests can be serviced/rejected. 
*/ - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_UNUSED); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); mutex_unlock(&vm->hotplug_mutex); rc = virtio_mem_mb_remove(vm, mb_id); @@ -1454,8 +1457,8 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, return rc; } - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL); return 0; } @@ -1515,8 +1518,8 @@ unplugged: rc = virtio_mem_mb_offline_and_remove(vm, mb_id); mutex_lock(&vm->hotplug_mutex); if (!rc) - virtio_mem_mb_set_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_UNUSED); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); } return 0; @@ -1542,8 +1545,8 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) mutex_lock(&vm->hotplug_mutex); /* Try to unplug subblocks of partially plugged offline blocks. */ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) { + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id, &nb_sb); if (rc || !nb_sb) @@ -1552,8 +1555,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) } /* Try to unplug subblocks of plugged offline blocks. */ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE) { + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE) { rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id, &nb_sb); if (rc || !nb_sb) @@ -1567,8 +1569,8 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) } /* Try to unplug subblocks of partially plugged online blocks. 
*/ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE_PARTIAL) { + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, + VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) { rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id, &nb_sb); if (rc || !nb_sb) @@ -1579,8 +1581,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) } /* Try to unplug subblocks of plugged online blocks. */ - virtio_mem_for_each_mb_state_rev(vm, mb_id, - VIRTIO_MEM_MB_STATE_ONLINE) { + virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE) { rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id, &nb_sb); if (rc || !nb_sb) @@ -1606,11 +1607,12 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) unsigned long mb_id; int rc; - virtio_mem_for_each_mb_state(vm, mb_id, VIRTIO_MEM_MB_STATE_PLUGGED) { + virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_PLUGGED) { rc = virtio_mem_mb_unplug(vm, mb_id); if (rc) return rc; - virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); } return 0; @@ -2002,11 +2004,12 @@ static void virtio_mem_remove(struct virtio_device *vdev) * After we unregistered our callbacks, user space can online partially * plugged offline blocks. Make sure to remove them. 
*/ - virtio_mem_for_each_mb_state(vm, mb_id, - VIRTIO_MEM_MB_STATE_OFFLINE_PARTIAL) { + virtio_mem_sbm_for_each_mb(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { rc = virtio_mem_mb_remove(vm, mb_id); BUG_ON(rc); - virtio_mem_mb_set_state(vm, mb_id, VIRTIO_MEM_MB_STATE_UNUSED); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); } /* * After we unregistered our callbacks, user space can no longer @@ -2031,7 +2034,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) } /* remove all tracking data - no locking needed */ - vfree(vm->mb_state); + vfree(vm->sbm.mb_states); vfree(vm->sb_bitmap); /* reset the device and cleanup the queues */ -- cgit v1.2.3 From 54c6a6ba75ba4c428b659b167f87c07100ba260e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:05 +0100 Subject: virtio-mem: subblock states are specific to Sub Block Mode (SBM) Let's rename and move accordingly. While at it, rename sb_bitmap to "sb_states". Reviewed-by: Wei Yang Reviewed-by: Pankaj Gupta Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-20-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 132 +++++++++++++++++++++++--------------------- 1 file changed, 69 insertions(+), 63 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index c6cc301c78e1..851cddf5c606 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -137,17 +137,23 @@ struct virtio_mem { * memory in one 4 KiB page. */ uint8_t *mb_states; - } sbm; - /* - * $nb_sb_per_mb bit per memory block. Handled similar to sbm.mb_states. - * - * With 4MB subblocks, we manage 128GB of memory in one page. - */ - unsigned long *sb_bitmap; + /* + * Bitmap: one bit per subblock. Allocated similar to + * sbm.mb_states. + * + * A set bit means the corresponding subblock is plugged, + * otherwise it's unblocked. 
+ * + * With 4 MiB subblocks, we manage 128 GiB of memory in one + * 4 KiB page. + */ + unsigned long *sb_states; + } sbm; /* - * Mutex that protects the sbm.mb_count, sbm.mb_states, and sb_bitmap. + * Mutex that protects the sbm.mb_count, sbm.mb_states, and + * sbm.sb_states. * * When this lock is held the pointers can't change, ONLINE and * OFFLINE blocks can't change the state and no subblocks will get @@ -323,8 +329,8 @@ static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) * Calculate the bit number in the subblock bitmap for the given subblock * inside the given memory block. */ -static int virtio_mem_sb_bitmap_bit_nr(struct virtio_mem *vm, - unsigned long mb_id, int sb_id) +static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm, + unsigned long mb_id, int sb_id) { return (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; } @@ -334,13 +340,13 @@ static int virtio_mem_sb_bitmap_bit_nr(struct virtio_mem *vm, * * Will not modify the state of the memory block. */ -static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static void virtio_mem_sbm_set_sb_plugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); - __bitmap_set(vm->sb_bitmap, bit, count); + __bitmap_set(vm->sbm.sb_states, bit, count); } /* @@ -348,86 +354,87 @@ static void virtio_mem_mb_set_sb_plugged(struct virtio_mem *vm, * * Will not modify the state of the memory block. 
*/ -static void virtio_mem_mb_set_sb_unplugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static void virtio_mem_sbm_set_sb_unplugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); - __bitmap_clear(vm->sb_bitmap, bit, count); + __bitmap_clear(vm->sbm.sb_states, bit, count); } /* * Test if all selected subblocks are plugged. */ -static bool virtio_mem_mb_test_sb_plugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static bool virtio_mem_sbm_test_sb_plugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); if (count == 1) - return test_bit(bit, vm->sb_bitmap); + return test_bit(bit, vm->sbm.sb_states); /* TODO: Helper similar to bitmap_set() */ - return find_next_zero_bit(vm->sb_bitmap, bit + count, bit) >= + return find_next_zero_bit(vm->sbm.sb_states, bit + count, bit) >= bit + count; } /* * Test if all selected subblocks are unplugged. */ -static bool virtio_mem_mb_test_sb_unplugged(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { - const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, sb_id); + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id); /* TODO: Helper similar to bitmap_set() */ - return find_next_bit(vm->sb_bitmap, bit + count, bit) >= bit + count; + return find_next_bit(vm->sbm.sb_states, bit + count, bit) >= + bit + count; } /* * Find the first unplugged subblock. Returns vm->nb_sb_per_mb in case there is * none. 
*/ -static int virtio_mem_mb_first_unplugged_sb(struct virtio_mem *vm, +static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, unsigned long mb_id) { - const int bit = virtio_mem_sb_bitmap_bit_nr(vm, mb_id, 0); + const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0); - return find_next_zero_bit(vm->sb_bitmap, bit + vm->nb_sb_per_mb, bit) - - bit; + return find_next_zero_bit(vm->sbm.sb_states, + bit + vm->nb_sb_per_mb, bit) - bit; } /* * Prepare the subblock bitmap for the next memory block. */ -static int virtio_mem_sb_bitmap_prepare_next_mb(struct virtio_mem *vm) +static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm) { const unsigned long old_nb_mb = vm->next_mb_id - vm->first_mb_id; const unsigned long old_nb_bits = old_nb_mb * vm->nb_sb_per_mb; const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->nb_sb_per_mb; int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long)); int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long)); - unsigned long *new_sb_bitmap, *old_sb_bitmap; + unsigned long *new_bitmap, *old_bitmap; - if (vm->sb_bitmap && old_pages == new_pages) + if (vm->sbm.sb_states && old_pages == new_pages) return 0; - new_sb_bitmap = vzalloc(new_pages * PAGE_SIZE); - if (!new_sb_bitmap) + new_bitmap = vzalloc(new_pages * PAGE_SIZE); + if (!new_bitmap) return -ENOMEM; mutex_lock(&vm->hotplug_mutex); - if (new_sb_bitmap) - memcpy(new_sb_bitmap, vm->sb_bitmap, old_pages * PAGE_SIZE); + if (new_bitmap) + memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE); - old_sb_bitmap = vm->sb_bitmap; - vm->sb_bitmap = new_sb_bitmap; + old_bitmap = vm->sbm.sb_states; + vm->sbm.sb_states = new_bitmap; mutex_unlock(&vm->hotplug_mutex); - vfree(old_sb_bitmap); + vfree(old_bitmap); return 0; } @@ -638,7 +645,7 @@ static void virtio_mem_notify_going_offline(struct virtio_mem *vm, int sb_id; for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + if 
(virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->subblock_size); @@ -654,7 +661,7 @@ static void virtio_mem_notify_cancel_offline(struct virtio_mem *vm, int sb_id; for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->subblock_size); @@ -944,7 +951,7 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order) * If plugged, online the pages, otherwise, set them fake * offline (PageOffline). */ - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) generic_online_page(page, order); else virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order, @@ -1102,7 +1109,7 @@ static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, rc = virtio_mem_send_plug_request(vm, addr, size); if (!rc) - virtio_mem_mb_set_sb_plugged(vm, mb_id, sb_id, count); + virtio_mem_sbm_set_sb_plugged(vm, mb_id, sb_id, count); return rc; } @@ -1120,7 +1127,7 @@ static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, rc = virtio_mem_send_unplug_request(vm, addr, size); if (!rc) - virtio_mem_mb_set_sb_unplugged(vm, mb_id, sb_id, count); + virtio_mem_sbm_set_sb_unplugged(vm, mb_id, sb_id, count); return rc; } @@ -1143,14 +1150,14 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, while (*nb_sb) { /* Find the next candidate subblock */ while (sb_id >= 0 && - virtio_mem_mb_test_sb_unplugged(vm, mb_id, sb_id, 1)) + virtio_mem_sbm_test_sb_unplugged(vm, mb_id, sb_id, 1)) sb_id--; if (sb_id < 0) break; /* Try to unplug multiple subblocks at a time */ count = 1; while (count < *nb_sb && sb_id > 0 && - virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) { + virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) { 
count++; sb_id--; } @@ -1196,7 +1203,7 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, return rc; /* Resize the subblock bitmap if required. */ - rc = virtio_mem_sb_bitmap_prepare_next_mb(vm); + rc = virtio_mem_sbm_sb_states_prepare_next_mb(vm); if (rc) return rc; @@ -1281,14 +1288,13 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, return -EINVAL; while (*nb_sb) { - sb_id = virtio_mem_mb_first_unplugged_sb(vm, mb_id); + sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id); if (sb_id >= vm->nb_sb_per_mb) break; count = 1; while (count < *nb_sb && sb_id + count < vm->nb_sb_per_mb && - !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id + count, - 1)) + !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1)) count++; rc = virtio_mem_mb_plug_sb(vm, mb_id, sb_id, count); @@ -1305,7 +1311,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, virtio_mem_fake_online(pfn, nr_pages); } - if (virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { if (online) virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE); @@ -1405,13 +1411,13 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, rc = virtio_mem_mb_unplug_any_sb(vm, mb_id, nb_sb); /* some subblocks might have been unplugged even on failure */ - if (!virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) + if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); if (rc) return rc; - if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { /* * Remove the block from Linux - this should never fail. 
* Hinder the block from getting onlined by marking it @@ -1480,7 +1486,7 @@ static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm, /* If possible, try to unplug the complete block in one shot. */ if (*nb_sb >= vm->nb_sb_per_mb && - virtio_mem_mb_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, 0, vm->nb_sb_per_mb); if (!rc) { @@ -1494,7 +1500,7 @@ static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm, for (sb_id = vm->nb_sb_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) { /* Find the next candidate subblock */ while (sb_id >= 0 && - !virtio_mem_mb_test_sb_plugged(vm, mb_id, sb_id, 1)) + !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) sb_id--; if (sb_id < 0) break; @@ -1513,7 +1519,7 @@ unplugged: * remove it. This will usually not fail, as no memory is in use * anymore - however some other notifiers might NACK the request. */ - if (virtio_mem_mb_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { mutex_unlock(&vm->hotplug_mutex); rc = virtio_mem_mb_offline_and_remove(vm, mb_id); mutex_lock(&vm->hotplug_mutex); @@ -2035,7 +2041,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) /* remove all tracking data - no locking needed */ vfree(vm->sbm.mb_states); - vfree(vm->sb_bitmap); + vfree(vm->sbm.sb_states); /* reset the device and cleanup the queues */ vdev->config->reset(vdev); -- cgit v1.2.3 From 905c4c5146dcb1b1e0a534ae9b5da6c5e4f29c21 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:06 +0100 Subject: virtio-mem: nb_sb_per_mb and subblock_size are specific to Sub Block Mode (SBM) Let's rename to "sbs_per_mb" and "sb_size" and move accordingly. Reviewed-by: Wei Yang Cc: "Michael S. 
Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-21-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 96 ++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 851cddf5c606..6395c3090252 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -96,11 +96,6 @@ struct virtio_mem { /* Maximum region size in bytes. */ uint64_t region_size; - /* The subblock size. */ - uint64_t subblock_size; - /* The number of subblocks per memory block. */ - uint32_t nb_sb_per_mb; - /* Id of the first memory block of this device. */ unsigned long first_mb_id; /* Id of the last usable memory block of this device. */ @@ -126,6 +121,11 @@ struct virtio_mem { uint64_t offline_threshold; struct { + /* The subblock size. */ + uint64_t sb_size; + /* The number of subblocks per Linux memory block. */ + uint32_t sbs_per_mb; + /* Summary of all memory block states. */ unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; @@ -256,7 +256,7 @@ static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr); const unsigned long mb_addr = virtio_mem_mb_id_to_phys(mb_id); - return (addr - mb_addr) / vm->subblock_size; + return (addr - mb_addr) / vm->sbm.sb_size; } /* @@ -332,7 +332,7 @@ static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm, unsigned long mb_id, int sb_id) { - return (mb_id - vm->first_mb_id) * vm->nb_sb_per_mb + sb_id; + return (mb_id - vm->first_mb_id) * vm->sbm.sbs_per_mb + sb_id; } /* @@ -395,7 +395,7 @@ static bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm, } /* - * Find the first unplugged subblock. 
Returns vm->nb_sb_per_mb in case there is + * Find the first unplugged subblock. Returns vm->sbm.sbs_per_mb in case there is * none. */ static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, @@ -404,7 +404,7 @@ static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0); return find_next_zero_bit(vm->sbm.sb_states, - bit + vm->nb_sb_per_mb, bit) - bit; + bit + vm->sbm.sbs_per_mb, bit) - bit; } /* @@ -413,8 +413,8 @@ static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm) { const unsigned long old_nb_mb = vm->next_mb_id - vm->first_mb_id; - const unsigned long old_nb_bits = old_nb_mb * vm->nb_sb_per_mb; - const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->nb_sb_per_mb; + const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb; + const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb; int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long)); int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long)); unsigned long *new_bitmap, *old_bitmap; @@ -640,15 +640,15 @@ static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) static void virtio_mem_notify_going_offline(struct virtio_mem *vm, unsigned long mb_id) { - const unsigned long nr_pages = PFN_DOWN(vm->subblock_size); + const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); unsigned long pfn; int sb_id; - for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { + for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); + sb_id * vm->sbm.sb_size); virtio_mem_fake_offline_going_offline(pfn, nr_pages); } } @@ -656,15 +656,15 @@ static void virtio_mem_notify_going_offline(struct virtio_mem *vm, static void virtio_mem_notify_cancel_offline(struct 
virtio_mem *vm, unsigned long mb_id) { - const unsigned long nr_pages = PFN_DOWN(vm->subblock_size); + const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); unsigned long pfn; int sb_id; - for (sb_id = 0; sb_id < vm->nb_sb_per_mb; sb_id++) { + for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) { if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) continue; pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); + sb_id * vm->sbm.sb_size); virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); } } @@ -1103,8 +1103,8 @@ static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size; - const uint64_t size = count * vm->subblock_size; + sb_id * vm->sbm.sb_size; + const uint64_t size = count * vm->sbm.sb_size; int rc; rc = virtio_mem_send_plug_request(vm, addr, size); @@ -1121,8 +1121,8 @@ static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size; - const uint64_t size = count * vm->subblock_size; + sb_id * vm->sbm.sb_size; + const uint64_t size = count * vm->sbm.sb_size; int rc; rc = virtio_mem_send_unplug_request(vm, addr, size); @@ -1146,7 +1146,7 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, int sb_id, count; int rc; - sb_id = vm->nb_sb_per_mb - 1; + sb_id = vm->sbm.sbs_per_mb - 1; while (*nb_sb) { /* Find the next candidate subblock */ while (sb_id >= 0 && @@ -1181,7 +1181,7 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, */ static int virtio_mem_mb_unplug(struct virtio_mem *vm, unsigned long mb_id) { - uint64_t nb_sb = vm->nb_sb_per_mb; + uint64_t nb_sb = vm->sbm.sbs_per_mb; return virtio_mem_mb_unplug_any_sb(vm, mb_id, &nb_sb); } @@ -1222,7 +1222,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, unsigned long mb_id, uint64_t *nb_sb) 
{ - const int count = min_t(int, *nb_sb, vm->nb_sb_per_mb); + const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb); int rc; if (WARN_ON_ONCE(!count)) @@ -1240,7 +1240,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * Mark the block properly offline before adding it to Linux, * so the memory notifiers will find the block in the right state. */ - if (count == vm->nb_sb_per_mb) + if (count == vm->sbm.sbs_per_mb) virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE); else @@ -1289,11 +1289,11 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, while (*nb_sb) { sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id); - if (sb_id >= vm->nb_sb_per_mb) + if (sb_id >= vm->sbm.sbs_per_mb) break; count = 1; while (count < *nb_sb && - sb_id + count < vm->nb_sb_per_mb && + sb_id + count < vm->sbm.sbs_per_mb && !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1)) count++; @@ -1306,12 +1306,12 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, /* fake-online the pages if the memory block is online */ pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); - nr_pages = PFN_DOWN(count * vm->subblock_size); + sb_id * vm->sbm.sb_size); + nr_pages = PFN_DOWN(count * vm->sbm.sb_size); virtio_mem_fake_online(pfn, nr_pages); } - if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { if (online) virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE); @@ -1328,7 +1328,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, */ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) { - uint64_t nb_sb = diff / vm->subblock_size; + uint64_t nb_sb = diff / vm->sbm.sb_size; unsigned long mb_id; int rc; @@ -1411,13 +1411,13 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, rc = 
virtio_mem_mb_unplug_any_sb(vm, mb_id, nb_sb); /* some subblocks might have been unplugged even on failure */ - if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) + if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); if (rc) return rc; - if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { /* * Remove the block from Linux - this should never fail. * Hinder the block from getting onlined by marking it @@ -1444,12 +1444,12 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, unsigned long mb_id, int sb_id, int count) { - const unsigned long nr_pages = PFN_DOWN(vm->subblock_size) * count; + const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count; unsigned long start_pfn; int rc; start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) + - sb_id * vm->subblock_size); + sb_id * vm->sbm.sb_size); rc = virtio_mem_fake_offline(start_pfn, nr_pages); if (rc) @@ -1485,19 +1485,19 @@ static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm, int rc, sb_id; /* If possible, try to unplug the complete block in one shot. */ - if (*nb_sb >= vm->nb_sb_per_mb && - virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (*nb_sb >= vm->sbm.sbs_per_mb && + virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, 0, - vm->nb_sb_per_mb); + vm->sbm.sbs_per_mb); if (!rc) { - *nb_sb -= vm->nb_sb_per_mb; + *nb_sb -= vm->sbm.sbs_per_mb; goto unplugged; } else if (rc != -EBUSY) return rc; } /* Fallback to single subblocks. 
*/ - for (sb_id = vm->nb_sb_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) { + for (sb_id = vm->sbm.sbs_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) { /* Find the next candidate subblock */ while (sb_id >= 0 && !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) @@ -1519,7 +1519,7 @@ unplugged: * remove it. This will usually not fail, as no memory is in use * anymore - however some other notifiers might NACK the request. */ - if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->nb_sb_per_mb)) { + if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { mutex_unlock(&vm->hotplug_mutex); rc = virtio_mem_mb_offline_and_remove(vm, mb_id); mutex_lock(&vm->hotplug_mutex); @@ -1536,7 +1536,7 @@ unplugged: */ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) { - uint64_t nb_sb = diff / vm->subblock_size; + uint64_t nb_sb = diff / vm->sbm.sb_size; unsigned long mb_id; int rc; @@ -1827,11 +1827,11 @@ static int virtio_mem_init(struct virtio_mem *vm) * - Is required for now for alloc_contig_range() to work reliably - * it doesn't properly handle smaller granularity on ZONE_NORMAL. 
*/ - vm->subblock_size = max_t(uint64_t, MAX_ORDER_NR_PAGES, - pageblock_nr_pages) * PAGE_SIZE; - vm->subblock_size = max_t(uint64_t, vm->device_block_size, - vm->subblock_size); - vm->nb_sb_per_mb = memory_block_size_bytes() / vm->subblock_size; + vm->sbm.sb_size = max_t(uint64_t, MAX_ORDER_NR_PAGES, + pageblock_nr_pages) * PAGE_SIZE; + vm->sbm.sb_size = max_t(uint64_t, vm->device_block_size, + vm->sbm.sb_size); + vm->sbm.sbs_per_mb = memory_block_size_bytes() / vm->sbm.sb_size; /* Round up to the next full memory block */ vm->first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 + @@ -1849,7 +1849,7 @@ static int virtio_mem_init(struct virtio_mem *vm) dev_info(&vm->vdev->dev, "memory block size: 0x%lx", memory_block_size_bytes()); dev_info(&vm->vdev->dev, "subblock size: 0x%llx", - (unsigned long long)vm->subblock_size); + (unsigned long long)vm->sbm.sb_size); if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); -- cgit v1.2.3 From 8a6f082babea6744b876a23ff5ed6081bf12968d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:07 +0100 Subject: virtio-mem: memory block ids are specific to Sub Block Mode (SBM) Let's move first_mb_id/next_mb_id/last_usable_mb_id accordingly. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-22-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 46 ++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 6395c3090252..248d28e653a9 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -96,13 +96,6 @@ struct virtio_mem { /* Maximum region size in bytes. */ uint64_t region_size; - /* Id of the first memory block of this device. 
*/ - unsigned long first_mb_id; - /* Id of the last usable memory block of this device. */ - unsigned long last_usable_mb_id; - /* Id of the next memory bock to prepare when needed. */ - unsigned long next_mb_id; - /* The parent resource for all memory added via this device. */ struct resource *parent_resource; /* @@ -121,6 +114,13 @@ struct virtio_mem { uint64_t offline_threshold; struct { + /* Id of the first memory block of this device. */ + unsigned long first_mb_id; + /* Id of the last usable memory block of this device. */ + unsigned long last_usable_mb_id; + /* Id of the next memory bock to prepare when needed. */ + unsigned long next_mb_id; + /* The subblock size. */ uint64_t sb_size; /* The number of subblocks per Linux memory block. */ @@ -265,7 +265,7 @@ static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm, unsigned long mb_id, uint8_t state) { - const unsigned long idx = mb_id - vm->first_mb_id; + const unsigned long idx = mb_id - vm->sbm.first_mb_id; uint8_t old_state; old_state = vm->sbm.mb_states[idx]; @@ -282,7 +282,7 @@ static void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm, static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm, unsigned long mb_id) { - const unsigned long idx = mb_id - vm->first_mb_id; + const unsigned long idx = mb_id - vm->sbm.first_mb_id; return vm->sbm.mb_states[idx]; } @@ -292,8 +292,8 @@ static uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm, */ static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) { - int old_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id); - int new_pages = PFN_UP(vm->next_mb_id - vm->first_mb_id + 1); + int old_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id); + int new_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id + 1); uint8_t *new_array; if (vm->sbm.mb_states && old_pages == new_pages) @@ -314,14 +314,14 @@ static int 
virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) } #define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state) \ - for (_mb_id = _vm->first_mb_id; \ - _mb_id < _vm->next_mb_id && _vm->sbm.mb_count[_state]; \ + for (_mb_id = _vm->sbm.first_mb_id; \ + _mb_id < _vm->sbm.next_mb_id && _vm->sbm.mb_count[_state]; \ _mb_id++) \ if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) #define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state) \ - for (_mb_id = _vm->next_mb_id - 1; \ - _mb_id >= _vm->first_mb_id && _vm->sbm.mb_count[_state]; \ + for (_mb_id = _vm->sbm.next_mb_id - 1; \ + _mb_id >= _vm->sbm.first_mb_id && _vm->sbm.mb_count[_state]; \ _mb_id--) \ if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state) @@ -332,7 +332,7 @@ static int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm) static int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm, unsigned long mb_id, int sb_id) { - return (mb_id - vm->first_mb_id) * vm->sbm.sbs_per_mb + sb_id; + return (mb_id - vm->sbm.first_mb_id) * vm->sbm.sbs_per_mb + sb_id; } /* @@ -412,7 +412,7 @@ static int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm, */ static int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm) { - const unsigned long old_nb_mb = vm->next_mb_id - vm->first_mb_id; + const unsigned long old_nb_mb = vm->sbm.next_mb_id - vm->sbm.first_mb_id; const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb; const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb; int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long)); @@ -1194,7 +1194,7 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, { int rc; - if (vm->next_mb_id > vm->last_usable_mb_id) + if (vm->sbm.next_mb_id > vm->sbm.last_usable_mb_id) return -ENOSPC; /* Resize the state array if required. 
*/ @@ -1208,7 +1208,7 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, return rc; vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++; - *mb_id = vm->next_mb_id++; + *mb_id = vm->sbm.next_mb_id++; return 0; } @@ -1643,7 +1643,7 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm) usable_region_size, &usable_region_size); end_addr = vm->addr + usable_region_size; end_addr = min(end_addr, phys_limit); - vm->last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1; + vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1; /* see if there is a request to change the size */ virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size, @@ -1834,9 +1834,9 @@ static int virtio_mem_init(struct virtio_mem *vm) vm->sbm.sbs_per_mb = memory_block_size_bytes() / vm->sbm.sb_size; /* Round up to the next full memory block */ - vm->first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 + - memory_block_size_bytes()); - vm->next_mb_id = vm->first_mb_id; + vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 + + memory_block_size_bytes()); + vm->sbm.next_mb_id = vm->sbm.first_mb_id; /* Prepare the offline threshold - make sure we can add two blocks. */ vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(), -- cgit v1.2.3 From 602ef89457173a24dde30874faec1f15a00e112a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:08 +0100 Subject: virtio-mem: existing (un)plug functions are specific to Sub Block Mode (SBM) Let's rename them accordingly. virtio_mem_plug_request() and virtio_mem_unplug_request() will be handled separately. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-23-david@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_mem.c | 90 ++++++++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 47 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 248d28e653a9..ec81f9d4bccf 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -1099,8 +1099,8 @@ static int virtio_mem_send_unplug_all_request(struct virtio_mem *vm) * Plug selected subblocks. Updates the plugged state, but not the state * of the memory block. */ -static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, - int sb_id, int count) +static int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id, + int sb_id, int count) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->sbm.sb_size; @@ -1117,8 +1117,8 @@ static int virtio_mem_mb_plug_sb(struct virtio_mem *vm, unsigned long mb_id, * Unplug selected subblocks. Updates the plugged state, but not the state * of the memory block. */ -static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, - int sb_id, int count) +static int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, + int sb_id, int count) { const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) + sb_id * vm->sbm.sb_size; @@ -1140,8 +1140,8 @@ static int virtio_mem_mb_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, * * Note: can fail after some subblocks were unplugged. 
*/ -static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, - unsigned long mb_id, uint64_t *nb_sb) +static int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm, + unsigned long mb_id, uint64_t *nb_sb) { int sb_id, count; int rc; @@ -1162,7 +1162,7 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, sb_id--; } - rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count); + rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); if (rc) return rc; *nb_sb -= count; @@ -1179,18 +1179,18 @@ static int virtio_mem_mb_unplug_any_sb(struct virtio_mem *vm, * * Note: can fail after some subblocks were unplugged. */ -static int virtio_mem_mb_unplug(struct virtio_mem *vm, unsigned long mb_id) +static int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id) { uint64_t nb_sb = vm->sbm.sbs_per_mb; - return virtio_mem_mb_unplug_any_sb(vm, mb_id, &nb_sb); + return virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb); } /* * Prepare tracking data for the next memory block. */ -static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, - unsigned long *mb_id) +static int virtio_mem_sbm_prepare_next_mb(struct virtio_mem *vm, + unsigned long *mb_id) { int rc; @@ -1218,9 +1218,8 @@ static int virtio_mem_prepare_next_mb(struct virtio_mem *vm, * * Will modify the state of the memory block. */ -static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, - unsigned long mb_id, - uint64_t *nb_sb) +static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm, + unsigned long mb_id, uint64_t *nb_sb) { const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb); int rc; @@ -1232,7 +1231,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * Plug the requested number of subblocks before adding it to linux, * so that onlining will directly online all plugged subblocks. 
*/ - rc = virtio_mem_mb_plug_sb(vm, mb_id, 0, count); + rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count); if (rc) return rc; @@ -1259,7 +1258,7 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * TODO: Linux MM does not properly clean up yet in all cases * where adding of memory failed - especially on -ENOMEM. */ - if (virtio_mem_mb_unplug_sb(vm, mb_id, 0, count)) + if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count)) new_state = VIRTIO_MEM_SBM_MB_PLUGGED; virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); return rc; @@ -1277,8 +1276,9 @@ static int virtio_mem_mb_plug_and_add(struct virtio_mem *vm, * * Note: Can fail after some subblocks were successfully plugged. */ -static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, - uint64_t *nb_sb, bool online) +static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm, + unsigned long mb_id, uint64_t *nb_sb, + bool online) { unsigned long pfn, nr_pages; int sb_id, count; @@ -1297,7 +1297,7 @@ static int virtio_mem_mb_plug_any_sb(struct virtio_mem *vm, unsigned long mb_id, !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1)) count++; - rc = virtio_mem_mb_plug_sb(vm, mb_id, sb_id, count); + rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count); if (rc) return rc; *nb_sb -= count; @@ -1341,7 +1341,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) /* Try to plug subblocks of partially plugged online blocks. */ virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) { - rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, true); + rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, true); if (rc || !nb_sb) goto out_unlock; cond_resched(); @@ -1350,7 +1350,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) /* Try to plug subblocks of partially plugged offline blocks. 
*/ virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { - rc = virtio_mem_mb_plug_any_sb(vm, mb_id, &nb_sb, false); + rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb, false); if (rc || !nb_sb) goto out_unlock; cond_resched(); @@ -1367,7 +1367,7 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; - rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb); + rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); if (rc || !nb_sb) return rc; cond_resched(); @@ -1378,10 +1378,10 @@ static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes())) return -ENOSPC; - rc = virtio_mem_prepare_next_mb(vm, &mb_id); + rc = virtio_mem_sbm_prepare_next_mb(vm, &mb_id); if (rc) return rc; - rc = virtio_mem_mb_plug_and_add(vm, mb_id, &nb_sb); + rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb); if (rc) return rc; cond_resched(); @@ -1402,13 +1402,13 @@ out_unlock: * * Note: Can fail after some subblocks were successfully unplugged. */ -static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, - unsigned long mb_id, - uint64_t *nb_sb) +static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm, + unsigned long mb_id, + uint64_t *nb_sb) { int rc; - rc = virtio_mem_mb_unplug_any_sb(vm, mb_id, nb_sb); + rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, nb_sb); /* some subblocks might have been unplugged even on failure */ if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) @@ -1440,9 +1440,9 @@ static int virtio_mem_mb_unplug_any_sb_offline(struct virtio_mem *vm, * * Will modify the state of the memory block. 
*/ -static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, - unsigned long mb_id, int sb_id, - int count) +static int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm, + unsigned long mb_id, int sb_id, + int count) { const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count; unsigned long start_pfn; @@ -1456,7 +1456,7 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, return rc; /* Try to unplug the allocated memory */ - rc = virtio_mem_mb_unplug_sb(vm, mb_id, sb_id, count); + rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count); if (rc) { /* Return the memory to the buddy. */ virtio_mem_fake_online(start_pfn, nr_pages); @@ -1478,17 +1478,17 @@ static int virtio_mem_mb_unplug_sb_online(struct virtio_mem *vm, * Note: Can fail after some subblocks were successfully unplugged. Can * return 0 even if subblocks were busy and could not get unplugged. */ -static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm, - unsigned long mb_id, - uint64_t *nb_sb) +static int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm, + unsigned long mb_id, + uint64_t *nb_sb) { int rc, sb_id; /* If possible, try to unplug the complete block in one shot. */ if (*nb_sb >= vm->sbm.sbs_per_mb && virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { - rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, 0, - vm->sbm.sbs_per_mb); + rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, 0, + vm->sbm.sbs_per_mb); if (!rc) { *nb_sb -= vm->sbm.sbs_per_mb; goto unplugged; @@ -1505,7 +1505,7 @@ static int virtio_mem_mb_unplug_any_sb_online(struct virtio_mem *vm, if (sb_id < 0) break; - rc = virtio_mem_mb_unplug_sb_online(vm, mb_id, sb_id, 1); + rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1); if (rc == -EBUSY) continue; else if (rc) @@ -1553,8 +1553,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) /* Try to unplug subblocks of partially plugged offline blocks. 
*/ virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { - rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id, - &nb_sb); + rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; cond_resched(); @@ -1562,8 +1561,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) /* Try to unplug subblocks of plugged offline blocks. */ virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE) { - rc = virtio_mem_mb_unplug_any_sb_offline(vm, mb_id, - &nb_sb); + rc = virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; cond_resched(); @@ -1577,8 +1575,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) /* Try to unplug subblocks of partially plugged online blocks. */ virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL) { - rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id, - &nb_sb); + rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; mutex_unlock(&vm->hotplug_mutex); @@ -1588,8 +1585,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) /* Try to unplug subblocks of plugged online blocks. 
*/ virtio_mem_sbm_for_each_mb_rev(vm, mb_id, VIRTIO_MEM_SBM_MB_ONLINE) { - rc = virtio_mem_mb_unplug_any_sb_online(vm, mb_id, - &nb_sb); + rc = virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, &nb_sb); if (rc || !nb_sb) goto out_unlock; mutex_unlock(&vm->hotplug_mutex); @@ -1614,7 +1610,7 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) int rc; virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_PLUGGED) { - rc = virtio_mem_mb_unplug(vm, mb_id); + rc = virtio_mem_sbm_unplug_mb(vm, mb_id); if (rc) return rc; virtio_mem_sbm_set_mb_state(vm, mb_id, -- cgit v1.2.3 From d46dfb62f676f949352c7fd8b7a0fa3b7fe1b933 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:09 +0100 Subject: virtio-mem: memory notifier callbacks are specific to Sub Block Mode (SBM) Let's rename accordingly. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-24-david@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/virtio/virtio_mem.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index ec81f9d4bccf..cdcf67e55a56 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -587,8 +587,8 @@ static bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start, return start >= vm->addr && start + size <= vm->addr + vm->region_size; } -static int virtio_mem_notify_going_online(struct virtio_mem *vm, - unsigned long mb_id) +static int virtio_mem_sbm_notify_going_online(struct virtio_mem *vm, + unsigned long mb_id) { switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: @@ -602,8 +602,8 @@ static int virtio_mem_notify_going_online(struct virtio_mem *vm, return NOTIFY_BAD; } -static void virtio_mem_notify_offline(struct virtio_mem *vm, - unsigned long mb_id) +static void virtio_mem_sbm_notify_offline(struct virtio_mem *vm, + unsigned long mb_id) { switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { case VIRTIO_MEM_SBM_MB_ONLINE_PARTIAL: @@ -620,7 +620,8 @@ static void virtio_mem_notify_offline(struct virtio_mem *vm, } } -static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) +static void virtio_mem_sbm_notify_online(struct virtio_mem *vm, + unsigned long mb_id) { switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) { case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL: @@ -637,8 +638,8 @@ static void virtio_mem_notify_online(struct virtio_mem *vm, unsigned long mb_id) } } -static void virtio_mem_notify_going_offline(struct virtio_mem *vm, - unsigned long mb_id) +static void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm, + unsigned long mb_id) { const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); unsigned long pfn; @@ -653,8 +654,8 @@ static void virtio_mem_notify_going_offline(struct virtio_mem *vm, } } -static void virtio_mem_notify_cancel_offline(struct virtio_mem *vm, - 
unsigned long mb_id) +static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm, + unsigned long mb_id) { const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size); unsigned long pfn; @@ -714,7 +715,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; } vm->hotplug_active = true; - virtio_mem_notify_going_offline(vm, mb_id); + virtio_mem_sbm_notify_going_offline(vm, mb_id); break; case MEM_GOING_ONLINE: mutex_lock(&vm->hotplug_mutex); @@ -724,10 +725,10 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; } vm->hotplug_active = true; - rc = virtio_mem_notify_going_online(vm, mb_id); + rc = virtio_mem_sbm_notify_going_online(vm, mb_id); break; case MEM_OFFLINE: - virtio_mem_notify_offline(vm, mb_id); + virtio_mem_sbm_notify_offline(vm, mb_id); atomic64_add(size, &vm->offline_size); /* @@ -741,7 +742,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, mutex_unlock(&vm->hotplug_mutex); break; case MEM_ONLINE: - virtio_mem_notify_online(vm, mb_id); + virtio_mem_sbm_notify_online(vm, mb_id); atomic64_sub(size, &vm->offline_size); /* @@ -760,7 +761,7 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, case MEM_CANCEL_OFFLINE: if (!vm->hotplug_active) break; - virtio_mem_notify_cancel_offline(vm, mb_id); + virtio_mem_sbm_notify_cancel_offline(vm, mb_id); vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; -- cgit v1.2.3 From 01afdee29aef144ad956d1d5302aaaeabf498f48 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:10 +0100 Subject: virtio-mem: factor out adding/removing memory from Linux Let's use wrappers for the low-level functions that dev_dbg/dev_warn and work on addr + size, such that we can reuse them for adding/removing in other granularity. We only warn when adding memory failed, because that's something to pay attention to. 
We won't warn when removing failed, we'll reuse that in racy context soon (and we do have proper BUG_ON() statements in the current cases where it must never happen). Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-25-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 107 ++++++++++++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 34 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index cdcf67e55a56..95fa0262af1d 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -451,18 +451,16 @@ static bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size) } /* - * Try to add a memory block to Linux. This will usually only fail - * if out of memory. + * Try adding memory to Linux. Will usually only fail if out of memory. * * Must not be called with the vm->hotplug_mutex held (possible deadlock with * onlining code). * - * Will not modify the state of the memory block. + * Will not modify the state of memory blocks in virtio-mem. */ -static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) +static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr, + uint64_t size) { - const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - const uint64_t size = memory_block_size_bytes(); int rc; /* @@ -476,32 +474,50 @@ static int virtio_mem_mb_add(struct virtio_mem *vm, unsigned long mb_id) return -ENOMEM; } - dev_dbg(&vm->vdev->dev, "adding memory block: %lu\n", mb_id); + dev_dbg(&vm->vdev->dev, "adding memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); /* Memory might get onlined immediately. 
*/ atomic64_add(size, &vm->offline_size); rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name, MEMHP_MERGE_RESOURCE); - if (rc) + if (rc) { atomic64_sub(size, &vm->offline_size); + dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc); + /* + * TODO: Linux MM does not properly clean up yet in all cases + * where adding of memory failed - especially on -ENOMEM. + */ + } return rc; } /* - * Try to remove a memory block from Linux. Will only fail if the memory block - * is not offline. + * See virtio_mem_add_memory(): Try adding a single Linux memory block. + */ +static int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id) +{ + const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); + + return virtio_mem_add_memory(vm, addr, size); +} + +/* + * Try removing memory from Linux. Will only fail if memory blocks aren't + * offline. * * Must not be called with the vm->hotplug_mutex held (possible deadlock with * onlining code). * - * Will not modify the state of the memory block. + * Will not modify the state of memory blocks in virtio-mem. */ -static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id) +static int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr, + uint64_t size) { - const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - const uint64_t size = memory_block_size_bytes(); int rc; - dev_dbg(&vm->vdev->dev, "removing memory block: %lu\n", mb_id); + dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); rc = remove_memory(vm->nid, addr, size); if (!rc) { atomic64_sub(size, &vm->offline_size); @@ -510,27 +526,41 @@ static int virtio_mem_mb_remove(struct virtio_mem *vm, unsigned long mb_id) * immediately instead of waiting. */ virtio_mem_retry(vm); + } else { + dev_dbg(&vm->vdev->dev, "removing memory failed: %d\n", rc); } return rc; } /* - * Try to offline and remove a memory block from Linux. 
+ * See virtio_mem_remove_memory(): Try removing a single Linux memory block. + */ +static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id) +{ + const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); + + return virtio_mem_remove_memory(vm, addr, size); +} + +/* + * Try offlining and removing memory from Linux. * * Must not be called with the vm->hotplug_mutex held (possible deadlock with * onlining code). * - * Will not modify the state of the memory block. + * Will not modify the state of memory blocks in virtio-mem. */ -static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm, - unsigned long mb_id) +static int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm, + uint64_t addr, + uint64_t size) { - const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); - const uint64_t size = memory_block_size_bytes(); int rc; - dev_dbg(&vm->vdev->dev, "offlining and removing memory block: %lu\n", - mb_id); + dev_dbg(&vm->vdev->dev, + "offlining and removing memory: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + rc = offline_and_remove_memory(vm->nid, addr, size); if (!rc) { atomic64_sub(size, &vm->offline_size); @@ -539,10 +569,26 @@ static int virtio_mem_mb_offline_and_remove(struct virtio_mem *vm, * immediately instead of waiting. */ virtio_mem_retry(vm); + } else { + dev_dbg(&vm->vdev->dev, + "offlining and removing memory failed: %d\n", rc); } return rc; } +/* + * See virtio_mem_offline_and_remove_memory(): Try offlining and removing + * a single Linux memory block. + */ +static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm, + unsigned long mb_id) +{ + const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id); + const uint64_t size = memory_block_size_bytes(); + + return virtio_mem_offline_and_remove_memory(vm, addr, size); +} + /* * Trigger the workqueue so the device can perform its magic. 
*/ @@ -1248,17 +1294,10 @@ static int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL); /* Add the memory block to linux - if that fails, try to unplug. */ - rc = virtio_mem_mb_add(vm, mb_id); + rc = virtio_mem_sbm_add_mb(vm, mb_id); if (rc) { int new_state = VIRTIO_MEM_SBM_MB_UNUSED; - dev_err(&vm->vdev->dev, - "adding memory block %lu failed with %d\n", mb_id, rc); - - /* - * TODO: Linux MM does not properly clean up yet in all cases - * where adding of memory failed - especially on -ENOMEM. - */ if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count)) new_state = VIRTIO_MEM_SBM_MB_PLUGGED; virtio_mem_sbm_set_mb_state(vm, mb_id, new_state); @@ -1429,7 +1468,7 @@ static int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm, VIRTIO_MEM_SBM_MB_UNUSED); mutex_unlock(&vm->hotplug_mutex); - rc = virtio_mem_mb_remove(vm, mb_id); + rc = virtio_mem_sbm_remove_mb(vm, mb_id); BUG_ON(rc); mutex_lock(&vm->hotplug_mutex); } @@ -1522,7 +1561,7 @@ unplugged: */ if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) { mutex_unlock(&vm->hotplug_mutex); - rc = virtio_mem_mb_offline_and_remove(vm, mb_id); + rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id); mutex_lock(&vm->hotplug_mutex); if (!rc) virtio_mem_sbm_set_mb_state(vm, mb_id, @@ -2009,7 +2048,7 @@ static void virtio_mem_remove(struct virtio_device *vdev) */ virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { - rc = virtio_mem_mb_remove(vm, mb_id); + rc = virtio_mem_sbm_remove_mb(vm, mb_id); BUG_ON(rc); virtio_mem_sbm_set_mb_state(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED); -- cgit v1.2.3 From 4ba50cd3355d742c8befbfe38dcbe559f2b0f758 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:11 +0100 Subject: virtio-mem: Big Block Mode (BBM) memory hotplug Currently, we do not support device block sizes that exceed the Linux memory block size. 
For example, having a device block size of 1 GiB (e.g., gigantic pages in the hypervisor) won't work with 128 MiB Linux memory blocks. Let's implement Big Block Mode (BBM), whereby we add/remove at least one Linux memory block at a time. With a 1 GiB device block size, a Big Block (BB) will cover 8 Linux memory blocks. We'll keep registering the online_page_callback machinery, it will be used for safe memory hotunplug in BBM next. Note: BBM is properly prepared for variable-sized Linux memory blocks that we might see in the future. So we won't care how many Linux memory blocks a big block actually spans, and how the memory notifier is called. Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Cc: Michal Hocko Cc: Oscar Salvador Cc: Wei Yang Cc: Andrew Morton Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-26-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 560 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 441 insertions(+), 119 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 95fa0262af1d..8a4f735360ac 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -30,12 +30,18 @@ MODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); /* * virtio-mem currently supports the following modes of operation: * - * * Sub Block Mode (SBM): A Linux memory block spans 1..X subblocks (SB). The + * * Sub Block Mode (SBM): A Linux memory block spans 2..X subblocks (SB). The * size of a Sub Block (SB) is determined based on the device block size, the * pageblock size, and the maximum allocation granularity of the buddy. * Subblocks within a Linux memory block might either be plugged or unplugged. * Memory is added/removed to Linux MM in Linux memory block granularity. * + * * Big Block Mode (BBM): A Big Block (BB) spans 1..X Linux memory blocks. + * Memory is added/removed to Linux MM in Big Block granularity. 
+ * + * The mode is determined automatically based on the Linux memory block size + * and the device block size. + * * User space / core MM (auto onlining) is responsible for onlining added * Linux memory blocks - and for selecting a zone. Linux Memory Blocks are * always onlined separately, and all memory within a Linux memory block is @@ -61,6 +67,19 @@ enum virtio_mem_sbm_mb_state { VIRTIO_MEM_SBM_MB_COUNT }; +/* + * State of a Big Block (BB) in BBM, covering 1..X Linux memory blocks. + */ +enum virtio_mem_bbm_bb_state { + /* Unplugged, not added to Linux. Can be reused later. */ + VIRTIO_MEM_BBM_BB_UNUSED = 0, + /* Plugged, not added to Linux. Error on add_memory(). */ + VIRTIO_MEM_BBM_BB_PLUGGED, + /* Plugged and added to Linux. */ + VIRTIO_MEM_BBM_BB_ADDED, + VIRTIO_MEM_BBM_BB_COUNT +}; + struct virtio_mem { struct virtio_device *vdev; @@ -113,47 +132,70 @@ struct virtio_mem { atomic64_t offline_size; uint64_t offline_threshold; - struct { - /* Id of the first memory block of this device. */ - unsigned long first_mb_id; - /* Id of the last usable memory block of this device. */ - unsigned long last_usable_mb_id; - /* Id of the next memory bock to prepare when needed. */ - unsigned long next_mb_id; - - /* The subblock size. */ - uint64_t sb_size; - /* The number of subblocks per Linux memory block. */ - uint32_t sbs_per_mb; - - /* Summary of all memory block states. */ - unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; - - /* - * One byte state per memory block. Allocated via vmalloc(). - * Resized (alloc+copy+free) on demand. - * - * With 128 MiB memory blocks, we have states for 512 GiB of - * memory in one 4 KiB page. - */ - uint8_t *mb_states; - - /* - * Bitmap: one bit per subblock. Allocated similar to - * sbm.mb_states. - * - * A set bit means the corresponding subblock is plugged, - * otherwise it's unblocked. - * - * With 4 MiB subblocks, we manage 128 GiB of memory in one - * 4 KiB page. 
- */ - unsigned long *sb_states; - } sbm; + /* If set, the driver is in SBM, otherwise in BBM. */ + bool in_sbm; + + union { + struct { + /* Id of the first memory block of this device. */ + unsigned long first_mb_id; + /* Id of the last usable memory block of this device. */ + unsigned long last_usable_mb_id; + /* Id of the next memory bock to prepare when needed. */ + unsigned long next_mb_id; + + /* The subblock size. */ + uint64_t sb_size; + /* The number of subblocks per Linux memory block. */ + uint32_t sbs_per_mb; + + /* Summary of all memory block states. */ + unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT]; + + /* + * One byte state per memory block. Allocated via + * vmalloc(). Resized (alloc+copy+free) on demand. + * + * With 128 MiB memory blocks, we have states for 512 + * GiB of memory in one 4 KiB page. + */ + uint8_t *mb_states; + + /* + * Bitmap: one bit per subblock. Allocated similar to + * sbm.mb_states. + * + * A set bit means the corresponding subblock is + * plugged, otherwise it's unblocked. + * + * With 4 MiB subblocks, we manage 128 GiB of memory + * in one 4 KiB page. + */ + unsigned long *sb_states; + } sbm; + + struct { + /* Id of the first big block of this device. */ + unsigned long first_bb_id; + /* Id of the last usable big block of this device. */ + unsigned long last_usable_bb_id; + /* Id of the next device bock to prepare when needed. */ + unsigned long next_bb_id; + + /* Summary of all big block states. */ + unsigned long bb_count[VIRTIO_MEM_BBM_BB_COUNT]; + + /* One byte state per big block. See sbm.mb_states. */ + uint8_t *bb_states; + + /* The block size used for plugging/adding/removing. */ + uint64_t bb_size; + } bbm; + }; /* - * Mutex that protects the sbm.mb_count, sbm.mb_states, and - * sbm.sb_states. 
+ * Mutex that protects the sbm.mb_count, sbm.mb_states, + * sbm.sb_states, bbm.bb_count, and bbm.bb_states * * When this lock is held the pointers can't change, ONLINE and * OFFLINE blocks can't change the state and no subblocks will get @@ -247,6 +289,24 @@ static unsigned long virtio_mem_mb_id_to_phys(unsigned long mb_id) return mb_id * memory_block_size_bytes(); } +/* + * Calculate the big block id of a given address. + */ +static unsigned long virtio_mem_phys_to_bb_id(struct virtio_mem *vm, + uint64_t addr) +{ + return addr / vm->bbm.bb_size; +} + +/* + * Calculate the physical start address of a given big block id. + */ +static uint64_t virtio_mem_bb_id_to_phys(struct virtio_mem *vm, + unsigned long bb_id) +{ + return bb_id * vm->bbm.bb_size; +} + /* * Calculate the subblock id of a given address. */ @@ -259,6 +319,67 @@ static unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm, return (addr - mb_addr) / vm->sbm.sb_size; } +/* + * Set the state of a big block, taking care of the state counter. + */ +static void virtio_mem_bbm_set_bb_state(struct virtio_mem *vm, + unsigned long bb_id, + enum virtio_mem_bbm_bb_state state) +{ + const unsigned long idx = bb_id - vm->bbm.first_bb_id; + enum virtio_mem_bbm_bb_state old_state; + + old_state = vm->bbm.bb_states[idx]; + vm->bbm.bb_states[idx] = state; + + BUG_ON(vm->bbm.bb_count[old_state] == 0); + vm->bbm.bb_count[old_state]--; + vm->bbm.bb_count[state]++; +} + +/* + * Get the state of a big block. + */ +static enum virtio_mem_bbm_bb_state virtio_mem_bbm_get_bb_state(struct virtio_mem *vm, + unsigned long bb_id) +{ + return vm->bbm.bb_states[bb_id - vm->bbm.first_bb_id]; +} + +/* + * Prepare the big block state array for the next big block. 
+ */ +static int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm) +{ + unsigned long old_bytes = vm->bbm.next_bb_id - vm->bbm.first_bb_id; + unsigned long new_bytes = old_bytes + 1; + int old_pages = PFN_UP(old_bytes); + int new_pages = PFN_UP(new_bytes); + uint8_t *new_array; + + if (vm->bbm.bb_states && old_pages == new_pages) + return 0; + + new_array = vzalloc(new_pages * PAGE_SIZE); + if (!new_array) + return -ENOMEM; + + mutex_lock(&vm->hotplug_mutex); + if (vm->bbm.bb_states) + memcpy(new_array, vm->bbm.bb_states, old_pages * PAGE_SIZE); + vfree(vm->bbm.bb_states); + vm->bbm.bb_states = new_array; + mutex_unlock(&vm->hotplug_mutex); + + return 0; +} + +#define virtio_mem_bbm_for_each_bb(_vm, _bb_id, _state) \ + for (_bb_id = vm->bbm.first_bb_id; \ + _bb_id < vm->bbm.next_bb_id && _vm->bbm.bb_count[_state]; \ + _bb_id++) \ + if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) + /* * Set the state of a memory block, taking care of the state counter. */ @@ -502,6 +623,17 @@ static int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id) return virtio_mem_add_memory(vm, addr, size); } +/* + * See virtio_mem_add_memory(): Try adding a big block. + */ +static int virtio_mem_bbm_add_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_add_memory(vm, addr, size); +} + /* * Try removing memory from Linux. Will only fail if memory blocks aren't * offline. @@ -729,20 +861,33 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, struct memory_notify *mhp = arg; const unsigned long start = PFN_PHYS(mhp->start_pfn); const unsigned long size = PFN_PHYS(mhp->nr_pages); - const unsigned long mb_id = virtio_mem_phys_to_mb_id(start); int rc = NOTIFY_OK; + unsigned long id; if (!virtio_mem_overlaps_range(vm, start, size)) return NOTIFY_DONE; - /* - * Memory is onlined/offlined in memory block granularity. 
We cannot - * cross virtio-mem device boundaries and memory block boundaries. Bail - * out if this ever changes. - */ - if (WARN_ON_ONCE(size != memory_block_size_bytes() || - !IS_ALIGNED(start, memory_block_size_bytes()))) - return NOTIFY_BAD; + if (vm->in_sbm) { + id = virtio_mem_phys_to_mb_id(start); + /* + * In SBM, we add memory in separate memory blocks - we expect + * it to be onlined/offlined in the same granularity. Bail out + * if this ever changes. + */ + if (WARN_ON_ONCE(size != memory_block_size_bytes() || + !IS_ALIGNED(start, memory_block_size_bytes()))) + return NOTIFY_BAD; + } else { + id = virtio_mem_phys_to_bb_id(vm, start); + /* + * In BBM, we only care about onlining/offlining happening + * within a single big block, we don't care about the + * actual granularity as we don't track individual Linux + * memory blocks. + */ + if (WARN_ON_ONCE(id != virtio_mem_phys_to_bb_id(vm, start + size - 1))) + return NOTIFY_BAD; + } /* * Avoid circular locking lockdep warnings. We lock the mutex @@ -761,7 +906,8 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; } vm->hotplug_active = true; - virtio_mem_sbm_notify_going_offline(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_going_offline(vm, id); break; case MEM_GOING_ONLINE: mutex_lock(&vm->hotplug_mutex); @@ -771,10 +917,12 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; } vm->hotplug_active = true; - rc = virtio_mem_sbm_notify_going_online(vm, mb_id); + if (vm->in_sbm) + rc = virtio_mem_sbm_notify_going_online(vm, id); break; case MEM_OFFLINE: - virtio_mem_sbm_notify_offline(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_offline(vm, id); atomic64_add(size, &vm->offline_size); /* @@ -788,7 +936,8 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, mutex_unlock(&vm->hotplug_mutex); break; case MEM_ONLINE: - virtio_mem_sbm_notify_online(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_online(vm, id); 
atomic64_sub(size, &vm->offline_size); /* @@ -807,7 +956,8 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, case MEM_CANCEL_OFFLINE: if (!vm->hotplug_active) break; - virtio_mem_sbm_notify_cancel_offline(vm, mb_id); + if (vm->in_sbm) + virtio_mem_sbm_notify_cancel_offline(vm, id); vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; @@ -978,27 +1128,29 @@ static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, static void virtio_mem_online_page_cb(struct page *page, unsigned int order) { const unsigned long addr = page_to_phys(page); - const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr); + unsigned long id, sb_id; struct virtio_mem *vm; - int sb_id; + bool do_online; - /* - * We exploit here that subblocks have at least MAX_ORDER_NR_PAGES. - * size/alignment and that this callback is is called with such a - * size/alignment. So we cannot cross subblocks and therefore - * also not memory blocks. - */ rcu_read_lock(); list_for_each_entry_rcu(vm, &virtio_mem_devices, next) { if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order))) continue; - sb_id = virtio_mem_phys_to_sb_id(vm, addr); - /* - * If plugged, online the pages, otherwise, set them fake - * offline (PageOffline). - */ - if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1)) + if (vm->in_sbm) { + /* + * We exploit here that subblocks have at least + * MAX_ORDER_NR_PAGES size/alignment - so we cannot + * cross subblocks within one call. + */ + id = virtio_mem_phys_to_mb_id(addr); + sb_id = virtio_mem_phys_to_sb_id(vm, addr); + do_online = virtio_mem_sbm_test_sb_plugged(vm, id, + sb_id, 1); + } else { + do_online = true; + } + if (do_online) generic_online_page(page, order); else virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order, @@ -1178,6 +1330,32 @@ static int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id, return rc; } +/* + * Request to unplug a big block. + * + * Will not modify the state of the big block. 
+ */ +static int virtio_mem_bbm_unplug_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_send_unplug_request(vm, addr, size); +} + +/* + * Request to plug a big block. + * + * Will not modify the state of the big block. + */ +static int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_send_plug_request(vm, addr, size); +} + /* * Unplug the desired number of plugged subblocks of a offline or not-added * memory block. Will fail if any subblock cannot get unplugged (instead of @@ -1363,10 +1541,7 @@ static int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm, return 0; } -/* - * Try to plug the requested amount of memory. - */ -static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) +static int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff) { uint64_t nb_sb = diff / vm->sbm.sb_size; unsigned long mb_id; @@ -1433,6 +1608,112 @@ out_unlock: return rc; } +/* + * Plug a big block and add it to Linux. + * + * Will modify the state of the big block. + */ +static int virtio_mem_bbm_plug_and_add_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + int rc; + + if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_UNUSED)) + return -EINVAL; + + rc = virtio_mem_bbm_plug_bb(vm, bb_id); + if (rc) + return rc; + virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); + + rc = virtio_mem_bbm_add_bb(vm, bb_id); + if (rc) { + if (!virtio_mem_bbm_unplug_bb(vm, bb_id)) + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_UNUSED); + else + /* Retry from the main loop. */ + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_PLUGGED); + return rc; + } + return 0; +} + +/* + * Prepare tracking data for the next big block. 
+ */ +static int virtio_mem_bbm_prepare_next_bb(struct virtio_mem *vm, + unsigned long *bb_id) +{ + int rc; + + if (vm->bbm.next_bb_id > vm->bbm.last_usable_bb_id) + return -ENOSPC; + + /* Resize the big block state array if required. */ + rc = virtio_mem_bbm_bb_states_prepare_next_bb(vm); + if (rc) + return rc; + + vm->bbm.bb_count[VIRTIO_MEM_BBM_BB_UNUSED]++; + *bb_id = vm->bbm.next_bb_id; + vm->bbm.next_bb_id++; + return 0; +} + +static int virtio_mem_bbm_plug_request(struct virtio_mem *vm, uint64_t diff) +{ + uint64_t nb_bb = diff / vm->bbm.bb_size; + unsigned long bb_id; + int rc; + + if (!nb_bb) + return 0; + + /* Try to plug and add unused big blocks */ + virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED) { + if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) + return -ENOSPC; + + rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); + if (!rc) + nb_bb--; + if (rc || !nb_bb) + return rc; + cond_resched(); + } + + /* Try to prepare, plug and add new big blocks */ + while (nb_bb) { + if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size)) + return -ENOSPC; + + rc = virtio_mem_bbm_prepare_next_bb(vm, &bb_id); + if (rc) + return rc; + rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id); + if (!rc) + nb_bb--; + if (rc) + return rc; + cond_resched(); + } + + return 0; +} + +/* + * Try to plug the requested amount of memory. + */ +static int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff) +{ + if (vm->in_sbm) + return virtio_mem_sbm_plug_request(vm, diff); + return virtio_mem_bbm_plug_request(vm, diff); +} + /* * Unplug the desired number of plugged subblocks of an offline memory block. * Will fail if any subblock cannot get unplugged (instead of skipping it). @@ -1571,10 +1852,7 @@ unplugged: return 0; } -/* - * Try to unplug the requested amount of memory. 
- */ -static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) +static int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff) { uint64_t nb_sb = diff / vm->sbm.sb_size; unsigned long mb_id; @@ -1640,20 +1918,42 @@ out_unlock: return rc; } +/* + * Try to unplug the requested amount of memory. + */ +static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) +{ + if (vm->in_sbm) + return virtio_mem_sbm_unplug_request(vm, diff); + return -EBUSY; +} + /* * Try to unplug all blocks that couldn't be unplugged before, for example, * because the hypervisor was busy. */ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm) { - unsigned long mb_id; + unsigned long id; int rc; - virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_PLUGGED) { - rc = virtio_mem_sbm_unplug_mb(vm, mb_id); + if (!vm->in_sbm) { + virtio_mem_bbm_for_each_bb(vm, id, + VIRTIO_MEM_BBM_BB_PLUGGED) { + rc = virtio_mem_bbm_unplug_bb(vm, id); + if (rc) + return rc; + virtio_mem_bbm_set_bb_state(vm, id, + VIRTIO_MEM_BBM_BB_UNUSED); + } + return 0; + } + + virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_PLUGGED) { + rc = virtio_mem_sbm_unplug_mb(vm, id); if (rc) return rc; - virtio_mem_sbm_set_mb_state(vm, mb_id, + virtio_mem_sbm_set_mb_state(vm, id, VIRTIO_MEM_SBM_MB_UNUSED); } @@ -1679,7 +1979,13 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm) usable_region_size, &usable_region_size); end_addr = vm->addr + usable_region_size; end_addr = min(end_addr, phys_limit); - vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr) - 1; + + if (vm->in_sbm) + vm->sbm.last_usable_mb_id = + virtio_mem_phys_to_mb_id(end_addr) - 1; + else + vm->bbm.last_usable_bb_id = + virtio_mem_phys_to_bb_id(vm, end_addr) - 1; /* see if there is a request to change the size */ virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size, @@ -1802,6 +2108,7 @@ static int virtio_mem_init_vq(struct virtio_mem *vm) static int 
virtio_mem_init(struct virtio_mem *vm) { const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS; + uint64_t sb_size, addr; uint16_t node_id; if (!vm->vdev->config->get) { @@ -1834,16 +2141,6 @@ static int virtio_mem_init(struct virtio_mem *vm) if (vm->nid == NUMA_NO_NODE) vm->nid = memory_add_physaddr_to_nid(vm->addr); - /* - * We always hotplug memory in memory block granularity. This way, - * we have to wait for exactly one memory block to online. - */ - if (vm->device_block_size > memory_block_size_bytes()) { - dev_err(&vm->vdev->dev, - "The block size is not supported (too big).\n"); - return -EINVAL; - } - /* bad device setup - warn only */ if (!IS_ALIGNED(vm->addr, memory_block_size_bytes())) dev_warn(&vm->vdev->dev, @@ -1863,20 +2160,35 @@ static int virtio_mem_init(struct virtio_mem *vm) * - Is required for now for alloc_contig_range() to work reliably - * it doesn't properly handle smaller granularity on ZONE_NORMAL. */ - vm->sbm.sb_size = max_t(uint64_t, MAX_ORDER_NR_PAGES, - pageblock_nr_pages) * PAGE_SIZE; - vm->sbm.sb_size = max_t(uint64_t, vm->device_block_size, - vm->sbm.sb_size); - vm->sbm.sbs_per_mb = memory_block_size_bytes() / vm->sbm.sb_size; + sb_size = max_t(uint64_t, MAX_ORDER_NR_PAGES, + pageblock_nr_pages) * PAGE_SIZE; + sb_size = max_t(uint64_t, vm->device_block_size, sb_size); + + if (sb_size < memory_block_size_bytes()) { + /* SBM: At least two subblocks per Linux memory block. */ + vm->in_sbm = true; + vm->sbm.sb_size = sb_size; + vm->sbm.sbs_per_mb = memory_block_size_bytes() / + vm->sbm.sb_size; + + /* Round up to the next full memory block */ + addr = vm->addr + memory_block_size_bytes() - 1; + vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr); + vm->sbm.next_mb_id = vm->sbm.first_mb_id; + } else { + /* BBM: At least one Linux memory block. 
*/ + vm->bbm.bb_size = vm->device_block_size; - /* Round up to the next full memory block */ - vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(vm->addr - 1 + - memory_block_size_bytes()); - vm->sbm.next_mb_id = vm->sbm.first_mb_id; + vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, vm->addr); + vm->bbm.next_bb_id = vm->bbm.first_bb_id; + } /* Prepare the offline threshold - make sure we can add two blocks. */ vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(), VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD); + /* In BBM, we also want at least two big blocks. */ + vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size, + vm->offline_threshold); dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); @@ -1884,8 +2196,12 @@ static int virtio_mem_init(struct virtio_mem *vm) (unsigned long long)vm->device_block_size); dev_info(&vm->vdev->dev, "memory block size: 0x%lx", memory_block_size_bytes()); - dev_info(&vm->vdev->dev, "subblock size: 0x%llx", - (unsigned long long)vm->sbm.sb_size); + if (vm->in_sbm) + dev_info(&vm->vdev->dev, "subblock size: 0x%llx", + (unsigned long long)vm->sbm.sb_size); + else + dev_info(&vm->vdev->dev, "big block size: 0x%llx", + (unsigned long long)vm->bbm.bb_size); if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); @@ -2042,22 +2358,24 @@ static void virtio_mem_remove(struct virtio_device *vdev) cancel_work_sync(&vm->wq); hrtimer_cancel(&vm->retry_timer); - /* - * After we unregistered our callbacks, user space can online partially - * plugged offline blocks. Make sure to remove them. 
- */ - virtio_mem_sbm_for_each_mb(vm, mb_id, - VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { - rc = virtio_mem_sbm_remove_mb(vm, mb_id); - BUG_ON(rc); - virtio_mem_sbm_set_mb_state(vm, mb_id, - VIRTIO_MEM_SBM_MB_UNUSED); + if (vm->in_sbm) { + /* + * After we unregistered our callbacks, user space can online + * partially plugged offline blocks. Make sure to remove them. + */ + virtio_mem_sbm_for_each_mb(vm, mb_id, + VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) { + rc = virtio_mem_sbm_remove_mb(vm, mb_id); + BUG_ON(rc); + virtio_mem_sbm_set_mb_state(vm, mb_id, + VIRTIO_MEM_SBM_MB_UNUSED); + } + /* + * After we unregistered our callbacks, user space can no longer + * offline partially plugged online memory blocks. No need to + * worry about them. + */ } - /* - * After we unregistered our callbacks, user space can no longer - * offline partially plugged online memory blocks. No need to worry - * about them. - */ /* unregister callbacks */ unregister_virtio_mem_device(vm); @@ -2076,8 +2394,12 @@ static void virtio_mem_remove(struct virtio_device *vdev) } /* remove all tracking data - no locking needed */ - vfree(vm->sbm.mb_states); - vfree(vm->sbm.sb_states); + if (vm->in_sbm) { + vfree(vm->sbm.mb_states); + vfree(vm->sbm.sb_states); + } else { + vfree(vm->bbm.bb_states); + } /* reset the device and cleanup the queues */ vdev->config->reset(vdev); -- cgit v1.2.3 From faa45ff4ce885af93a3233a408c5a74b2943226b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:12 +0100 Subject: virtio-mem: allow to force Big Block Mode (BBM) and set the big block size Let's allow to force BBM, even if subblocks would be possible. Take care of properly calculating the first big block id, because the start address might no longer be aligned to the big block size. Also, allow to manually configure the size of Big Blocks. Reviewed-by: Wei Yang Cc: "Michael S. 
Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Cc: Michal Hocko Cc: Oscar Salvador Cc: Wei Yang Cc: Andrew Morton Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-27-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 8a4f735360ac..861149acafe5 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -27,6 +27,16 @@ static bool unplug_online = true; module_param(unplug_online, bool, 0644); MODULE_PARM_DESC(unplug_online, "Try to unplug online memory"); +static bool force_bbm; +module_param(force_bbm, bool, 0444); +MODULE_PARM_DESC(force_bbm, + "Force Big Block Mode. Default is 0 (auto-selection)"); + +static unsigned long bbm_block_size; +module_param(bbm_block_size, ulong, 0444); +MODULE_PARM_DESC(bbm_block_size, + "Big Block size in bytes. Default is 0 (auto-detection)."); + /* * virtio-mem currently supports the following modes of operation: * @@ -2164,7 +2174,7 @@ static int virtio_mem_init(struct virtio_mem *vm) pageblock_nr_pages) * PAGE_SIZE; sb_size = max_t(uint64_t, vm->device_block_size, sb_size); - if (sb_size < memory_block_size_bytes()) { + if (sb_size < memory_block_size_bytes() && !force_bbm) { /* SBM: At least two subblocks per Linux memory block. */ vm->in_sbm = true; vm->sbm.sb_size = sb_size; @@ -2177,9 +2187,24 @@ static int virtio_mem_init(struct virtio_mem *vm) vm->sbm.next_mb_id = vm->sbm.first_mb_id; } else { /* BBM: At least one Linux memory block. 
*/ - vm->bbm.bb_size = vm->device_block_size; + vm->bbm.bb_size = max_t(uint64_t, vm->device_block_size, + memory_block_size_bytes()); + + if (bbm_block_size) { + if (!is_power_of_2(bbm_block_size)) { + dev_warn(&vm->vdev->dev, + "bbm_block_size is not a power of 2"); + } else if (bbm_block_size < vm->bbm.bb_size) { + dev_warn(&vm->vdev->dev, + "bbm_block_size is too small"); + } else { + vm->bbm.bb_size = bbm_block_size; + } + } - vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, vm->addr); + /* Round up to the next aligned big block */ + addr = vm->addr + vm->bbm.bb_size - 1; + vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr); vm->bbm.next_bb_id = vm->bbm.first_bb_id; } -- cgit v1.2.3 From 8dc4bb58a146655eb057247d7c9d19e73928715b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:13 +0100 Subject: mm/memory_hotplug: extend offline_and_remove_memory() to handle more than one memory block virtio-mem soon wants to use offline_and_remove_memory() memory that exceeds a single Linux memory block (memory_block_size_bytes()). Let's remove that restriction. Let's remember the old state and try to restore that if anything goes wrong. While re-onlining can, in general, fail, it's highly unlikely to happen (usually only when a notifier fails to allocate memory, and these are rather rare). This will be used by virtio-mem to offline+remove memory ranges that are bigger than a single memory block - for example, with a device block size of 1 GiB (e.g., gigantic pages in the hypervisor) and a Linux memory block size of 128MB. While we could compress the state into 2 bit, using 8 bit is much easier. This handling is similar, but different to acpi_scan_try_to_offline(): a) We don't try to offline twice. I am not sure if this CONFIG_MEMCG optimization is still relevant - it should only apply to ZONE_NORMAL (where we have no guarantees). If relevant, we can always add it. 
b) acpi_scan_try_to_offline() simply onlines all memory in case something goes wrong. It doesn't restore previous online type. Let's do that, so we won't overwrite what e.g., user space configured. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Cc: Michal Hocko Cc: Oscar Salvador Cc: Wei Yang Cc: Andrew Morton Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-28-david@redhat.com Signed-off-by: Michael S. Tsirkin Acked-by: Andrew Morton --- mm/memory_hotplug.c | 105 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 16 deletions(-) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 63b2e46b6555..2b6cc42ba0a3 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1788,39 +1788,112 @@ int remove_memory(int nid, u64 start, u64 size) } EXPORT_SYMBOL_GPL(remove_memory); +static int try_offline_memory_block(struct memory_block *mem, void *arg) +{ + uint8_t online_type = MMOP_ONLINE_KERNEL; + uint8_t **online_types = arg; + struct page *page; + int rc; + + /* + * Sense the online_type via the zone of the memory block. Offlining + * with multiple zones within one memory block will be rejected + * by offlining code ... so we don't care about that. + */ + page = pfn_to_online_page(section_nr_to_pfn(mem->start_section_nr)); + if (page && zone_idx(page_zone(page)) == ZONE_MOVABLE) + online_type = MMOP_ONLINE_MOVABLE; + + rc = device_offline(&mem->dev); + /* + * Default is MMOP_OFFLINE - change it only if offlining succeeded, + * so try_reonline_memory_block() can do the right thing. + */ + if (!rc) + **online_types = online_type; + + (*online_types)++; + /* Ignore if already offline. */ + return rc < 0 ? 
rc : 0; +} + +static int try_reonline_memory_block(struct memory_block *mem, void *arg) +{ + uint8_t **online_types = arg; + int rc; + + if (**online_types != MMOP_OFFLINE) { + mem->online_type = **online_types; + rc = device_online(&mem->dev); + if (rc < 0) + pr_warn("%s: Failed to re-online memory: %d", + __func__, rc); + } + + /* Continue processing all remaining memory blocks. */ + (*online_types)++; + return 0; +} + /* - * Try to offline and remove a memory block. Might take a long time to - * finish in case memory is still in use. Primarily useful for memory devices - * that logically unplugged all memory (so it's no longer in use) and want to - * offline + remove the memory block. + * Try to offline and remove memory. Might take a long time to finish in case + * memory is still in use. Primarily useful for memory devices that logically + * unplugged all memory (so it's no longer in use) and want to offline + remove + * that memory. */ int offline_and_remove_memory(int nid, u64 start, u64 size) { - struct memory_block *mem; - int rc = -EINVAL; + const unsigned long mb_count = size / memory_block_size_bytes(); + uint8_t *online_types, *tmp; + int rc; if (!IS_ALIGNED(start, memory_block_size_bytes()) || - size != memory_block_size_bytes()) - return rc; + !IS_ALIGNED(size, memory_block_size_bytes()) || !size) + return -EINVAL; + + /* + * We'll remember the old online type of each memory block, so we can + * try to revert whatever we did when offlining one memory block fails + * after offlining some others succeeded. + */ + online_types = kmalloc_array(mb_count, sizeof(*online_types), + GFP_KERNEL); + if (!online_types) + return -ENOMEM; + /* + * Initialize all states to MMOP_OFFLINE, so when we abort processing in + * try_offline_memory_block(), we'll skip all unprocessed blocks in + * try_reonline_memory_block(). 
+ */ + memset(online_types, MMOP_OFFLINE, mb_count); lock_device_hotplug(); - mem = find_memory_block(__pfn_to_section(PFN_DOWN(start))); - if (mem) - rc = device_offline(&mem->dev); - /* Ignore if the device is already offline. */ - if (rc > 0) - rc = 0; + + tmp = online_types; + rc = walk_memory_blocks(start, size, &tmp, try_offline_memory_block); /* - * In case we succeeded to offline the memory block, remove it. + * In case we succeeded to offline all memory, remove it. * This cannot fail as it cannot get onlined in the meantime. */ if (!rc) { rc = try_remove_memory(nid, start, size); - WARN_ON_ONCE(rc); + if (rc) + pr_err("%s: Failed to remove memory: %d", __func__, rc); + } + + /* + * Rollback what we did. While memory onlining might theoretically fail + * (nacked by a notifier), it barely ever happens. + */ + if (rc) { + tmp = online_types; + walk_memory_blocks(start, size, &tmp, + try_reonline_memory_block); } unlock_device_hotplug(); + kfree(online_types); return rc; } EXPORT_SYMBOL_GPL(offline_and_remove_memory); -- cgit v1.2.3 From 269ac9389db4854f7b05c4749ff051763e7578d3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:14 +0100 Subject: virtio-mem: Big Block Mode (BBM) - basic memory hotunplug Let's try to unplug completely offline big blocks first. Then, (if enabled via unplug_offline) try to offline and remove whole big blocks. No locking necessary - we can deal with concurrent onlining/offlining just fine. Note1: This is sub-optimal and might be dangerous in some environments: we could end up in an infinite loop when offlining (e.g., long-term pinnings), similar as with DIMMs. We'll introduce safe memory hotunplug via fake-offlining next, and use this basic mode only when explicitly enabled. Note2: Without ZONE_MOVABLE, memory unplug will be extremely unreliable with bigger block sizes. Cc: "Michael S. 
Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Cc: Michal Hocko Cc: Oscar Salvador Cc: Wei Yang Cc: Andrew Morton Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-29-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 156 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 155 insertions(+), 1 deletion(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 861149acafe5..f1696cdb7b0c 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -390,6 +390,12 @@ static int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm) _bb_id++) \ if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) +#define virtio_mem_bbm_for_each_bb_rev(_vm, _bb_id, _state) \ + for (_bb_id = vm->bbm.next_bb_id - 1; \ + _bb_id >= vm->bbm.first_bb_id && _vm->bbm.bb_count[_state]; \ + _bb_id--) \ + if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state) + /* * Set the state of a memory block, taking care of the state counter. */ @@ -685,6 +691,18 @@ static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id) return virtio_mem_remove_memory(vm, addr, size); } +/* + * See virtio_mem_remove_memory(): Try to remove all Linux memory blocks covered + * by the big block. + */ +static int virtio_mem_bbm_remove_bb(struct virtio_mem *vm, unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_remove_memory(vm, addr, size); +} + /* * Try offlining and removing memory from Linux. * @@ -731,6 +749,19 @@ static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm, return virtio_mem_offline_and_remove_memory(vm, addr, size); } +/* + * See virtio_mem_offline_and_remove_memory(): Try to offline and remove a + * all Linux memory blocks covered by the big block. 
+ */ +static int virtio_mem_bbm_offline_and_remove_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id); + const uint64_t size = vm->bbm.bb_size; + + return virtio_mem_offline_and_remove_memory(vm, addr, size); +} + /* * Trigger the workqueue so the device can perform its magic. */ @@ -1928,6 +1959,129 @@ out_unlock: return rc; } +/* + * Try to offline and remove a big block from Linux and unplug it. Will fail + * with -EBUSY if some memory is busy and cannot get unplugged. + * + * Will modify the state of the memory block. Might temporarily drop the + * hotplug_mutex. + */ +static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + int rc; + + if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_ADDED)) + return -EINVAL; + + rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id); + if (rc) + return rc; + + rc = virtio_mem_bbm_unplug_bb(vm, bb_id); + if (rc) + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_PLUGGED); + else + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_UNUSED); + return rc; +} + +/* + * Try to remove a big block from Linux and unplug it. Will fail with + * -EBUSY if some memory is online. + * + * Will modify the state of the memory block. + */ +static int virtio_mem_bbm_remove_and_unplug_bb(struct virtio_mem *vm, + unsigned long bb_id) +{ + int rc; + + if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_ADDED)) + return -EINVAL; + + rc = virtio_mem_bbm_remove_bb(vm, bb_id); + if (rc) + return -EBUSY; + + rc = virtio_mem_bbm_unplug_bb(vm, bb_id); + if (rc) + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_PLUGGED); + else + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_UNUSED); + return rc; +} + +/* + * Test if a big block is completely offline. 
+ */ +static bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm, + unsigned long bb_id) +{ + const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); + const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); + unsigned long pfn; + + for (pfn = start_pfn; pfn < start_pfn + nr_pages; + pfn += PAGES_PER_SECTION) { + if (pfn_to_online_page(pfn)) + return false; + } + + return true; +} + +static int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff) +{ + uint64_t nb_bb = diff / vm->bbm.bb_size; + uint64_t bb_id; + int rc; + + if (!nb_bb) + return 0; + + /* Try to unplug completely offline big blocks first. */ + virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) { + cond_resched(); + /* + * As we're holding no locks, this check is racy as memory + * can get onlined in the meantime - but we'll fail gracefully. + */ + if (!virtio_mem_bbm_bb_is_offline(vm, bb_id)) + continue; + rc = virtio_mem_bbm_remove_and_unplug_bb(vm, bb_id); + if (rc == -EBUSY) + continue; + if (!rc) + nb_bb--; + if (rc || !nb_bb) + return rc; + } + + if (!unplug_online) + return 0; + + /* Try to unplug any big blocks. */ + virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) { + cond_resched(); + rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id); + if (rc == -EBUSY) + continue; + if (!rc) + nb_bb--; + if (rc || !nb_bb) + return rc; + } + + return nb_bb ? -EBUSY : 0; +} + /* * Try to unplug the requested amount of memory. 
*/ @@ -1935,7 +2089,7 @@ static int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff) { if (vm->in_sbm) return virtio_mem_sbm_unplug_request(vm, diff); - return -EBUSY; + return virtio_mem_bbm_unplug_request(vm, diff); } /* -- cgit v1.2.3 From 3711387a7543f2716e52ce5a5d92e3d580423a40 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 12 Nov 2020 14:38:15 +0100 Subject: virtio-mem: Big Block Mode (BBM) - safe memory hotunplug Let's add a safe mechanism to unplug memory, avoiding long/endless loops when trying to offline memory - similar to in SBM. Fake-offline all memory (via alloc_contig_range()) before trying to offline+remove it. Use this mode as default, but allow to enable the other mode explicitly (which could give better memory hotunplug guarantees in some environments). The "unsafe" mode can be enabled e.g., via virtio_mem.bbm_safe_unplug=0 on the cmdline. Reviewed-by: Wei Yang Cc: "Michael S. Tsirkin" Cc: Jason Wang Cc: Pankaj Gupta Cc: Michal Hocko Cc: Oscar Salvador Cc: Wei Yang Cc: Andrew Morton Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20201112133815.13332-30-david@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/virtio/virtio_mem.c | 97 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index f1696cdb7b0c..9fc9ec4a25f5 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -37,6 +37,11 @@ module_param(bbm_block_size, ulong, 0444); MODULE_PARM_DESC(bbm_block_size, "Big Block size in bytes. 
Default is 0 (auto-detection)."); +static bool bbm_safe_unplug = true; +module_param(bbm_safe_unplug, bool, 0444); +MODULE_PARM_DESC(bbm_safe_unplug, + "Use a safe unplug mechanism in BBM, avoiding long/endless loops"); + /* * virtio-mem currently supports the following modes of operation: * @@ -87,6 +92,8 @@ enum virtio_mem_bbm_bb_state { VIRTIO_MEM_BBM_BB_PLUGGED, /* Plugged and added to Linux. */ VIRTIO_MEM_BBM_BB_ADDED, + /* All online parts are fake-offline, ready to remove. */ + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE, VIRTIO_MEM_BBM_BB_COUNT }; @@ -889,6 +896,32 @@ static void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm, } } +static void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm, + unsigned long bb_id, + unsigned long pfn, + unsigned long nr_pages) +{ + /* + * When marked as "fake-offline", all online memory of this device block + * is allocated by us. Otherwise, we don't have any memory allocated. + */ + if (virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) + return; + virtio_mem_fake_offline_going_offline(pfn, nr_pages); +} + +static void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm, + unsigned long bb_id, + unsigned long pfn, + unsigned long nr_pages) +{ + if (virtio_mem_bbm_get_bb_state(vm, bb_id) != + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE) + return; + virtio_mem_fake_offline_cancel_offline(pfn, nr_pages); +} + /* * This callback will either be called synchronously from add_memory() or * asynchronously (e.g., triggered via user space). 
We have to be careful @@ -949,6 +982,10 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, vm->hotplug_active = true; if (vm->in_sbm) virtio_mem_sbm_notify_going_offline(vm, id); + else + virtio_mem_bbm_notify_going_offline(vm, id, + mhp->start_pfn, + mhp->nr_pages); break; case MEM_GOING_ONLINE: mutex_lock(&vm->hotplug_mutex); @@ -999,6 +1036,10 @@ static int virtio_mem_memory_notifier_cb(struct notifier_block *nb, break; if (vm->in_sbm) virtio_mem_sbm_notify_cancel_offline(vm, id); + else + virtio_mem_bbm_notify_cancel_offline(vm, id, + mhp->start_pfn, + mhp->nr_pages); vm->hotplug_active = false; mutex_unlock(&vm->hotplug_mutex); break; @@ -1189,7 +1230,13 @@ static void virtio_mem_online_page_cb(struct page *page, unsigned int order) do_online = virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, 1); } else { - do_online = true; + /* + * If the whole block is marked fake offline, keep + * everything that way. + */ + id = virtio_mem_phys_to_bb_id(vm, addr); + do_online = virtio_mem_bbm_get_bb_state(vm, id) != + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE; } if (do_online) generic_online_page(page, order); @@ -1969,15 +2016,50 @@ out_unlock: static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, unsigned long bb_id) { + const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id)); + const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size); + unsigned long end_pfn = start_pfn + nr_pages; + unsigned long pfn; + struct page *page; int rc; if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) != VIRTIO_MEM_BBM_BB_ADDED)) return -EINVAL; + if (bbm_safe_unplug) { + /* + * Start by fake-offlining all memory. Once we marked the device + * block as fake-offline, all newly onlined memory will + * automatically be kept fake-offline. Protect from concurrent + * onlining/offlining until we have a consistent state. 
+ */ + mutex_lock(&vm->hotplug_mutex); + virtio_mem_bbm_set_bb_state(vm, bb_id, + VIRTIO_MEM_BBM_BB_FAKE_OFFLINE); + + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + page = pfn_to_online_page(pfn); + if (!page) + continue; + + rc = virtio_mem_fake_offline(pfn, PAGES_PER_SECTION); + if (rc) { + end_pfn = pfn; + goto rollback_safe_unplug; + } + } + mutex_unlock(&vm->hotplug_mutex); + } + rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id); - if (rc) + if (rc) { + if (bbm_safe_unplug) { + mutex_lock(&vm->hotplug_mutex); + goto rollback_safe_unplug; + } return rc; + } rc = virtio_mem_bbm_unplug_bb(vm, bb_id); if (rc) @@ -1987,6 +2069,17 @@ static int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm, virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED); return rc; + +rollback_safe_unplug: + for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { + page = pfn_to_online_page(pfn); + if (!page) + continue; + virtio_mem_fake_online(pfn, PAGES_PER_SECTION); + } + virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED); + mutex_unlock(&vm->hotplug_mutex); + return rc; } /* -- cgit v1.2.3 From 0ab4b8901a8edda4fd1c2aded36192566d89353f Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Wed, 11 Nov 2020 09:14:48 +0800 Subject: vhost_vdpa: switch to vmemdup_user() Replace opencoded alloc and copy with vmemdup_user() Signed-off-by: Tian Tao Link: https://lore.kernel.org/r/1605057288-60400-1-git-send-email-tiantao6@hisilicon.com Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefano Garzarella --- drivers/vhost/vdpa.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index 29ed4173f04e..ef688c8c0e0e 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -245,14 +245,10 @@ static long vhost_vdpa_set_config(struct vhost_vdpa *v, return -EFAULT; if (vhost_vdpa_config_validate(v, &config)) return -EINVAL; - buf = kvzalloc(config.len, GFP_KERNEL); - if (!buf) - return -ENOMEM; - if (copy_from_user(buf, c->buf, config.len)) { - kvfree(buf); - return -EFAULT; - } + buf = vmemdup_user(c->buf, config.len); + if (IS_ERR(buf)) + return PTR_ERR(buf); ops->set_config(vdpa, config.off, buf, config.len); -- cgit v1.2.3 From 4d10367fd411437d55850357e471d9d5f9f47e72 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sun, 29 Nov 2020 13:54:34 +0100 Subject: vdpa: ifcvf: Use dma_set_mask_and_coherent to simplify code 'pci_set_dma_mask()' + 'pci_set_consistent_dma_mask()' can be replaced by an equivalent 'dma_set_mask_and_coherent()' which is much less verbose. While at it, fix a typo (s/confiugration/configuration) Signed-off-by: Christophe JAILLET Link: https://lore.kernel.org/r/20201129125434.1462638-1-christophe.jaillet@wanadoo.fr Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- drivers/vdpa/ifcvf/ifcvf_main.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/drivers/vdpa/ifcvf/ifcvf_main.c b/drivers/vdpa/ifcvf/ifcvf_main.c index 8b4028556cb6..fa1af301cf55 100644 --- a/drivers/vdpa/ifcvf/ifcvf_main.c +++ b/drivers/vdpa/ifcvf/ifcvf_main.c @@ -417,16 +417,9 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id) return ret; } - ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(64)); + ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)); if (ret) { - IFCVF_ERR(pdev, "No usable DMA confiugration\n"); - return ret; - } - - ret = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)); - if (ret) { - IFCVF_ERR(pdev, - "No usable coherent DMA confiugration\n"); + IFCVF_ERR(pdev, "No usable DMA configuration\n"); return ret; } -- cgit v1.2.3 From 29b90f92ee64f4cae2d8ef83922286567da6c2c1 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:39 +0100 Subject: vdpa: remove unnecessary 'default n' in Kconfig entries 'default n' is not necessary since it is already the default when nothing is specified. Suggested-by: Jason Wang Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-2-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/Kconfig | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 6caf539091e5..2c892e890b9e 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -14,7 +14,6 @@ config VDPA_SIM select DMA_OPS select VHOST_RING select GENERIC_NET_UTILS - default n help vDPA networking device simulator which loop TX traffic back to RX. This device is used for testing, prototyping and @@ -23,7 +22,6 @@ config VDPA_SIM config IFCVF tristate "Intel IFC VF vDPA driver" depends on PCI_MSI - default n help This kernel module can drive Intel IFC VF NIC to offload virtio dataplane traffic to hardware. 
@@ -42,7 +40,6 @@ config MLX5_VDPA_NET tristate "vDPA driver for ConnectX devices" select MLX5_VDPA depends on MLX5_CORE - default n help VDPA network driver for ConnectX6 and newer. Provides offloading of virtio net datapath such that descriptors put on the ring will -- cgit v1.2.3 From cc3d42386d14176e392d61da1de05c1d87c18b93 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:40 +0100 Subject: vdpa_sim: remove unnecessary headers inclusion Some headers are not necessary, so let's remove them to do some cleaning. Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-3-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 6a90fdb9cbfc..63b85541fb5e 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -11,16 +11,9 @@ #include #include #include -#include -#include #include #include -#include -#include -#include #include -#include -#include #include #include #include -- cgit v1.2.3 From 423248d60d2b655321fc49eca1545f95a1bc9d6c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 15 Dec 2020 15:42:41 +0100 Subject: vdpa_sim: remove hard-coded virtq count Add a new attribute that will define the number of virt queues to be created for the vdpasim device. Signed-off-by: Max Gurtovoy [sgarzare: replace kmalloc_array() with kcalloc()] Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-4-sgarzare@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 63b85541fb5e..07ccc8609784 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -63,7 +63,7 @@ static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | /* State of each vdpasim device */ struct vdpasim { struct vdpa_device vdpa; - struct vdpasim_virtqueue vqs[VDPASIM_VQ_NUM]; + struct vdpasim_virtqueue *vqs; struct work_struct work; /* spinlock to synchronize virtqueue state */ spinlock_t lock; @@ -73,6 +73,7 @@ struct vdpasim { u32 status; u32 generation; u64 features; + int nvqs; /* spinlock to synchronize iommu table */ spinlock_t iommu_lock; }; @@ -137,7 +138,7 @@ static void vdpasim_reset(struct vdpasim *vdpasim) { int i; - for (i = 0; i < VDPASIM_VQ_NUM; i++) + for (i = 0; i < vdpasim->nvqs; i++) vdpasim_vq_reset(&vdpasim->vqs[i]); spin_lock(&vdpasim->iommu_lock); @@ -343,7 +344,7 @@ static struct vdpasim *vdpasim_create(void) const struct vdpa_config_ops *ops; struct vdpasim *vdpasim; struct device *dev; - int ret = -ENOMEM; + int i, ret = -ENOMEM; if (batch_mapping) ops = &vdpasim_net_batch_config_ops; @@ -354,6 +355,7 @@ static struct vdpasim *vdpasim_create(void) if (!vdpasim) goto err_alloc; + vdpasim->nvqs = VDPASIM_VQ_NUM; INIT_WORK(&vdpasim->work, vdpasim_work); spin_lock_init(&vdpasim->lock); spin_lock_init(&vdpasim->iommu_lock); @@ -364,6 +366,11 @@ static struct vdpasim *vdpasim_create(void) goto err_iommu; set_dma_ops(dev, &vdpasim_dma_ops); + vdpasim->vqs = kcalloc(vdpasim->nvqs, sizeof(struct vdpasim_virtqueue), + GFP_KERNEL); + if (!vdpasim->vqs) + goto err_iommu; + vdpasim->iommu = vhost_iotlb_alloc(2048, 0); if (!vdpasim->iommu) goto err_iommu; @@ -382,8 +389,8 @@ static struct vdpasim *vdpasim_create(void) eth_random_addr(vdpasim->config.mac); } - vringh_set_iotlb(&vdpasim->vqs[0].vring, 
vdpasim->iommu); - vringh_set_iotlb(&vdpasim->vqs[1].vring, vdpasim->iommu); + for (i = 0; i < vdpasim->nvqs; i++) + vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu); vdpasim->vdpa.dma_dev = dev; ret = vdpa_register_device(&vdpasim->vdpa); @@ -652,6 +659,7 @@ static void vdpasim_free(struct vdpa_device *vdpa) kfree(vdpasim->buffer); if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); + kfree(vdpasim->vqs); } static const struct vdpa_config_ops vdpasim_net_config_ops = { -- cgit v1.2.3 From 2fc0ebfa039025d88009e8f275ea8bcd177a9cd9 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:42 +0100 Subject: vdpa_sim: make IOTLB entries limit configurable Some devices may require a higher limit for the number of IOTLB entries, so let's make it configurable through a module parameter. By default, it's initialized with the current limit (2048). Suggested-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-5-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 07ccc8609784..d716bfaadb3b 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -31,6 +31,11 @@ static int batch_mapping = 1; module_param(batch_mapping, int, 0444); MODULE_PARM_DESC(batch_mapping, "Batched mapping 1 -Enable; 0 - Disable"); +static int max_iotlb_entries = 2048; +module_param(max_iotlb_entries, int, 0444); +MODULE_PARM_DESC(max_iotlb_entries, + "Maximum number of iotlb entries. 0 means unlimited. 
(default: 2048)"); + static char *macaddr; module_param(macaddr, charp, 0); MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); @@ -371,7 +376,7 @@ static struct vdpasim *vdpasim_create(void) if (!vdpasim->vqs) goto err_iommu; - vdpasim->iommu = vhost_iotlb_alloc(2048, 0); + vdpasim->iommu = vhost_iotlb_alloc(max_iotlb_entries, 0); if (!vdpasim->iommu) goto err_iommu; -- cgit v1.2.3 From 36a9c30630256629e62a9186793c28735ade3ffc Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:43 +0100 Subject: vdpa_sim: rename vdpasim_config_ops variables These variables store generic callbacks used by the vDPA simulator core, so we can remove the 'net' word in their names. Co-developed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-6-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index d716bfaadb3b..923f29076b1b 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -341,8 +341,8 @@ static const struct dma_map_ops vdpasim_dma_ops = { .free = vdpasim_free_coherent, }; -static const struct vdpa_config_ops vdpasim_net_config_ops; -static const struct vdpa_config_ops vdpasim_net_batch_config_ops; +static const struct vdpa_config_ops vdpasim_config_ops; +static const struct vdpa_config_ops vdpasim_batch_config_ops; static struct vdpasim *vdpasim_create(void) { @@ -352,9 +352,9 @@ static struct vdpasim *vdpasim_create(void) int i, ret = -ENOMEM; if (batch_mapping) - ops = &vdpasim_net_batch_config_ops; + ops = &vdpasim_batch_config_ops; else - ops = &vdpasim_net_config_ops; + ops = &vdpasim_config_ops; vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM); if (!vdpasim) @@ -667,7 +667,7 @@ static void 
vdpasim_free(struct vdpa_device *vdpa) kfree(vdpasim->vqs); } -static const struct vdpa_config_ops vdpasim_net_config_ops = { +static const struct vdpa_config_ops vdpasim_config_ops = { .set_vq_address = vdpasim_set_vq_address, .set_vq_num = vdpasim_set_vq_num, .kick_vq = vdpasim_kick_vq, @@ -694,7 +694,7 @@ static const struct vdpa_config_ops vdpasim_net_config_ops = { .free = vdpasim_free, }; -static const struct vdpa_config_ops vdpasim_net_batch_config_ops = { +static const struct vdpa_config_ops vdpasim_batch_config_ops = { .set_vq_address = vdpasim_set_vq_address, .set_vq_num = vdpasim_set_vq_num, .kick_vq = vdpasim_kick_vq, -- cgit v1.2.3 From 6c6e28fe45794054410ad8cd2770af69fbe0338d Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:44 +0100 Subject: vdpa_sim: add struct vdpasim_dev_attr for device attributes vdpasim_dev_attr will contain device specific attributes. We start by moving the number of virtqueues (i.e. nvqs) to vdpasim_dev_attr. vdpasim_create() creates a new vDPA simulator following the device attributes defined in the vdpasim_dev_attr parameter. Co-developed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-7-sgarzare@redhat.com Signed-off-by: Michael S.
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 923f29076b1b..ae295dc57d7d 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -65,11 +65,16 @@ static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | (1ULL << VIRTIO_F_ACCESS_PLATFORM) | (1ULL << VIRTIO_NET_F_MAC); +struct vdpasim_dev_attr { + int nvqs; +}; + /* State of each vdpasim device */ struct vdpasim { struct vdpa_device vdpa; struct vdpasim_virtqueue *vqs; struct work_struct work; + struct vdpasim_dev_attr dev_attr; /* spinlock to synchronize virtqueue state */ spinlock_t lock; struct virtio_net_config config; @@ -78,7 +83,6 @@ struct vdpasim { u32 status; u32 generation; u64 features; - int nvqs; /* spinlock to synchronize iommu table */ spinlock_t iommu_lock; }; @@ -143,7 +147,7 @@ static void vdpasim_reset(struct vdpasim *vdpasim) { int i; - for (i = 0; i < vdpasim->nvqs; i++) + for (i = 0; i < vdpasim->dev_attr.nvqs; i++) vdpasim_vq_reset(&vdpasim->vqs[i]); spin_lock(&vdpasim->iommu_lock); @@ -344,7 +348,7 @@ static const struct dma_map_ops vdpasim_dma_ops = { static const struct vdpa_config_ops vdpasim_config_ops; static const struct vdpa_config_ops vdpasim_batch_config_ops; -static struct vdpasim *vdpasim_create(void) +static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) { const struct vdpa_config_ops *ops; struct vdpasim *vdpasim; @@ -356,11 +360,12 @@ static struct vdpasim *vdpasim_create(void) else ops = &vdpasim_config_ops; - vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, VDPASIM_VQ_NUM); + vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops, + dev_attr->nvqs); if (!vdpasim) goto err_alloc; - vdpasim->nvqs = VDPASIM_VQ_NUM; + vdpasim->dev_attr = *dev_attr; INIT_WORK(&vdpasim->work, vdpasim_work); spin_lock_init(&vdpasim->lock); 
spin_lock_init(&vdpasim->iommu_lock); @@ -371,7 +376,7 @@ static struct vdpasim *vdpasim_create(void) goto err_iommu; set_dma_ops(dev, &vdpasim_dma_ops); - vdpasim->vqs = kcalloc(vdpasim->nvqs, sizeof(struct vdpasim_virtqueue), + vdpasim->vqs = kcalloc(dev_attr->nvqs, sizeof(struct vdpasim_virtqueue), GFP_KERNEL); if (!vdpasim->vqs) goto err_iommu; @@ -394,7 +399,7 @@ static struct vdpasim *vdpasim_create(void) eth_random_addr(vdpasim->config.mac); } - for (i = 0; i < vdpasim->nvqs; i++) + for (i = 0; i < dev_attr->nvqs; i++) vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu); vdpasim->vdpa.dma_dev = dev; @@ -722,7 +727,11 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { static int __init vdpasim_dev_init(void) { - vdpasim_dev = vdpasim_create(); + struct vdpasim_dev_attr dev_attr = {}; + + dev_attr.nvqs = VDPASIM_VQ_NUM; + + vdpasim_dev = vdpasim_create(&dev_attr); if (!IS_ERR(vdpasim_dev)) return 0; -- cgit v1.2.3 From 2f8f461888052f1b92ebe6419514355538f7cd68 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:45 +0100 Subject: vdpa_sim: add device id field in vdpasim_dev_attr Remove VDPASIM_DEVICE_ID macro and add 'id' field in vdpasim_dev_attr, that will be returned by vdpasim_get_device_id(). Use VIRTIO_ID_NET for vDPA-net simulator device id. Co-developed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-8-sgarzare@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index ae295dc57d7d..3a8e57ac7762 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -55,7 +55,6 @@ struct vdpasim_virtqueue { #define VDPASIM_QUEUE_ALIGN PAGE_SIZE #define VDPASIM_QUEUE_MAX 256 -#define VDPASIM_DEVICE_ID 0x1 #define VDPASIM_VENDOR_ID 0 #define VDPASIM_VQ_NUM 0x2 #define VDPASIM_NAME "vdpasim-netdev" @@ -67,6 +66,7 @@ static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | struct vdpasim_dev_attr { int nvqs; + u32 id; }; /* State of each vdpasim device */ @@ -546,7 +546,9 @@ static u16 vdpasim_get_vq_num_max(struct vdpa_device *vdpa) static u32 vdpasim_get_device_id(struct vdpa_device *vdpa) { - return VDPASIM_DEVICE_ID; + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->dev_attr.id; } static u32 vdpasim_get_vendor_id(struct vdpa_device *vdpa) @@ -729,6 +731,7 @@ static int __init vdpasim_dev_init(void) { struct vdpasim_dev_attr dev_attr = {}; + dev_attr.id = VIRTIO_ID_NET; dev_attr.nvqs = VDPASIM_VQ_NUM; vdpasim_dev = vdpasim_create(&dev_attr); -- cgit v1.2.3 From 011c35bac5ef25f701d9a79bc731782889c0ff58 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:46 +0100 Subject: vdpa_sim: add supported_features field in vdpasim_dev_attr Introduce a new VDPASIM_FEATURES macro with the generic features supported by the vDPA simulator, and VDPASIM_NET_FEATURES macro with vDPA-net features. Add 'supported_features' field in vdpasim_dev_attr, to allow devices to specify their features. Co-developed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-9-sgarzare@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 3a8e57ac7762..6cf3c78b0e33 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -59,12 +59,15 @@ struct vdpasim_virtqueue { #define VDPASIM_VQ_NUM 0x2 #define VDPASIM_NAME "vdpasim-netdev" -static u64 vdpasim_features = (1ULL << VIRTIO_F_ANY_LAYOUT) | - (1ULL << VIRTIO_F_VERSION_1) | - (1ULL << VIRTIO_F_ACCESS_PLATFORM) | - (1ULL << VIRTIO_NET_F_MAC); +#define VDPASIM_FEATURES ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VIRTIO_F_ACCESS_PLATFORM)) + +#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ + (1ULL << VIRTIO_NET_F_MAC)) struct vdpasim_dev_attr { + u64 supported_features; int nvqs; u32 id; }; @@ -122,7 +125,7 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) { struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; - vringh_init_iotlb(&vq->vring, vdpasim_features, + vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, VDPASIM_QUEUE_MAX, false, (struct vring_desc *)(uintptr_t)vq->desc_addr, (struct vring_avail *) @@ -131,7 +134,8 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) (uintptr_t)vq->device_addr); } -static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) +static void vdpasim_vq_reset(struct vdpasim *vdpasim, + struct vdpasim_virtqueue *vq) { vq->ready = false; vq->desc_addr = 0; @@ -139,8 +143,8 @@ static void vdpasim_vq_reset(struct vdpasim_virtqueue *vq) vq->device_addr = 0; vq->cb = NULL; vq->private = NULL; - vringh_init_iotlb(&vq->vring, vdpasim_features, VDPASIM_QUEUE_MAX, - false, NULL, NULL, NULL); + vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, + VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL); } static void vdpasim_reset(struct vdpasim *vdpasim) @@ -148,7 +152,7 @@ 
static void vdpasim_reset(struct vdpasim *vdpasim) int i; for (i = 0; i < vdpasim->dev_attr.nvqs; i++) - vdpasim_vq_reset(&vdpasim->vqs[i]); + vdpasim_vq_reset(vdpasim, &vdpasim->vqs[i]); spin_lock(&vdpasim->iommu_lock); vhost_iotlb_reset(vdpasim->iommu); @@ -508,7 +512,9 @@ static u32 vdpasim_get_vq_align(struct vdpa_device *vdpa) static u64 vdpasim_get_features(struct vdpa_device *vdpa) { - return vdpasim_features; + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + return vdpasim->dev_attr.supported_features; } static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) @@ -520,7 +526,7 @@ static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) return -EINVAL; - vdpasim->features = features & vdpasim_features; + vdpasim->features = features & vdpasim->dev_attr.supported_features; /* We generally only know whether guest is using the legacy interface * here, so generally that's the earliest we can set config fields. @@ -732,6 +738,7 @@ static int __init vdpasim_dev_init(void) struct vdpasim_dev_attr dev_attr = {}; dev_attr.id = VIRTIO_ID_NET; + dev_attr.supported_features = VDPASIM_NET_FEATURES; dev_attr.nvqs = VDPASIM_VQ_NUM; vdpasim_dev = vdpasim_create(&dev_attr); -- cgit v1.2.3 From a13b5918fdd0dd7987aa5f3c202f68ed6ad468bb Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:47 +0100 Subject: vdpa_sim: add work_fn in vdpasim_dev_attr Rename vdpasim_work() to vdpasim_net_work() and add it to the vdpasim_dev_attr structure. Co-developed-by: Max Gurtovoy Signed-off-by: Max Gurtovoy Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-10-sgarzare@redhat.com Signed-off-by: Michael S.
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 6cf3c78b0e33..d356929f9dd3 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -70,6 +70,8 @@ struct vdpasim_dev_attr { u64 supported_features; int nvqs; u32 id; + + work_func_t work_fn; }; /* State of each vdpasim device */ @@ -163,7 +165,7 @@ static void vdpasim_reset(struct vdpasim *vdpasim) ++vdpasim->generation; } -static void vdpasim_work(struct work_struct *work) +static void vdpasim_net_work(struct work_struct *work) { struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); @@ -370,7 +372,7 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) goto err_alloc; vdpasim->dev_attr = *dev_attr; - INIT_WORK(&vdpasim->work, vdpasim_work); + INIT_WORK(&vdpasim->work, dev_attr->work_fn); spin_lock_init(&vdpasim->lock); spin_lock_init(&vdpasim->iommu_lock); @@ -740,6 +742,7 @@ static int __init vdpasim_dev_init(void) dev_attr.id = VIRTIO_ID_NET; dev_attr.supported_features = VDPASIM_NET_FEATURES; dev_attr.nvqs = VDPASIM_VQ_NUM; + dev_attr.work_fn = vdpasim_net_work; vdpasim_dev = vdpasim_create(&dev_attr); -- cgit v1.2.3 From cf1a3b35382c10ce315c32bd2b3d7789897fbe13 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:48 +0100 Subject: vdpa_sim: store parsed MAC address in a buffer As preparation for the next patches, we store the MAC address, parsed during the vdpasim_create(), in a buffer that will be used to fill 'config' together with other configurations. Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-11-sgarzare@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index d356929f9dd3..ccb40501cbd7 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -40,6 +40,8 @@ static char *macaddr; module_param(macaddr, charp, 0); MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); +u8 macaddr_buf[ETH_ALEN]; + struct vdpasim_virtqueue { struct vringh vring; struct vringh_kiov iov; @@ -396,13 +398,13 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) goto err_iommu; if (macaddr) { - mac_pton(macaddr, vdpasim->config.mac); - if (!is_valid_ether_addr(vdpasim->config.mac)) { + mac_pton(macaddr, macaddr_buf); + if (!is_valid_ether_addr(macaddr_buf)) { ret = -EADDRNOTAVAIL; goto err_iommu; } } else { - eth_random_addr(vdpasim->config.mac); + eth_random_addr(macaddr_buf); } for (i = 0; i < dev_attr->nvqs; i++) @@ -538,6 +540,8 @@ static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) config->mtu = cpu_to_vdpasim16(vdpasim, 1500); config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); + memcpy(config->mac, macaddr_buf, ETH_ALEN); + return 0; } -- cgit v1.2.3 From f37cbbc65178e0a45823d281d290c4c02da9631c Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:49 +0100 Subject: vdpa_sim: make 'config' generic and usable for any device type Add a new 'config_size' attribute to 'vdpasim_dev_attr' and allocate 'config' dynamically to support any device type. Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-12-sgarzare@redhat.com Signed-off-by: Michael S.
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index ccb40501cbd7..4a0a6cadb9ff 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -70,6 +70,7 @@ struct vdpasim_virtqueue { struct vdpasim_dev_attr { u64 supported_features; + size_t config_size; int nvqs; u32 id; @@ -84,7 +85,8 @@ struct vdpasim { struct vdpasim_dev_attr dev_attr; /* spinlock to synchronize virtqueue state */ spinlock_t lock; - struct virtio_net_config config; + /* virtio config according to device type */ + void *config; struct vhost_iotlb *iommu; void *buffer; u32 status; @@ -384,6 +386,10 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) goto err_iommu; set_dma_ops(dev, &vdpasim_dma_ops); + vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL); + if (!vdpasim->config) + goto err_iommu; + vdpasim->vqs = kcalloc(dev_attr->nvqs, sizeof(struct vdpasim_virtqueue), GFP_KERNEL); if (!vdpasim->vqs) @@ -524,7 +530,8 @@ static u64 vdpasim_get_features(struct vdpa_device *vdpa) static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct virtio_net_config *config = &vdpasim->config; + struct virtio_net_config *config = + (struct virtio_net_config *)vdpasim->config; /* DMA mapping must be done by driver */ if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) @@ -596,8 +603,8 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - if (offset + len < sizeof(struct virtio_net_config)) - memcpy(buf, (u8 *)&vdpasim->config + offset, len); + if (offset + len < vdpasim->dev_attr.config_size) + memcpy(buf, vdpasim->config + offset, len); } static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, @@ -684,6 +691,7 @@ static void 
vdpasim_free(struct vdpa_device *vdpa) if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); kfree(vdpasim->vqs); + kfree(vdpasim->config); } static const struct vdpa_config_ops vdpasim_config_ops = { @@ -746,6 +754,7 @@ static int __init vdpasim_dev_init(void) dev_attr.id = VIRTIO_ID_NET; dev_attr.supported_features = VDPASIM_NET_FEATURES; dev_attr.nvqs = VDPASIM_VQ_NUM; + dev_attr.config_size = sizeof(struct virtio_net_config); dev_attr.work_fn = vdpasim_net_work; vdpasim_dev = vdpasim_create(&dev_attr); -- cgit v1.2.3 From 65b709586e222fa6ffd4166ac7fdb5d5dad113ee Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:50 +0100 Subject: vdpa_sim: add get_config callback in vdpasim_dev_attr The get_config callback can be used by the device to fill the config structure. The callback will be invoked in vdpasim_get_config() before copying bytes into caller buffer. Move vDPA-net config updates from vdpasim_set_features() in the new vdpasim_net_get_config() callback. This is safe since in vdpa_get_config() we already check that .set_features() callback is called before .get_config(). Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-13-sgarzare@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 4a0a6cadb9ff..5eadcd19ab6f 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -68,6 +68,8 @@ struct vdpasim_virtqueue { #define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ (1ULL << VIRTIO_NET_F_MAC)) +struct vdpasim; + struct vdpasim_dev_attr { u64 supported_features; size_t config_size; @@ -75,6 +77,7 @@ struct vdpasim_dev_attr { u32 id; work_func_t work_fn; + void (*get_config)(struct vdpasim *vdpasim, void *config); }; /* State of each vdpasim device */ @@ -530,8 +533,6 @@ static u64 vdpasim_get_features(struct vdpa_device *vdpa) static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - struct virtio_net_config *config = - (struct virtio_net_config *)vdpasim->config; /* DMA mapping must be done by driver */ if (!(features & (1ULL << VIRTIO_F_ACCESS_PLATFORM))) @@ -539,16 +540,6 @@ static int vdpasim_set_features(struct vdpa_device *vdpa, u64 features) vdpasim->features = features & vdpasim->dev_attr.supported_features; - /* We generally only know whether guest is using the legacy interface - * here, so generally that's the earliest we can set config fields. - * Note: We actually require VIRTIO_F_ACCESS_PLATFORM above which - * implies VIRTIO_F_VERSION_1, but let's not try to be clever here. 
- */ - - config->mtu = cpu_to_vdpasim16(vdpasim, 1500); - config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); - memcpy(config->mac, macaddr_buf, ETH_ALEN); - return 0; } @@ -603,8 +594,13 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, { struct vdpasim *vdpasim = vdpa_to_sim(vdpa); - if (offset + len < vdpasim->dev_attr.config_size) - memcpy(buf, vdpasim->config + offset, len); + if (offset + len > vdpasim->dev_attr.config_size) + return; + + if (vdpasim->dev_attr.get_config) + vdpasim->dev_attr.get_config(vdpasim, vdpasim->config); + + memcpy(buf, vdpasim->config + offset, len); } static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, @@ -747,6 +743,16 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .free = vdpasim_free, }; +static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) +{ + struct virtio_net_config *net_config = + (struct virtio_net_config *)config; + + net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); + net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); + memcpy(net_config->mac, macaddr_buf, ETH_ALEN); +} + static int __init vdpasim_dev_init(void) { struct vdpasim_dev_attr dev_attr = {}; @@ -755,6 +761,7 @@ static int __init vdpasim_dev_init(void) dev_attr.supported_features = VDPASIM_NET_FEATURES; dev_attr.nvqs = VDPASIM_VQ_NUM; dev_attr.config_size = sizeof(struct virtio_net_config); + dev_attr.get_config = vdpasim_net_get_config; dev_attr.work_fn = vdpasim_net_work; vdpasim_dev = vdpasim_create(&dev_attr); -- cgit v1.2.3 From c124a95e304bc5d37144e2fff6e52bb904d41810 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:51 +0100 Subject: vdpa_sim: add set_config callback in vdpasim_dev_attr The set_config callback can be used by the device to parse the config structure modified by the driver. 
The callback will be invoked, if set, in vdpasim_set_config() after copying bytes from caller buffer into vdpasim->config buffer. Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-14-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 5eadcd19ab6f..e219aa852ef8 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -78,6 +78,7 @@ struct vdpasim_dev_attr { work_func_t work_fn; void (*get_config)(struct vdpasim *vdpasim, void *config); + void (*set_config)(struct vdpasim *vdpasim, const void *config); }; /* State of each vdpasim device */ @@ -606,7 +607,15 @@ static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset, static void vdpasim_set_config(struct vdpa_device *vdpa, unsigned int offset, const void *buf, unsigned int len) { - /* No writable config supportted by vdpasim */ + struct vdpasim *vdpasim = vdpa_to_sim(vdpa); + + if (offset + len > vdpasim->dev_attr.config_size) + return; + + memcpy(vdpasim->config + offset, buf, len); + + if (vdpasim->dev_attr.set_config) + vdpasim->dev_attr.set_config(vdpasim, vdpasim->config); } static u32 vdpasim_get_generation(struct vdpa_device *vdpa) -- cgit v1.2.3 From b240491b7a48028fb67e5377ffd1be21e9260c4e Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:52 +0100 Subject: vdpa_sim: set vringh notify callback Instead of calling the vq callback directly, we can leverage the vringh_notify() function, adding vdpasim_vq_notify() and setting it in the vringh notify callback. Suggested-by: Jason Wang Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-15-sgarzare@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index e219aa852ef8..19ff5e352782 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -131,6 +131,17 @@ static struct vdpasim *dev_to_sim(struct device *dev) return vdpa_to_sim(vdpa); } +static void vdpasim_vq_notify(struct vringh *vring) +{ + struct vdpasim_virtqueue *vq = + container_of(vring, struct vdpasim_virtqueue, vring); + + if (!vq->cb) + return; + + vq->cb(vq->private); +} + static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) { struct vdpasim_virtqueue *vq = &vdpasim->vqs[idx]; @@ -142,6 +153,8 @@ static void vdpasim_queue_ready(struct vdpasim *vdpasim, unsigned int idx) (uintptr_t)vq->driver_addr, (struct vring_used *) (uintptr_t)vq->device_addr); + + vq->vring.notify = vdpasim_vq_notify; } static void vdpasim_vq_reset(struct vdpasim *vdpasim, @@ -155,6 +168,8 @@ static void vdpasim_vq_reset(struct vdpasim *vdpasim, vq->private = NULL; vringh_init_iotlb(&vq->vring, vdpasim->dev_attr.supported_features, VDPASIM_QUEUE_MAX, false, NULL, NULL, NULL); + + vq->vring.notify = NULL; } static void vdpasim_reset(struct vdpasim *vdpasim) @@ -231,10 +246,10 @@ static void vdpasim_net_work(struct work_struct *work) smp_wmb(); local_bh_disable(); - if (txq->cb) - txq->cb(txq->private); - if (rxq->cb) - rxq->cb(rxq->private); + if (vringh_need_notify_iotlb(&txq->vring) > 0) + vringh_notify(&txq->vring); + if (vringh_need_notify_iotlb(&rxq->vring) > 0) + vringh_notify(&rxq->vring); local_bh_enable(); if (++pkts > 4) { -- cgit v1.2.3 From 165be1f80b8807687f7426d3f36f1031d633e979 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:53 +0100 Subject: vdpa_sim: use kvmalloc to allocate vdpasim->buffer The next patch will make the buffer size configurable from each device. 
Since the buffer could be larger than a page, we use kvmalloc() instead of kmalloc(). Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-16-sgarzare@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 19ff5e352782..87529899033a 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -418,7 +418,7 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) if (!vdpasim->iommu) goto err_iommu; - vdpasim->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); + vdpasim->buffer = kvmalloc(PAGE_SIZE, GFP_KERNEL); if (!vdpasim->buffer) goto err_iommu; @@ -707,7 +707,7 @@ static void vdpasim_free(struct vdpa_device *vdpa) struct vdpasim *vdpasim = vdpa_to_sim(vdpa); cancel_work_sync(&vdpasim->work); - kfree(vdpasim->buffer); + kvfree(vdpasim->buffer); if (vdpasim->iommu) vhost_iotlb_free(vdpasim->iommu); kfree(vdpasim->vqs); -- cgit v1.2.3 From da7af6967c6e9815f8da60a8db1d0fe35b8e97b9 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:54 +0100 Subject: vdpa_sim: make vdpasim->buffer size configurable Allow each device to specify the size of the buffer allocated in vdpa_sim. Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-17-sgarzare@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 87529899033a..60e45db29b15 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -73,6 +73,7 @@ struct vdpasim; struct vdpasim_dev_attr { u64 supported_features; size_t config_size; + size_t buffer_size; int nvqs; u32 id; @@ -418,7 +419,7 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) if (!vdpasim->iommu) goto err_iommu; - vdpasim->buffer = kvmalloc(PAGE_SIZE, GFP_KERNEL); + vdpasim->buffer = kvmalloc(dev_attr->buffer_size, GFP_KERNEL); if (!vdpasim->buffer) goto err_iommu; @@ -787,6 +788,7 @@ static int __init vdpasim_dev_init(void) dev_attr.config_size = sizeof(struct virtio_net_config); dev_attr.get_config = vdpasim_net_get_config; dev_attr.work_fn = vdpasim_net_work; + dev_attr.buffer_size = PAGE_SIZE; vdpasim_dev = vdpasim_create(&dev_attr); -- cgit v1.2.3 From 275900dfa17c32f0f52b460e1fbd769cf694ecd3 Mon Sep 17 00:00:00 2001 From: Stefano Garzarella Date: Tue, 15 Dec 2020 15:42:55 +0100 Subject: vdpa_sim: split vdpasim_virtqueue's iov field in out_iov and in_iov vringh_getdesc_iotlb() manages 2 iovs for writable and readable descriptors. This is very useful for the block device, where for each request we have both types of descriptor. Let's split the vdpasim_virtqueue's iov field in out_iov and in_iov to use them with vringh_getdesc_iotlb(). We are using VIRTIO terminology for "out" (readable by the device) and "in" (writable by the device) descriptors. Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-18-sgarzare@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/vdpa_sim/vdpa_sim.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 60e45db29b15..875e42390a13 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -44,7 +44,8 @@ u8 macaddr_buf[ETH_ALEN]; struct vdpasim_virtqueue { struct vringh vring; - struct vringh_kiov iov; + struct vringh_kiov in_iov; + struct vringh_kiov out_iov; unsigned short head; bool ready; u64 desc_addr; @@ -210,12 +211,12 @@ static void vdpasim_net_work(struct work_struct *work) while (true) { total_write = 0; - err = vringh_getdesc_iotlb(&txq->vring, &txq->iov, NULL, + err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL, &txq->head, GFP_ATOMIC); if (err <= 0) break; - err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->iov, + err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov, &rxq->head, GFP_ATOMIC); if (err <= 0) { vringh_complete_iotlb(&txq->vring, txq->head, 0); @@ -223,13 +224,13 @@ static void vdpasim_net_work(struct work_struct *work) } while (true) { - read = vringh_iov_pull_iotlb(&txq->vring, &txq->iov, + read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, vdpasim->buffer, PAGE_SIZE); if (read <= 0) break; - write = vringh_iov_push_iotlb(&rxq->vring, &rxq->iov, + write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, vdpasim->buffer, read); if (write <= 0) break; -- cgit v1.2.3 From db1e8bb6c63a77b74b0c6b49662fc50d49d5f90b Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 15 Dec 2020 15:42:56 +0100 Subject: vdpa: split vdpasim to core and net modules Introduce new vdpa_sim_net and vdpa_sim (core) drivers. This is a preparation for adding a vdpa simulator module for block devices. Signed-off-by: Max Gurtovoy [sgarzare: various cleanups/fixes] Acked-by: Jason Wang Signed-off-by: Stefano Garzarella Link: https://lore.kernel.org/r/20201215144256.155342-19-sgarzare@redhat.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/Kconfig | 13 ++- drivers/vdpa/vdpa_sim/Makefile | 1 + drivers/vdpa/vdpa_sim/vdpa_sim.c | 221 +---------------------------------- drivers/vdpa/vdpa_sim/vdpa_sim.h | 105 +++++++++++++++++ drivers/vdpa/vdpa_sim/vdpa_sim_net.c | 177 ++++++++++++++++++++++++++++ 5 files changed, 298 insertions(+), 219 deletions(-) create mode 100644 drivers/vdpa/vdpa_sim/vdpa_sim.h create mode 100644 drivers/vdpa/vdpa_sim/vdpa_sim_net.c diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig index 2c892e890b9e..92a6396f8a73 100644 --- a/drivers/vdpa/Kconfig +++ b/drivers/vdpa/Kconfig @@ -9,15 +9,20 @@ menuconfig VDPA if VDPA config VDPA_SIM - tristate "vDPA device simulator" + tristate "vDPA device simulator core" depends on RUNTIME_TESTING_MENU && HAS_DMA select DMA_OPS select VHOST_RING + help + Enable this module to support vDPA device simulators. These devices + are used for testing, prototyping and development of vDPA. + +config VDPA_SIM_NET + tristate "vDPA simulator for networking device" + depends on VDPA_SIM select GENERIC_NET_UTILS help - vDPA networking device simulator which loop TX traffic back - to RX. This device is used for testing, prototyping and - development of vDPA. + vDPA networking device simulator which loops TX traffic back to RX. config IFCVF tristate "Intel IFC VF vDPA driver" diff --git a/drivers/vdpa/vdpa_sim/Makefile b/drivers/vdpa/vdpa_sim/Makefile index b40278f65e04..79d4536d347e 100644 --- a/drivers/vdpa/vdpa_sim/Makefile +++ b/drivers/vdpa/vdpa_sim/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o +obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.c b/drivers/vdpa/vdpa_sim/vdpa_sim.c index 875e42390a13..b3fcc67bfdf0 100644 --- a/drivers/vdpa/vdpa_sim/vdpa_sim.c +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * VDPA networking device simulator. + * VDPA device simulator core. 
* * Copyright (c) 2020, Red Hat Inc. All rights reserved. * Author: Jason Wang @@ -14,17 +14,15 @@ #include #include #include -#include #include #include -#include #include -#include -#include + +#include "vdpa_sim.h" #define DRV_VERSION "0.1" #define DRV_AUTHOR "Jason Wang " -#define DRV_DESC "vDPA Device Simulator" +#define DRV_DESC "vDPA Device Simulator core" #define DRV_LICENSE "GPL v2" static int batch_mapping = 1; @@ -36,90 +34,9 @@ module_param(max_iotlb_entries, int, 0444); MODULE_PARM_DESC(max_iotlb_entries, "Maximum number of iotlb entries. 0 means unlimited. (default: 2048)"); -static char *macaddr; -module_param(macaddr, charp, 0); -MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); - -u8 macaddr_buf[ETH_ALEN]; - -struct vdpasim_virtqueue { - struct vringh vring; - struct vringh_kiov in_iov; - struct vringh_kiov out_iov; - unsigned short head; - bool ready; - u64 desc_addr; - u64 device_addr; - u64 driver_addr; - u32 num; - void *private; - irqreturn_t (*cb)(void *data); -}; - #define VDPASIM_QUEUE_ALIGN PAGE_SIZE #define VDPASIM_QUEUE_MAX 256 #define VDPASIM_VENDOR_ID 0 -#define VDPASIM_VQ_NUM 0x2 -#define VDPASIM_NAME "vdpasim-netdev" - -#define VDPASIM_FEATURES ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ - (1ULL << VIRTIO_F_VERSION_1) | \ - (1ULL << VIRTIO_F_ACCESS_PLATFORM)) - -#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ - (1ULL << VIRTIO_NET_F_MAC)) - -struct vdpasim; - -struct vdpasim_dev_attr { - u64 supported_features; - size_t config_size; - size_t buffer_size; - int nvqs; - u32 id; - - work_func_t work_fn; - void (*get_config)(struct vdpasim *vdpasim, void *config); - void (*set_config)(struct vdpasim *vdpasim, const void *config); -}; - -/* State of each vdpasim device */ -struct vdpasim { - struct vdpa_device vdpa; - struct vdpasim_virtqueue *vqs; - struct work_struct work; - struct vdpasim_dev_attr dev_attr; - /* spinlock to synchronize virtqueue state */ - spinlock_t lock; - /* virtio config according to device type */ - void *config; - 
struct vhost_iotlb *iommu; - void *buffer; - u32 status; - u32 generation; - u64 features; - /* spinlock to synchronize iommu table */ - spinlock_t iommu_lock; -}; - -/* TODO: cross-endian support */ -static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) -{ - return virtio_legacy_is_little_endian() || - (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1)); -} - -static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val) -{ - return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val); -} - -static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val) -{ - return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val); -} - -static struct vdpasim *vdpasim_dev; static struct vdpasim *vdpa_to_sim(struct vdpa_device *vdpa) { @@ -190,80 +107,6 @@ static void vdpasim_reset(struct vdpasim *vdpasim) ++vdpasim->generation; } -static void vdpasim_net_work(struct work_struct *work) -{ - struct vdpasim *vdpasim = container_of(work, struct - vdpasim, work); - struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; - struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; - ssize_t read, write; - size_t total_write; - int pkts = 0; - int err; - - spin_lock(&vdpasim->lock); - - if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) - goto out; - - if (!txq->ready || !rxq->ready) - goto out; - - while (true) { - total_write = 0; - err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL, - &txq->head, GFP_ATOMIC); - if (err <= 0) - break; - - err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov, - &rxq->head, GFP_ATOMIC); - if (err <= 0) { - vringh_complete_iotlb(&txq->vring, txq->head, 0); - break; - } - - while (true) { - read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, - vdpasim->buffer, - PAGE_SIZE); - if (read <= 0) - break; - - write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, - vdpasim->buffer, read); - if (write <= 0) - break; - - total_write += write; - } - - /* Make sure data is wrote before advancing index */ 
- smp_wmb(); - - vringh_complete_iotlb(&txq->vring, txq->head, 0); - vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); - - /* Make sure used is visible before rasing the interrupt. */ - smp_wmb(); - - local_bh_disable(); - if (vringh_need_notify_iotlb(&txq->vring) > 0) - vringh_notify(&txq->vring); - if (vringh_need_notify_iotlb(&rxq->vring) > 0) - vringh_notify(&rxq->vring); - local_bh_enable(); - - if (++pkts > 4) { - schedule_work(&vdpasim->work); - goto out; - } - } - -out: - spin_unlock(&vdpasim->lock); -} - static int dir_to_perm(enum dma_data_direction dir) { int perm = -EFAULT; @@ -379,7 +222,7 @@ static const struct dma_map_ops vdpasim_dma_ops = { static const struct vdpa_config_ops vdpasim_config_ops; static const struct vdpa_config_ops vdpasim_batch_config_ops; -static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) +struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) { const struct vdpa_config_ops *ops; struct vdpasim *vdpasim; @@ -424,23 +267,10 @@ static struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr) if (!vdpasim->buffer) goto err_iommu; - if (macaddr) { - mac_pton(macaddr, macaddr_buf); - if (!is_valid_ether_addr(macaddr_buf)) { - ret = -EADDRNOTAVAIL; - goto err_iommu; - } - } else { - eth_random_addr(macaddr_buf); - } - for (i = 0; i < dev_attr->nvqs; i++) vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu); vdpasim->vdpa.dma_dev = dev; - ret = vdpa_register_device(&vdpasim->vdpa); - if (ret) - goto err_iommu; return vdpasim; @@ -449,6 +279,7 @@ err_iommu: err_alloc: return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(vdpasim_create); static int vdpasim_set_vq_address(struct vdpa_device *vdpa, u16 idx, u64 desc_area, u64 driver_area, @@ -769,46 +600,6 @@ static const struct vdpa_config_ops vdpasim_batch_config_ops = { .free = vdpasim_free, }; -static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) -{ - struct virtio_net_config *net_config = - (struct virtio_net_config 
*)config; - - net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); - net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); - memcpy(net_config->mac, macaddr_buf, ETH_ALEN); -} - -static int __init vdpasim_dev_init(void) -{ - struct vdpasim_dev_attr dev_attr = {}; - - dev_attr.id = VIRTIO_ID_NET; - dev_attr.supported_features = VDPASIM_NET_FEATURES; - dev_attr.nvqs = VDPASIM_VQ_NUM; - dev_attr.config_size = sizeof(struct virtio_net_config); - dev_attr.get_config = vdpasim_net_get_config; - dev_attr.work_fn = vdpasim_net_work; - dev_attr.buffer_size = PAGE_SIZE; - - vdpasim_dev = vdpasim_create(&dev_attr); - - if (!IS_ERR(vdpasim_dev)) - return 0; - - return PTR_ERR(vdpasim_dev); -} - -static void __exit vdpasim_dev_exit(void) -{ - struct vdpa_device *vdpa = &vdpasim_dev->vdpa; - - vdpa_unregister_device(vdpa); -} - -module_init(vdpasim_dev_init) -module_exit(vdpasim_dev_exit) - MODULE_VERSION(DRV_VERSION); MODULE_LICENSE(DRV_LICENSE); MODULE_AUTHOR(DRV_AUTHOR); diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim.h b/drivers/vdpa/vdpa_sim/vdpa_sim.h new file mode 100644 index 000000000000..b02142293d5b --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2020, Red Hat Inc. All rights reserved. 
+ */ + +#ifndef _VDPA_SIM_H +#define _VDPA_SIM_H + +#include +#include +#include +#include +#include + +#define VDPASIM_FEATURES ((1ULL << VIRTIO_F_ANY_LAYOUT) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VIRTIO_F_ACCESS_PLATFORM)) + +struct vdpasim; + +struct vdpasim_virtqueue { + struct vringh vring; + struct vringh_kiov in_iov; + struct vringh_kiov out_iov; + unsigned short head; + bool ready; + u64 desc_addr; + u64 device_addr; + u64 driver_addr; + u32 num; + void *private; + irqreturn_t (*cb)(void *data); +}; + +struct vdpasim_dev_attr { + u64 supported_features; + size_t config_size; + size_t buffer_size; + int nvqs; + u32 id; + + work_func_t work_fn; + void (*get_config)(struct vdpasim *vdpasim, void *config); + void (*set_config)(struct vdpasim *vdpasim, const void *config); +}; + +/* State of each vdpasim device */ +struct vdpasim { + struct vdpa_device vdpa; + struct vdpasim_virtqueue *vqs; + struct work_struct work; + struct vdpasim_dev_attr dev_attr; + /* spinlock to synchronize virtqueue state */ + spinlock_t lock; + /* virtio config according to device type */ + void *config; + struct vhost_iotlb *iommu; + void *buffer; + u32 status; + u32 generation; + u64 features; + /* spinlock to synchronize iommu table */ + spinlock_t iommu_lock; +}; + +struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *attr); + +/* TODO: cross-endian support */ +static inline bool vdpasim_is_little_endian(struct vdpasim *vdpasim) +{ + return virtio_legacy_is_little_endian() || + (vdpasim->features & (1ULL << VIRTIO_F_VERSION_1)); +} + +static inline u16 vdpasim16_to_cpu(struct vdpasim *vdpasim, __virtio16 val) +{ + return __virtio16_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio16 cpu_to_vdpasim16(struct vdpasim *vdpasim, u16 val) +{ + return __cpu_to_virtio16(vdpasim_is_little_endian(vdpasim), val); +} + +static inline u32 vdpasim32_to_cpu(struct vdpasim *vdpasim, __virtio32 val) +{ + return 
__virtio32_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio32 cpu_to_vdpasim32(struct vdpasim *vdpasim, u32 val) +{ + return __cpu_to_virtio32(vdpasim_is_little_endian(vdpasim), val); +} + +static inline u64 vdpasim64_to_cpu(struct vdpasim *vdpasim, __virtio64 val) +{ + return __virtio64_to_cpu(vdpasim_is_little_endian(vdpasim), val); +} + +static inline __virtio64 cpu_to_vdpasim64(struct vdpasim *vdpasim, u64 val) +{ + return __cpu_to_virtio64(vdpasim_is_little_endian(vdpasim), val); +} + +#endif diff --git a/drivers/vdpa/vdpa_sim/vdpa_sim_net.c b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c new file mode 100644 index 000000000000..c10b6981fdab --- /dev/null +++ b/drivers/vdpa/vdpa_sim/vdpa_sim_net.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VDPA simulator for networking device. + * + * Copyright (c) 2020, Red Hat Inc. All rights reserved. + * Author: Jason Wang + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vdpa_sim.h" + +#define DRV_VERSION "0.1" +#define DRV_AUTHOR "Jason Wang " +#define DRV_DESC "vDPA Device Simulator for networking device" +#define DRV_LICENSE "GPL v2" + +#define VDPASIM_NET_FEATURES (VDPASIM_FEATURES | \ + (1ULL << VIRTIO_NET_F_MAC)) + +#define VDPASIM_NET_VQ_NUM 2 + +static char *macaddr; +module_param(macaddr, charp, 0); +MODULE_PARM_DESC(macaddr, "Ethernet MAC address"); + +u8 macaddr_buf[ETH_ALEN]; + +static struct vdpasim *vdpasim_net_dev; + +static void vdpasim_net_work(struct work_struct *work) +{ + struct vdpasim *vdpasim = container_of(work, struct vdpasim, work); + struct vdpasim_virtqueue *txq = &vdpasim->vqs[1]; + struct vdpasim_virtqueue *rxq = &vdpasim->vqs[0]; + ssize_t read, write; + size_t total_write; + int pkts = 0; + int err; + + spin_lock(&vdpasim->lock); + + if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK)) + goto out; + + if (!txq->ready || !rxq->ready) + goto out; + + while (true) { + total_write = 0; + 
err = vringh_getdesc_iotlb(&txq->vring, &txq->out_iov, NULL, + &txq->head, GFP_ATOMIC); + if (err <= 0) + break; + + err = vringh_getdesc_iotlb(&rxq->vring, NULL, &rxq->in_iov, + &rxq->head, GFP_ATOMIC); + if (err <= 0) { + vringh_complete_iotlb(&txq->vring, txq->head, 0); + break; + } + + while (true) { + read = vringh_iov_pull_iotlb(&txq->vring, &txq->out_iov, + vdpasim->buffer, + PAGE_SIZE); + if (read <= 0) + break; + + write = vringh_iov_push_iotlb(&rxq->vring, &rxq->in_iov, + vdpasim->buffer, read); + if (write <= 0) + break; + + total_write += write; + } + + /* Make sure data is wrote before advancing index */ + smp_wmb(); + + vringh_complete_iotlb(&txq->vring, txq->head, 0); + vringh_complete_iotlb(&rxq->vring, rxq->head, total_write); + + /* Make sure used is visible before rasing the interrupt. */ + smp_wmb(); + + local_bh_disable(); + if (vringh_need_notify_iotlb(&txq->vring) > 0) + vringh_notify(&txq->vring); + if (vringh_need_notify_iotlb(&rxq->vring) > 0) + vringh_notify(&rxq->vring); + local_bh_enable(); + + if (++pkts > 4) { + schedule_work(&vdpasim->work); + goto out; + } + } + +out: + spin_unlock(&vdpasim->lock); +} + +static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config) +{ + struct virtio_net_config *net_config = + (struct virtio_net_config *)config; + + net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500); + net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP); + memcpy(net_config->mac, macaddr_buf, ETH_ALEN); +} + +static int __init vdpasim_net_init(void) +{ + struct vdpasim_dev_attr dev_attr = {}; + int ret; + + if (macaddr) { + mac_pton(macaddr, macaddr_buf); + if (!is_valid_ether_addr(macaddr_buf)) { + ret = -EADDRNOTAVAIL; + goto out; + } + } else { + eth_random_addr(macaddr_buf); + } + + dev_attr.id = VIRTIO_ID_NET; + dev_attr.supported_features = VDPASIM_NET_FEATURES; + dev_attr.nvqs = VDPASIM_NET_VQ_NUM; + dev_attr.config_size = sizeof(struct virtio_net_config); + dev_attr.get_config = 
vdpasim_net_get_config; + dev_attr.work_fn = vdpasim_net_work; + dev_attr.buffer_size = PAGE_SIZE; + + vdpasim_net_dev = vdpasim_create(&dev_attr); + if (IS_ERR(vdpasim_net_dev)) { + ret = PTR_ERR(vdpasim_net_dev); + goto out; + } + + ret = vdpa_register_device(&vdpasim_net_dev->vdpa); + if (ret) + goto put_dev; + + return 0; + +put_dev: + put_device(&vdpasim_net_dev->vdpa.dev); +out: + return ret; +} + +static void __exit vdpasim_net_exit(void) +{ + struct vdpa_device *vdpa = &vdpasim_net_dev->vdpa; + + vdpa_unregister_device(vdpa); +} + +module_init(vdpasim_net_init); +module_exit(vdpasim_net_exit); + +MODULE_VERSION(DRV_VERSION); +MODULE_LICENSE(DRV_LICENSE); +MODULE_AUTHOR(DRV_AUTHOR); +MODULE_DESCRIPTION(DRV_DESC); -- cgit v1.2.3 From 83ef73b27eb2363f44faf9c3ee28a3fe752cfd15 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 9 Dec 2020 16:00:04 +0200 Subject: vdpa/mlx5: Use write memory barrier after updating CQ index Make sure to put dma write memory barrier after updating CQ consumer index so the hardware knows that there are available CQE slots in the queue. Failure to do this can cause the update of the RX doorbell record to get updated before the CQ consumer index resulting in CQ overrun. Fixes: 1a86b377aa21 ("vdpa/mlx5: Add VDPA driver for supported mlx5 devices") Signed-off-by: Eli Cohen Link: https://lore.kernel.org/r/20201209140004.15892-1-elic@nvidia.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vdpa/mlx5/net/mlx5_vnet.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/vdpa/mlx5/net/mlx5_vnet.c b/drivers/vdpa/mlx5/net/mlx5_vnet.c index 1fa6fcac8299..81b932f72e10 100644 --- a/drivers/vdpa/mlx5/net/mlx5_vnet.c +++ b/drivers/vdpa/mlx5/net/mlx5_vnet.c @@ -464,6 +464,11 @@ static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq) static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num) { mlx5_cq_set_ci(&mvq->cq.mcq); + + /* make sure CQ cosumer update is visible to the hardware before updating + * RX doorbell record. + */ + dma_wmb(); rx_post(&mvq->vqqp, num); if (mvq->event_cb.callback) mvq->event_cb.callback(mvq->event_cb.private); -- cgit v1.2.3 From 697d1549140cdcdc4cfcd0bf94e62643008972b7 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 9 Dec 2020 16:42:03 +0800 Subject: tools/virtio: include asm/bug.h WARN_ON is used in drivers/vhost/vringh.c, to avoid build failure, need include asm/bug.h Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20201209084205.24062-2-peng.fan@oss.nxp.com Signed-off-by: Michael S. Tsirkin --- tools/virtio/linux/bug.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/virtio/linux/bug.h b/tools/virtio/linux/bug.h index b14c2c3b6b85..813baf13f62a 100644 --- a/tools/virtio/linux/bug.h +++ b/tools/virtio/linux/bug.h @@ -2,6 +2,8 @@ #ifndef BUG_H #define BUG_H +#include + #define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond)) #define BUILD_BUG_ON(x) -- cgit v1.2.3 From b9ca93bcd186ec4144df91c619f6084cdad500ec Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 9 Dec 2020 16:42:04 +0800 Subject: tools/virtio: add krealloc_array krealloc_array is used in drivers/vhost/vringh.c, add it to avoid build failure. Drop WARN_ON_ONCE, because duplicated with the one in bug.h Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20201209084205.24062-3-peng.fan@oss.nxp.com Signed-off-by: Michael S. 
Tsirkin --- tools/virtio/linux/kernel.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/tools/virtio/linux/kernel.h b/tools/virtio/linux/kernel.h index 315e85cabeda..0b493542e61a 100644 --- a/tools/virtio/linux/kernel.h +++ b/tools/virtio/linux/kernel.h @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -117,6 +118,16 @@ static inline void free_page(unsigned long addr) # define unlikely(x) (__builtin_expect(!!(x), 0)) # endif +static inline void *krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t gfp) +{ + size_t bytes; + + if (unlikely(check_mul_overflow(new_n, new_size, &bytes))) + return NULL; + + return krealloc(p, bytes, gfp); +} + #define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__) #ifdef DEBUG #define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__) @@ -126,8 +137,6 @@ static inline void free_page(unsigned long addr) #define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) #define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__) -#define WARN_ON_ONCE(cond) (unlikely(cond) ? fprintf (stderr, "WARNING\n") : 0) - #define min(x, y) ({ \ typeof(x) _min1 = (x); \ typeof(y) _min2 = (y); \ -- cgit v1.2.3 From 1a5514cbb09aaf694d26ef26fd6da5c5d495cc22 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Wed, 9 Dec 2020 16:42:05 +0800 Subject: tools/virtio: add barrier for aarch64 Add barrier for aarch64 for cross compiling, and most are from Linux Kernel. Signed-off-by: Peng Fan Link: https://lore.kernel.org/r/20201209084205.24062-4-peng.fan@oss.nxp.com Signed-off-by: Michael S. 
Tsirkin --- tools/virtio/asm/barrier.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h index 04d563fc9b95..468435ed64e6 100644 --- a/tools/virtio/asm/barrier.h +++ b/tools/virtio/asm/barrier.h @@ -16,6 +16,16 @@ # define mb() abort() # define dma_rmb() abort() # define dma_wmb() abort() +#elif defined(__aarch64__) +#define dmb(opt) asm volatile("dmb " #opt : : : "memory") +#define virt_mb() __sync_synchronize() +#define virt_rmb() dmb(ishld) +#define virt_wmb() dmb(ishst) +#define virt_store_mb(var, value) do { WRITE_ONCE(var, value); dmb(ish); } while (0) +/* Weak barriers should be used. If not - it's a bug */ +# define mb() abort() +# define dma_rmb() abort() +# define dma_wmb() abort() #else #error Please fill in barrier macros #endif -- cgit v1.2.3 From ae93d8ea0fa701e84ab9df0db9fb60ec6c80d7b8 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Dec 2020 17:23:00 +0300 Subject: virtio_ring: Cut and paste bugs in vring_create_virtqueue_packed() There is a copy and paste bug in the error handling of this code and it uses "ring_dma_addr" three times instead of "device_event_dma_addr" and "driver_event_dma_addr". Fixes: 1ce9e6055fa0 (" virtio_ring: introduce packed ring support") Reported-by: Robert Buhren Reported-by: Felicitas Hetzelt Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X8pGRJlEzyn+04u2@mwanda Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_ring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index becc77697960..924b6b85376b 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1676,9 +1676,9 @@ err_desc_extra: err_desc_state: kfree(vq); err_vq: - vring_free_queue(vdev, event_size_in_bytes, device, ring_dma_addr); + vring_free_queue(vdev, event_size_in_bytes, device, device_event_dma_addr); err_device: - vring_free_queue(vdev, event_size_in_bytes, driver, ring_dma_addr); + vring_free_queue(vdev, event_size_in_bytes, driver, driver_event_dma_addr); err_driver: vring_free_queue(vdev, ring_size_in_bytes, ring, ring_dma_addr); err_ring: -- cgit v1.2.3 From 411ea23a76526e6efed0b601abb603d3c981b333 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Dec 2020 17:23:16 +0300 Subject: virtio_net: Fix error code in probe() Set a negative error code instead of returning success if the MTU has been changed to something invalid. Fixes: fe36cbe0671e ("virtio_net: clear MTU when out of range") Reported-by: Robert Buhren Reported-by: Felicitas Hetzelt Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X8pGVJSeeCdII1Ys@mwanda Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- drivers/net/virtio_net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 21b71148c532..34bb95dd9239 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -3072,6 +3072,7 @@ static int virtnet_probe(struct virtio_device *vdev) dev_err(&vdev->dev, "device MTU appears to have changed it is now %d < %d", mtu, dev->min_mtu); + err = -EINVAL; goto free; } -- cgit v1.2.3 From e152d8af4220a05c9797591609151d404866beaa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 4 Dec 2020 17:23:36 +0300 Subject: virtio_ring: Fix two use after free bugs The "vq" struct is added to the "vdev->vqs" list prematurely. If we encounter an error later in the function then the "vq" is freed, but since it is still on the list that could lead to a use after free bug. Fixes: cbeedb72b97a ("virtio_ring: allocate desc state for split ring separately") Reported-by: Robert Buhren Reported-by: Felicitas Hetzelt Signed-off-by: Dan Carpenter Link: https://lore.kernel.org/r/X8pGaG/zkI3jk8mk@mwanda Signed-off-by: Michael S. 
Tsirkin Acked-by: Jason Wang --- drivers/virtio/virtio_ring.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index 924b6b85376b..71e16b53e9c1 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -1608,7 +1608,6 @@ static struct virtqueue *vring_create_virtqueue_packed( vq->num_added = 0; vq->packed_ring = true; vq->use_dma_api = vring_use_dma_api(vdev); - list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; vq->last_add_time_valid = false; @@ -1669,6 +1668,7 @@ static struct virtqueue *vring_create_virtqueue_packed( cpu_to_le16(vq->packed.event_flags_shadow); } + list_add_tail(&vq->vq.list, &vdev->vqs); return &vq->vq; err_desc_extra: @@ -2085,7 +2085,6 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, vq->last_used_idx = 0; vq->num_added = 0; vq->use_dma_api = vring_use_dma_api(vdev); - list_add_tail(&vq->vq.list, &vdev->vqs); #ifdef DEBUG vq->in_use = false; vq->last_add_time_valid = false; @@ -2127,6 +2126,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, memset(vq->split.desc_state, 0, vring.num * sizeof(struct vring_desc_state_split)); + list_add_tail(&vq->vq.list, &vdev->vqs); return &vq->vq; } EXPORT_SYMBOL_GPL(__vring_new_virtqueue); -- cgit v1.2.3 From 2e1139d613c7fb0956e82f72a8281c0a475ad4f8 Mon Sep 17 00:00:00 2001 From: Zhang Changzhong Date: Fri, 4 Dec 2020 16:43:30 +0800 Subject: vhost scsi: fix error return code in vhost_scsi_set_endpoint() Fix to return a negative error code from the error handling case instead of 0, as done elsewhere in this function. Fixes: 25b98b64e284 ("vhost scsi: alloc cmds per vq instead of session") Reported-by: Hulk Robot Signed-off-by: Zhang Changzhong Link: https://lore.kernel.org/r/1607071411-33484-1-git-send-email-zhangchangzhong@huawei.com Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/scsi.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index 6ff8a5096691..4ce9f00ae10e 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1643,7 +1643,8 @@ vhost_scsi_set_endpoint(struct vhost_scsi *vs, if (!vhost_vq_is_setup(vq)) continue; - if (vhost_scsi_setup_vq_cmds(vq, vq->num)) + ret = vhost_scsi_setup_vq_cmds(vq, vq->num); + if (ret) goto destroy_vq_cmds; } -- cgit v1.2.3 From 1e38f0031c3055c9c7e5ffcb3bb09c95f69614ee Mon Sep 17 00:00:00 2001 From: "Enrico Weigelt, metux IT consult" Date: Wed, 2 Dec 2020 12:19:30 +0100 Subject: uapi: virtio_ids.h: consistent indentions Fixing the differing indentions to be consistent and properly aligned. Signed-off-by: Enrico Weigelt, metux IT consult Link: https://lore.kernel.org/r/20201202111931.31953-1-info@metux.net Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ids.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index b052355ac7a3..3cb55e5277a1 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -29,24 +29,24 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ -#define VIRTIO_ID_NET 1 /* virtio net */ -#define VIRTIO_ID_BLOCK 2 /* virtio block */ -#define VIRTIO_ID_CONSOLE 3 /* virtio console */ -#define VIRTIO_ID_RNG 4 /* virtio rng */ -#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ -#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ -#define VIRTIO_ID_SCSI 8 /* virtio scsi */ -#define VIRTIO_ID_9P 9 /* 9p virtio console */ -#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ -#define VIRTIO_ID_CAIF 12 /* Virtio caif */ -#define VIRTIO_ID_GPU 16 /* virtio GPU */ -#define VIRTIO_ID_INPUT 18 /* virtio input */ -#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ -#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ -#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ -#define VIRTIO_ID_MEM 24 /* virtio mem */ -#define VIRTIO_ID_FS 26 /* virtio filesystem */ -#define VIRTIO_ID_PMEM 27 /* virtio pmem */ -#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ +#define VIRTIO_ID_NET 1 /* virtio net */ +#define VIRTIO_ID_BLOCK 2 /* virtio block */ +#define VIRTIO_ID_CONSOLE 3 /* virtio console */ +#define VIRTIO_ID_RNG 4 /* virtio rng */ +#define VIRTIO_ID_BALLOON 5 /* virtio balloon */ +#define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ +#define VIRTIO_ID_SCSI 8 /* virtio scsi */ +#define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ +#define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_GPU 16 /* virtio GPU */ +#define VIRTIO_ID_INPUT 18 /* virtio input */ +#define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ +#define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ +#define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ +#define VIRTIO_ID_MEM 24 /* virtio mem */ +#define VIRTIO_ID_FS 26 /* virtio filesystem */ +#define VIRTIO_ID_PMEM 27 /* virtio pmem */ +#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */ #endif /* _LINUX_VIRTIO_IDS_H */ -- cgit v1.2.3 From be618636de4186521ffba2cbe5105e9c3481b9cb 
Mon Sep 17 00:00:00 2001 From: "Enrico Weigelt, metux IT consult" Date: Wed, 2 Dec 2020 12:19:31 +0100 Subject: uapi: virtio_ids: add missing device type IDs from OASIS spec The OASIS virtio spec (1.1) defines several IDs that aren't reflected in the header yet. Fixing this by adding the missing IDs, even though they're not yet used by the kernel. Signed-off-by: Enrico Weigelt, metux IT consult Link: https://lore.kernel.org/r/20201202111931.31953-2-info@metux.net Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_ids.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 3cb55e5277a1..bc1c0621f5ed 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -34,15 +34,21 @@ #define VIRTIO_ID_CONSOLE 3 /* virtio console */ #define VIRTIO_ID_RNG 4 /* virtio rng */ #define VIRTIO_ID_BALLOON 5 /* virtio balloon */ +#define VIRTIO_ID_IOMEM 6 /* virtio ioMemory */ #define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ #define VIRTIO_ID_SCSI 8 /* virtio scsi */ #define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_MAC80211_WLAN 10 /* virtio WLAN MAC */ #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #define VIRTIO_ID_CAIF 12 /* Virtio caif */ +#define VIRTIO_ID_MEMORY_BALLOON 13 /* virtio memory balloon */ #define VIRTIO_ID_GPU 16 /* virtio GPU */ +#define VIRTIO_ID_CLOCK 17 /* virtio clock/timer */ #define VIRTIO_ID_INPUT 18 /* virtio input */ #define VIRTIO_ID_VSOCK 19 /* virtio vsock transport */ #define VIRTIO_ID_CRYPTO 20 /* virtio crypto */ +#define VIRTIO_ID_SIGNAL_DIST 21 /* virtio signal distribution device */ +#define VIRTIO_ID_PSTORE 22 /* virtio pstore device */ #define VIRTIO_ID_IOMMU 23 /* virtio IOMMU */ #define VIRTIO_ID_MEM 24 /* virtio mem */ #define VIRTIO_ID_FS 26 /* virtio filesystem */ -- cgit v1.2.3 From 476c135e321716ad7a8a5d4a19a636e2dcc50526 Mon Sep 17 00:00:00 2001 From: Parav Pandit 
Date: Thu, 12 Nov 2020 08:39:59 +0200 Subject: vdpa: Add missing comment for virtqueue count Add missing comment for number of virtqueues. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Acked-by: Jason Wang Link: https://lore.kernel.org/r/20201112064005.349268-2-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 30bc7a7223bb..0fefeb976877 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -42,6 +42,7 @@ struct vdpa_vq_state { * @config: the configuration ops for this device. * @index: device index * @features_valid: were features initialized? for legacy guests + * @nvqs: maximum number of supported virtqueues */ struct vdpa_device { struct device dev; -- cgit v1.2.3 From 418eddef050d5f6393c303a94e3173847ab85466 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 12 Nov 2020 08:40:00 +0200 Subject: vdpa: Use simpler version of ida allocation vdpa doesn't have any specific need to define start and end range of the device index. Hence use the simpler version of the ida allocator. Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Acked-by: Jason Wang Link: https://lore.kernel.org/r/20201112064005.349268-3-parav@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/vdpa/vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index a69ffc991e13..c0825650c055 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -89,7 +89,7 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent, if (!vdev) goto err; - err = ida_simple_get(&vdpa_index_ida, 0, 0, GFP_KERNEL); + err = ida_alloc(&vdpa_index_ida, GFP_KERNEL); if (err < 0) goto err_ida; -- cgit v1.2.3