From 94300fcf4cef8e56f9e25fc75692e7db6d00e232 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 8 Nov 2021 18:31:51 -0800 Subject: virtio-mem: factor out hotplug specifics from virtio_mem_init() into virtio_mem_init_hotplug() Let's prepare for a new virtio-mem kdump mode in which we don't actually hot(un)plug any memory but only observe the state of device blocks. Link: https://lkml.kernel.org/r/20211005121430.30136-7-david@redhat.com Signed-off-by: David Hildenbrand Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_mem.c | 81 ++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 37 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index bef8ad6bf466..2ba7e8d6ba8d 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -2392,41 +2392,10 @@ static int virtio_mem_init_vq(struct virtio_mem *vm) return 0; } -static int virtio_mem_init(struct virtio_mem *vm) +static int virtio_mem_init_hotplug(struct virtio_mem *vm) { const struct range pluggable_range = mhp_get_pluggable_range(true); uint64_t sb_size, addr; - uint16_t node_id; - - if (!vm->vdev->config->get) { - dev_err(&vm->vdev->dev, "config access disabled\n"); - return -EINVAL; - } - - /* - * We don't want to (un)plug or reuse any memory when in kdump. The - * memory is still accessible (but not mapped). - */ - if (is_kdump_kernel()) { - dev_warn(&vm->vdev->dev, "disabled in kdump kernel\n"); - return -EBUSY; - } - - /* Fetch all properties that can't change. */ - virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, - &vm->plugged_size); - virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, - &vm->device_block_size); - virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, - &node_id); - vm->nid = virtio_mem_translate_node_id(vm, node_id); - virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); - virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, - &vm->region_size); - - /* Determine the nid for the device based on the lowest address. */ - if (vm->nid == NUMA_NO_NODE) - vm->nid = memory_add_physaddr_to_nid(vm->addr); /* bad device setup - warn only */ if (!IS_ALIGNED(vm->addr, memory_block_size_bytes())) @@ -2496,10 +2465,6 @@ static int virtio_mem_init(struct virtio_mem *vm) vm->offline_threshold); } - dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); - dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); - dev_info(&vm->vdev->dev, "device block size: 0x%llx", - (unsigned long long)vm->device_block_size); dev_info(&vm->vdev->dev, "memory block size: 0x%lx", memory_block_size_bytes()); if (vm->in_sbm) @@ -2508,10 +2473,52 @@ static int virtio_mem_init(struct virtio_mem *vm) else dev_info(&vm->vdev->dev, "big block size: 0x%llx", (unsigned long long)vm->bbm.bb_size); + + return 0; +} + +static int virtio_mem_init(struct virtio_mem *vm) +{ + uint16_t node_id; + + if (!vm->vdev->config->get) { + dev_err(&vm->vdev->dev, "config access disabled\n"); + return -EINVAL; + } + + /* + * We don't want to (un)plug or reuse any memory when in kdump. The + * memory is still accessible (but not mapped). + */ + if (is_kdump_kernel()) { + dev_warn(&vm->vdev->dev, "disabled in kdump kernel\n"); + return -EBUSY; + } + + /* Fetch all properties that can't change. */ + virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, + &vm->plugged_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size, + &vm->device_block_size); + virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id, + &node_id); + vm->nid = virtio_mem_translate_node_id(vm, node_id); + virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr); + virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size, + &vm->region_size); + + /* Determine the nid for the device based on the lowest address. */ + if (vm->nid == NUMA_NO_NODE) + vm->nid = memory_add_physaddr_to_nid(vm->addr); + + dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr); + dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size); + dev_info(&vm->vdev->dev, "device block size: 0x%llx", + (unsigned long long)vm->device_block_size); if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); - return 0; + return virtio_mem_init_hotplug(vm); } static int virtio_mem_create_resource(struct virtio_mem *vm) -- cgit v1.2.3 From 84e17e684eefa9d84d9d46e5436d46de129cd6da Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 8 Nov 2021 18:31:55 -0800 Subject: virtio-mem: factor out hotplug specifics from virtio_mem_probe() into virtio_mem_init_hotplug() Let's prepare for a new virtio-mem kdump mode in which we don't actually hot(un)plug any memory but only observe the state of device blocks. Link: https://lkml.kernel.org/r/20211005121430.30136-8-david@redhat.com Signed-off-by: David Hildenbrand Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_mem.c | 87 +++++++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 42 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 2ba7e8d6ba8d..1be3ee7f684d 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -260,6 +260,8 @@ static void virtio_mem_fake_offline_going_offline(unsigned long pfn, static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn, unsigned long nr_pages); static void virtio_mem_retry(struct virtio_mem *vm); +static int virtio_mem_create_resource(struct virtio_mem *vm); +static void virtio_mem_delete_resource(struct virtio_mem *vm); /* * Register a virtio-mem device so it will be considered for the online_page @@ -2395,7 +2397,8 @@ static int virtio_mem_init_vq(struct virtio_mem *vm) static int virtio_mem_init_hotplug(struct virtio_mem *vm) { const struct range pluggable_range = mhp_get_pluggable_range(true); - uint64_t sb_size, addr; + uint64_t unit_pages, sb_size, addr; + int rc; /* bad device setup - warn only */ if (!IS_ALIGNED(vm->addr, memory_block_size_bytes())) @@ -2474,7 +2477,48 @@ static int virtio_mem_init_hotplug(struct virtio_mem *vm) dev_info(&vm->vdev->dev, "big block size: 0x%llx", (unsigned long long)vm->bbm.bb_size); + /* create the parent resource for all memory */ + rc = virtio_mem_create_resource(vm); + if (rc) + return rc; + + /* use a single dynamic memory group to cover the whole memory device */ + if (vm->in_sbm) + unit_pages = PHYS_PFN(memory_block_size_bytes()); + else + unit_pages = PHYS_PFN(vm->bbm.bb_size); + rc = memory_group_register_dynamic(vm->nid, unit_pages); + if (rc < 0) + goto out_del_resource; + vm->mgid = rc; + + /* + * If we still have memory plugged, we have to unplug all memory first. + * Registering our parent resource makes sure that this memory isn't + * actually in use (e.g., trying to reload the driver). + */ + if (vm->plugged_size) { + vm->unplug_all_required = true; + dev_info(&vm->vdev->dev, "unplugging all memory is required\n"); + } + + /* register callbacks */ + vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb; + rc = register_memory_notifier(&vm->memory_notifier); + if (rc) + goto out_unreg_group; + rc = register_virtio_mem_device(vm); + if (rc) + goto out_unreg_mem; + return 0; +out_unreg_mem: + unregister_memory_notifier(&vm->memory_notifier); +out_unreg_group: + memory_group_unregister(vm->mgid); +out_del_resource: + virtio_mem_delete_resource(vm); + return rc; } static int virtio_mem_init(struct virtio_mem *vm) @@ -2578,7 +2622,6 @@ static bool virtio_mem_has_memory_added(struct virtio_mem *vm) static int virtio_mem_probe(struct virtio_device *vdev) { struct virtio_mem *vm; - uint64_t unit_pages; int rc; BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24); @@ -2608,40 +2651,6 @@ static int virtio_mem_probe(struct virtio_device *vdev) if (rc) goto out_del_vq; - /* create the parent resource for all memory */ - rc = virtio_mem_create_resource(vm); - if (rc) - goto out_del_vq; - - /* use a single dynamic memory group to cover the whole memory device */ - if (vm->in_sbm) - unit_pages = PHYS_PFN(memory_block_size_bytes()); - else - unit_pages = PHYS_PFN(vm->bbm.bb_size); - rc = memory_group_register_dynamic(vm->nid, unit_pages); - if (rc < 0) - goto out_del_resource; - vm->mgid = rc; - - /* - * If we still have memory plugged, we have to unplug all memory first. - * Registering our parent resource makes sure that this memory isn't - * actually in use (e.g., trying to reload the driver). - */ - if (vm->plugged_size) { - vm->unplug_all_required = true; - dev_info(&vm->vdev->dev, "unplugging all memory is required\n"); - } - - /* register callbacks */ - vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb; - rc = register_memory_notifier(&vm->memory_notifier); - if (rc) - goto out_unreg_group; - rc = register_virtio_mem_device(vm); - if (rc) - goto out_unreg_mem; - virtio_device_ready(vdev); /* trigger a config update to start processing the requested_size */ @@ -2649,12 +2658,6 @@ static int virtio_mem_probe(struct virtio_device *vdev) queue_work(system_freezable_wq, &vm->wq); return 0; -out_unreg_mem: - unregister_memory_notifier(&vm->memory_notifier); -out_unreg_group: - memory_group_unregister(vm->mgid); -out_del_resource: - virtio_mem_delete_resource(vm); out_del_vq: vdev->config->del_vqs(vdev); out_free_vm: -- cgit v1.2.3 From ffc763d0c334c397ef04c02f6bd512ae23bfa1aa Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 8 Nov 2021 18:31:58 -0800 Subject: virtio-mem: factor out hotplug specifics from virtio_mem_remove() into virtio_mem_deinit_hotplug() Let's prepare for a new virtio-mem kdump mode in which we don't actually hot(un)plug any memory but only observe the state of device blocks. Link: https://lkml.kernel.org/r/20211005121430.30136-9-david@redhat.com Signed-off-by: David Hildenbrand Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_mem.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 1be3ee7f684d..76d8aef3cfd2 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -2667,9 +2667,8 @@ out_free_vm: return rc; } -static void virtio_mem_remove(struct virtio_device *vdev) +static void virtio_mem_deinit_hotplug(struct virtio_mem *vm) { - struct virtio_mem *vm = vdev->priv; unsigned long mb_id; int rc; @@ -2716,7 +2715,8 @@ static void virtio_mem_remove(struct virtio_device *vdev) * away. Warn at least. */ if (virtio_mem_has_memory_added(vm)) { - dev_warn(&vdev->dev, "device still has system memory added\n"); + dev_warn(&vm->vdev->dev, + "device still has system memory added\n"); } else { virtio_mem_delete_resource(vm); kfree_const(vm->resource_name); @@ -2730,6 +2730,13 @@ static void virtio_mem_remove(struct virtio_device *vdev) } else { vfree(vm->bbm.bb_states); } +} + +static void virtio_mem_remove(struct virtio_device *vdev) +{ + struct virtio_mem *vm = vdev->priv; + + virtio_mem_deinit_hotplug(vm); /* reset the device and cleanup the queues */ vdev->config->reset(vdev); -- cgit v1.2.3 From ce2814622e844fd8c71d38c458e04d765dadfd04 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 8 Nov 2021 18:32:02 -0800 Subject: virtio-mem: kdump mode to sanitize /proc/vmcore access Although virtio-mem currently supports reading unplugged memory in the hypervisor, this will change in the future, indicated to the device via a new feature flag. We similarly sanitized /proc/kcore access recently. [1] Let's register a vmcore callback, to allow vmcore code to check if a PFN belonging to a virtio-mem device is either currently plugged and should be dumped or is currently unplugged and should not be accessed, instead mapping the shared zeropage or returning zeroes when reading. This is important when not capturing /proc/vmcore via tools like "makedumpfile" that can identify logically unplugged virtio-mem memory via PG_offline in the memmap, but simply by e.g., copying the file. Distributions that support virtio-mem+kdump have to make sure that the virtio_mem module will be part of the kdump kernel or the kdump initrd; dracut was recently [2] extended to include virtio-mem in the generated initrd. As long as no special kdump kernels are used, this will automatically make sure that virtio-mem will be around in the kdump initrd and sanitize /proc/vmcore access -- with dracut. With this series, we'll send one virtio-mem state request for every ~2 MiB chunk of virtio-mem memory indicated in the vmcore that we intend to read/map. In the future, we might want to allow building virtio-mem for kdump mode only, even without CONFIG_MEMORY_HOTPLUG and friends: this way, we could support special stripped-down kdump kernels that have many other config options disabled; we'll tackle that once required. Further, we might want to try sensing bigger blocks (e.g., memory sections) first before falling back to device blocks on demand. Tested with Fedora rawhide, which contains a recent kexec-tools version (considering "System RAM (virtio_mem)" when creating the vmcore header) and a recent dracut version (including the virtio_mem module in the kdump initrd). Link: https://lkml.kernel.org/r/20210526093041.8800-1-david@redhat.com [1] Link: https://github.com/dracutdevs/dracut/pull/1157 [2] Link: https://lkml.kernel.org/r/20211005121430.30136-10-david@redhat.com Signed-off-by: David Hildenbrand Cc: Baoquan He Cc: Borislav Petkov Cc: Boris Ostrovsky Cc: Dave Young Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jason Wang Cc: Juergen Gross Cc: "Michael S. Tsirkin" Cc: Michal Hocko Cc: Mike Rapoport Cc: Oscar Salvador Cc: "Rafael J. Wysocki" Cc: Stefano Stabellini Cc: Thomas Gleixner Cc: Vivek Goyal Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/virtio/virtio_mem.c | 136 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 124 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 76d8aef3cfd2..3573b78f6b05 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -223,6 +223,9 @@ struct virtio_mem { * When this lock is held the pointers can't change, ONLINE and * OFFLINE blocks can't change the state and no subblocks will get * plugged/unplugged. + * + * In kdump mode, used to serialize requests, last_block_addr and + * last_block_plugged. */ struct mutex hotplug_mutex; bool hotplug_active; @@ -230,6 +233,9 @@ struct virtio_mem { /* An error occurred we cannot handle - stop processing requests. */ bool broken; + /* Cached valued of is_kdump_kernel() when the device was probed. */ + bool in_kdump; + /* The driver is being removed. */ spinlock_t removal_lock; bool removing; @@ -243,6 +249,13 @@ struct virtio_mem { /* Memory notifier (online/offline events). */ struct notifier_block memory_notifier; +#ifdef CONFIG_PROC_VMCORE + /* vmcore callback for /proc/vmcore handling in kdump mode */ + struct vmcore_cb vmcore_cb; + uint64_t last_block_addr; + bool last_block_plugged; +#endif /* CONFIG_PROC_VMCORE */ + /* Next device in the list of virtio-mem devices. */ struct list_head next; }; @@ -2293,6 +2306,12 @@ static void virtio_mem_run_wq(struct work_struct *work) uint64_t diff; int rc; + if (unlikely(vm->in_kdump)) { + dev_warn_once(&vm->vdev->dev, + "unexpected workqueue run in kdump kernel\n"); + return; + } + hrtimer_cancel(&vm->retry_timer); if (vm->broken) @@ -2521,6 +2540,86 @@ out_del_resource: return rc; } +#ifdef CONFIG_PROC_VMCORE +static int virtio_mem_send_state_request(struct virtio_mem *vm, uint64_t addr, + uint64_t size) +{ + const uint64_t nb_vm_blocks = size / vm->device_block_size; + const struct virtio_mem_req req = { + .type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_STATE), + .u.state.addr = cpu_to_virtio64(vm->vdev, addr), + .u.state.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks), + }; + int rc = -ENOMEM; + + dev_dbg(&vm->vdev->dev, "requesting state: 0x%llx - 0x%llx\n", addr, + addr + size - 1); + + switch (virtio_mem_send_request(vm, &req)) { + case VIRTIO_MEM_RESP_ACK: + return virtio16_to_cpu(vm->vdev, vm->resp.u.state.state); + case VIRTIO_MEM_RESP_ERROR: + rc = -EINVAL; + break; + default: + break; + } + + dev_dbg(&vm->vdev->dev, "requesting state failed: %d\n", rc); + return rc; +} + +static bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb, + unsigned long pfn) +{ + struct virtio_mem *vm = container_of(cb, struct virtio_mem, + vmcore_cb); + uint64_t addr = PFN_PHYS(pfn); + bool is_ram; + int rc; + + if (!virtio_mem_contains_range(vm, addr, PAGE_SIZE)) + return true; + if (!vm->plugged_size) + return false; + + /* + * We have to serialize device requests and access to the information + * about the block queried last. + */ + mutex_lock(&vm->hotplug_mutex); + + addr = ALIGN_DOWN(addr, vm->device_block_size); + if (addr != vm->last_block_addr) { + rc = virtio_mem_send_state_request(vm, addr, + vm->device_block_size); + /* On any kind of error, we're going to signal !ram. */ + if (rc == VIRTIO_MEM_STATE_PLUGGED) + vm->last_block_plugged = true; + else + vm->last_block_plugged = false; + vm->last_block_addr = addr; + } + + is_ram = vm->last_block_plugged; + mutex_unlock(&vm->hotplug_mutex); + return is_ram; +} +#endif /* CONFIG_PROC_VMCORE */ + +static int virtio_mem_init_kdump(struct virtio_mem *vm) +{ +#ifdef CONFIG_PROC_VMCORE + dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n"); + vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram; + register_vmcore_cb(&vm->vmcore_cb); + return 0; +#else /* CONFIG_PROC_VMCORE */ + dev_warn(&vm->vdev->dev, "disabled in kdump kernel without vmcore\n"); + return -EBUSY; +#endif /* CONFIG_PROC_VMCORE */ +} + static int virtio_mem_init(struct virtio_mem *vm) { uint16_t node_id; @@ -2530,15 +2629,6 @@ static int virtio_mem_init(struct virtio_mem *vm) return -EINVAL; } - /* - * We don't want to (un)plug or reuse any memory when in kdump. The - * memory is still accessible (but not mapped). - */ - if (is_kdump_kernel()) { - dev_warn(&vm->vdev->dev, "disabled in kdump kernel\n"); - return -EBUSY; - } - /* Fetch all properties that can't change. */ virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size, &vm->plugged_size); @@ -2562,6 +2652,12 @@ static int virtio_mem_init(struct virtio_mem *vm) if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA)) dev_info(&vm->vdev->dev, "nid: %d", vm->nid); + /* + * We don't want to (un)plug or reuse any memory when in kdump. The + * memory is still accessible (but not exposed to Linux). + */ + if (vm->in_kdump) + return virtio_mem_init_kdump(vm); return virtio_mem_init_hotplug(vm); } @@ -2640,6 +2736,7 @@ static int virtio_mem_probe(struct virtio_device *vdev) hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); vm->retry_timer.function = virtio_mem_timer_expired; vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS; + vm->in_kdump = is_kdump_kernel(); /* register the virtqueue */ rc = virtio_mem_init_vq(vm); @@ -2654,8 +2751,10 @@ static int virtio_mem_probe(struct virtio_device *vdev) virtio_device_ready(vdev); /* trigger a config update to start processing the requested_size */ - atomic_set(&vm->config_changed, 1); - queue_work(system_freezable_wq, &vm->wq); + if (!vm->in_kdump) { + atomic_set(&vm->config_changed, 1); + queue_work(system_freezable_wq, &vm->wq); + } return 0; out_del_vq: @@ -2732,11 +2831,21 @@ static void virtio_mem_deinit_hotplug(struct virtio_mem *vm) } } +static void virtio_mem_deinit_kdump(struct virtio_mem *vm) +{ +#ifdef CONFIG_PROC_VMCORE + unregister_vmcore_cb(&vm->vmcore_cb); +#endif /* CONFIG_PROC_VMCORE */ +} + static void virtio_mem_remove(struct virtio_device *vdev) { struct virtio_mem *vm = vdev->priv; - virtio_mem_deinit_hotplug(vm); + if (vm->in_kdump) + virtio_mem_deinit_kdump(vm); + else + virtio_mem_deinit_hotplug(vm); /* reset the device and cleanup the queues */ vdev->config->reset(vdev); @@ -2750,6 +2859,9 @@ static void virtio_mem_config_changed(struct virtio_device *vdev) { struct virtio_mem *vm = vdev->priv; + if (unlikely(vm->in_kdump)) + return; + atomic_set(&vm->config_changed, 1); virtio_mem_retry(vm); } -- cgit v1.2.3 From 5f6286a608107a68646241b57d2bd2a4fc139ef9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 8 Nov 2021 18:32:32 -0800 Subject: include/linux/delay.h: replace kernel.h with the necessary inclusions When kernel.h is used in the headers it adds a lot into dependency hell, especially when there are circular dependencies are involved. Replace kernel.h inclusion with the list of what is really being used. [akpm@linux-foundation.org: cxd2880_common.h needs bits.h for GENMASK()] [andriy.shevchenko@linux.intel.com: delay.h: fix for removed kernel.h] Link: https://lkml.kernel.org/r/20211028170143.56523-1-andriy.shevchenko@linux.intel.com [akpm@linux-foundation.org: include/linux/fwnode.h needs bits.h for BIT()] Link: https://lkml.kernel.org/r/20211027150324.79827-1-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/riscv/lib/delay.c | 4 ++++ arch/s390/include/asm/facility.h | 4 ++++ drivers/media/dvb-frontends/cxd2880/cxd2880_common.h | 1 + include/linux/delay.h | 2 +- include/linux/fwnode.h | 1 + 5 files changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/arch/riscv/lib/delay.c b/arch/riscv/lib/delay.c index f51c9a03bca1..49d510ba75fd 100644 --- a/arch/riscv/lib/delay.c +++ b/arch/riscv/lib/delay.c @@ -4,10 +4,14 @@ */ #include +#include #include #include +#include #include +#include + /* * This is copies from arch/arm/include/asm/delay.h * diff --git a/arch/s390/include/asm/facility.h b/arch/s390/include/asm/facility.h index e3aa354ab9f4..94b6919026df 100644 --- a/arch/s390/include/asm/facility.h +++ b/arch/s390/include/asm/facility.h @@ -9,8 +9,12 @@ #define __ASM_FACILITY_H #include + +#include #include +#include #include + #include #define MAX_FACILITY_BIT (sizeof(stfle_fac_list) * 8) diff --git a/drivers/media/dvb-frontends/cxd2880/cxd2880_common.h b/drivers/media/dvb-frontends/cxd2880/cxd2880_common.h index b05bce71ab35..9dc15a5a9683 100644 --- a/drivers/media/dvb-frontends/cxd2880/cxd2880_common.h +++ b/drivers/media/dvb-frontends/cxd2880/cxd2880_common.h @@ -12,6 +12,7 @@ #include #include #include +#include #include int cxd2880_convert2s_complement(u32 value, u32 bitlen); diff --git a/include/linux/delay.h b/include/linux/delay.h index 1d0e2ce6b6d9..8eacf67eb212 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -19,7 +19,7 @@ * https://lists.openwall.net/linux-kernel/2011/01/09/56 */ -#include +#include extern unsigned long loops_per_jiffy; diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 9f4ad719bfe3..3a532ba66f6c 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -11,6 +11,7 @@ #include #include +#include #include struct fwnode_operations; -- cgit v1.2.3 From 0f68d45ef41abb618a9ca33996348ae73800a106 Mon Sep 17 00:00:00 2001 From: Imran Khan Date: Mon, 8 Nov 2021 18:33:16 -0800 Subject: lib, stackdepot: add helper to print stack entries into buffer To print stack entries into a buffer, users of stackdepot, first get a list of stack entries using stack_depot_fetch and then print this list into a buffer using stack_trace_snprint. Provide a helper in stackdepot for this purpose. Also change above mentioned users to use this helper. [imran.f.khan@oracle.com: fix build error] Link: https://lkml.kernel.org/r/20210915175321.3472770-4-imran.f.khan@oracle.com [imran.f.khan@oracle.com: export stack_depot_snprint() to modules] Link: https://lkml.kernel.org/r/20210916133535.3592491-4-imran.f.khan@oracle.com Link: https://lkml.kernel.org/r/20210915014806.3206938-4-imran.f.khan@oracle.com Signed-off-by: Imran Khan Suggested-by: Vlastimil Babka Acked-by: Vlastimil Babka Acked-by: Jani Nikula [i915] Cc: Alexander Potapenko Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Daniel Vetter Cc: David Airlie Cc: Dmitry Vyukov Cc: Geert Uytterhoeven Cc: Maarten Lankhorst Cc: Maxime Ripard Cc: Thomas Zimmermann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_dp_mst_topology.c | 5 +---- drivers/gpu/drm/drm_mm.c | 5 +---- drivers/gpu/drm/i915/i915_vma.c | 5 +---- drivers/gpu/drm/i915/intel_runtime_pm.c | 20 +++++--------------- include/linux/stackdepot.h | 3 +++ lib/stackdepot.c | 25 +++++++++++++++++++++++++ mm/page_owner.c | 5 +---- 7 files changed, 37 insertions(+), 31 deletions(-) (limited to 'drivers') diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c index 86d13d6bc463..2d1adab9e360 100644 --- a/drivers/gpu/drm/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/drm_dp_mst_topology.c @@ -1668,13 +1668,10 @@ __dump_topology_ref_history(struct drm_dp_mst_topology_ref_history *history, for (i = 0; i < history->len; i++) { const struct drm_dp_mst_topology_ref_entry *entry = &history->entries[i]; - ulong *entries; - uint nr_entries; u64 ts_nsec = entry->ts_nsec; u32 rem_nsec = do_div(ts_nsec, 1000000000); - nr_entries = stack_depot_fetch(entry->backtrace, &entries); - stack_trace_snprint(buf, PAGE_SIZE, entries, nr_entries, 4); + stack_depot_snprint(entry->backtrace, buf, PAGE_SIZE, 4); drm_printf(&p, " %d %ss (last at %5llu.%06u):\n%s", entry->count, diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c index 93d48a6f04ab..7d1c578388d3 100644 --- a/drivers/gpu/drm/drm_mm.c +++ b/drivers/gpu/drm/drm_mm.c @@ -118,8 +118,6 @@ static noinline void save_stack(struct drm_mm_node *node) static void show_leaks(struct drm_mm *mm) { struct drm_mm_node *node; - unsigned long *entries; - unsigned int nr_entries; char *buf; buf = kmalloc(BUFSZ, GFP_KERNEL); @@ -133,8 +131,7 @@ static void show_leaks(struct drm_mm *mm) continue; } - nr_entries = stack_depot_fetch(node->stack, &entries); - stack_trace_snprint(buf, BUFSZ, entries, nr_entries, 0); + stack_depot_snprint(node->stack, buf, BUFSZ, 0); DRM_ERROR("node [%08llx + %08llx]: inserted at\n%s", node->start, node->size, buf); } diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 4b7fc4647e46..f2d9ed375109 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -56,8 +56,6 @@ void i915_vma_free(struct i915_vma *vma) static void vma_print_allocator(struct i915_vma *vma, const char *reason) { - unsigned long *entries; - unsigned int nr_entries; char buf[512]; if (!vma->node.stack) { @@ -66,8 +64,7 @@ static void vma_print_allocator(struct i915_vma *vma, const char *reason) return; } - nr_entries = stack_depot_fetch(vma->node.stack, &entries); - stack_trace_snprint(buf, sizeof(buf), entries, nr_entries, 0); + stack_depot_snprint(vma->node.stack, buf, sizeof(buf), 0); DRM_DEBUG_DRIVER("vma.node [%08llx + %08llx] %s: inserted at %s\n", vma->node.start, vma->node.size, reason, buf); } diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index eaf7688f517d..0d85f3c5c526 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -65,16 +65,6 @@ static noinline depot_stack_handle_t __save_depot_stack(void) return stack_depot_save(entries, n, GFP_NOWAIT | __GFP_NOWARN); } -static void __print_depot_stack(depot_stack_handle_t stack, - char *buf, int sz, int indent) -{ - unsigned long *entries; - unsigned int nr_entries; - - nr_entries = stack_depot_fetch(stack, &entries); - stack_trace_snprint(buf, sz, entries, nr_entries, indent); -} - static void init_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm) { spin_lock_init(&rpm->debug.lock); @@ -146,12 +136,12 @@ static void untrack_intel_runtime_pm_wakeref(struct intel_runtime_pm *rpm, if (!buf) return; - __print_depot_stack(stack, buf, PAGE_SIZE, 2); + stack_depot_snprint(stack, buf, PAGE_SIZE, 2); DRM_DEBUG_DRIVER("wakeref %x from\n%s", stack, buf); stack = READ_ONCE(rpm->debug.last_release); if (stack) { - __print_depot_stack(stack, buf, PAGE_SIZE, 2); + stack_depot_snprint(stack, buf, PAGE_SIZE, 2); DRM_DEBUG_DRIVER("wakeref last released at\n%s", buf); } @@ -183,12 +173,12 @@ __print_intel_runtime_pm_wakeref(struct drm_printer *p, return; if (dbg->last_acquire) { - __print_depot_stack(dbg->last_acquire, buf, PAGE_SIZE, 2); + stack_depot_snprint(dbg->last_acquire, buf, PAGE_SIZE, 2); drm_printf(p, "Wakeref last acquired:\n%s", buf); } if (dbg->last_release) { - __print_depot_stack(dbg->last_release, buf, PAGE_SIZE, 2); + stack_depot_snprint(dbg->last_release, buf, PAGE_SIZE, 2); drm_printf(p, "Wakeref last released:\n%s", buf); } @@ -203,7 +193,7 @@ __print_intel_runtime_pm_wakeref(struct drm_printer *p, rep = 1; while (i + 1 < dbg->count && dbg->owners[i + 1] == stack) rep++, i++; - __print_depot_stack(stack, buf, PAGE_SIZE, 2); + stack_depot_snprint(stack, buf, PAGE_SIZE, 2); drm_printf(p, "Wakeref x%lu taken at:\n%s", rep, buf); } diff --git a/include/linux/stackdepot.h b/include/linux/stackdepot.h index 8ab37500fcba..c34b55a6e554 100644 --- a/include/linux/stackdepot.h +++ b/include/linux/stackdepot.h @@ -25,6 +25,9 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries, unsigned int stack_depot_fetch(depot_stack_handle_t handle, unsigned long **entries); +int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, + int spaces); + void stack_depot_print(depot_stack_handle_t stack); #ifdef CONFIG_STACKDEPOT diff --git a/lib/stackdepot.c b/lib/stackdepot.c index 4e1f2982d0fa..b437ae79aca1 100644 --- a/lib/stackdepot.c +++ b/lib/stackdepot.c @@ -213,6 +213,31 @@ static inline struct stack_record *find_stack(struct stack_record *bucket, return NULL; } +/** + * stack_depot_snprint - print stack entries from a depot into a buffer + * + * @handle: Stack depot handle which was returned from + * stack_depot_save(). + * @buf: Pointer to the print buffer + * + * @size: Size of the print buffer + * + * @spaces: Number of leading spaces to print + * + * Return: Number of bytes printed. + */ +int stack_depot_snprint(depot_stack_handle_t handle, char *buf, size_t size, + int spaces) +{ + unsigned long *entries; + unsigned int nr_entries; + + nr_entries = stack_depot_fetch(handle, &entries); + return nr_entries ? stack_trace_snprint(buf, size, entries, nr_entries, + spaces) : 0; +} +EXPORT_SYMBOL_GPL(stack_depot_snprint); + /** * stack_depot_print - print stack entries from a depot * diff --git a/mm/page_owner.c b/mm/page_owner.c index eff29be1218b..1653040d1133 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -329,8 +329,6 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, depot_stack_handle_t handle) { int ret, pageblock_mt, page_mt; - unsigned long *entries; - unsigned int nr_entries; char *kbuf; count = min_t(size_t, count, PAGE_SIZE); @@ -361,8 +359,7 @@ print_page_owner(char __user *buf, size_t count, unsigned long pfn, if (ret >= count) goto err; - nr_entries = stack_depot_fetch(handle, &entries); - ret += stack_trace_snprint(kbuf + ret, count - ret, entries, nr_entries, 0); + ret += stack_depot_snprint(handle, kbuf + ret, count - ret, 0); if (ret >= count) goto err; -- cgit v1.2.3 From 2128f4e21aa283945e6f0fb183e70fdfdc0d66f0 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 8 Nov 2021 18:35:53 -0800 Subject: virtio-mem: disallow mapping virtio-mem memory via /dev/mem We don't want user space to be able to map virtio-mem device memory directly (e.g., via /dev/mem) in order to have guarantees that in a sane setup we'll never accidentially access unplugged memory within the device-managed region of a virtio-mem device, just as required by the virtio-spec. As soon as the virtio-mem driver is loaded, the device region is visible in /proc/iomem via the parent device region. From that point on user space is aware of the device region and we want to disallow mapping anything inside that region (where we will dynamically (un)plug memory) until the driver has been unloaded cleanly and e.g., another driver might take over. By creating our parent IORESOURCE_SYSTEM_RAM resource with IORESOURCE_EXCLUSIVE, we will disallow any /dev/mem access to our device region until the driver was unloaded cleanly and removed the parent region. This will work even though only some memory blocks are actually currently added to Linux and appear as busy in the resource tree. So access to the region from user space is only possible a) if we don't load the virtio-mem driver. b) after unloading the virtio-mem driver cleanly. Don't build virtio-mem if access to /dev/mem cannot be restricticted -- if we have CONFIG_DEVMEM=y but CONFIG_STRICT_DEVMEM is not set. Link: https://lkml.kernel.org/r/20210920142856.17758-4-david@redhat.com Signed-off-by: David Hildenbrand Reviewed-by: Dan Williams Acked-by: Michael S. Tsirkin Cc: Andy Shevchenko Cc: Arnd Bergmann Cc: Greg Kroah-Hartman Cc: Hanjun Guo Cc: Jason Wang Cc: "Rafael J. Wysocki" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/virtio/Kconfig | 1 + drivers/virtio/virtio_mem.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig index 3654def9915c..44a082639439 100644 --- a/drivers/virtio/Kconfig +++ b/drivers/virtio/Kconfig @@ -101,6 +101,7 @@ config VIRTIO_MEM depends on MEMORY_HOTPLUG depends on MEMORY_HOTREMOVE depends on CONTIG_ALLOC + depends on EXCLUSIVE_SYSTEM_RAM help This driver provides access to virtio-mem paravirtualized memory devices, allowing to hotplug and hotunplug memory. diff --git a/drivers/virtio/virtio_mem.c b/drivers/virtio/virtio_mem.c index 3573b78f6b05..0da0af251c73 100644 --- a/drivers/virtio/virtio_mem.c +++ b/drivers/virtio/virtio_mem.c @@ -2672,8 +2672,10 @@ static int virtio_mem_create_resource(struct virtio_mem *vm) if (!name) return -ENOMEM; + /* Disallow mapping device memory via /dev/mem completely. */ vm->parent_resource = __request_mem_region(vm->addr, vm->region_size, - name, IORESOURCE_SYSTEM_RAM); + name, IORESOURCE_SYSTEM_RAM | + IORESOURCE_EXCLUSIVE); if (!vm->parent_resource) { kfree(name); dev_warn(&vm->vdev->dev, "could not reserve device region\n"); -- cgit v1.2.3