diff options
Diffstat (limited to 'drivers/hv')
-rw-r--r-- | drivers/hv/channel.c | 67 | ||||
-rw-r--r-- | drivers/hv/channel_mgmt.c | 10 | ||||
-rw-r--r-- | drivers/hv/hv.c | 44 | ||||
-rw-r--r-- | drivers/hv/hv_balloon.c | 3 | ||||
-rw-r--r-- | drivers/hv/hv_util.c | 3 | ||||
-rw-r--r-- | drivers/hv/ring_buffer.c | 65 | ||||
-rw-r--r-- | drivers/hv/vmbus_drv.c | 120 |
7 files changed, 251 insertions, 61 deletions
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index ba0a092ae085..741857d80da1 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -29,12 +29,26 @@ #include <linux/hyperv.h> #include <linux/uio.h> #include <linux/interrupt.h> +#include <asm/page.h> #include "hyperv_vmbus.h" #define NUM_PAGES_SPANNED(addr, len) \ ((PAGE_ALIGN(addr + len) >> PAGE_SHIFT) - (addr >> PAGE_SHIFT)) +static unsigned long virt_to_hvpfn(void *addr) +{ + unsigned long paddr; + + if (is_vmalloc_addr(addr)) + paddr = page_to_phys(vmalloc_to_page(addr)) + + offset_in_page(addr); + else + paddr = __pa(addr); + + return paddr >> PAGE_SHIFT; +} + /* * vmbus_setevent- Trigger an event notification on the specified * channel. @@ -298,8 +312,8 @@ static int create_gpadl_header(void *kbuffer, u32 size, gpadl_header->range[0].byte_offset = 0; gpadl_header->range[0].byte_count = size; for (i = 0; i < pfncount; i++) - gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys( - kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT; + gpadl_header->range[0].pfn_array[i] = virt_to_hvpfn( + kbuffer + PAGE_SIZE * i); *msginfo = msgheader; pfnsum = pfncount; @@ -350,9 +364,8 @@ static int create_gpadl_header(void *kbuffer, u32 size, * so the hypervisor guarantees that this is ok. */ for (i = 0; i < pfncurr; i++) - gpadl_body->pfn[i] = slow_virt_to_phys( - kbuffer + PAGE_SIZE * (pfnsum + i)) >> - PAGE_SHIFT; + gpadl_body->pfn[i] = virt_to_hvpfn( + kbuffer + PAGE_SIZE * (pfnsum + i)); /* add to msg header */ list_add_tail(&msgbody->msglistentry, @@ -380,8 +393,8 @@ static int create_gpadl_header(void *kbuffer, u32 size, gpadl_header->range[0].byte_offset = 0; gpadl_header->range[0].byte_count = size; for (i = 0; i < pagecount; i++) - gpadl_header->range[0].pfn_array[i] = slow_virt_to_phys( - kbuffer + PAGE_SIZE * i) >> PAGE_SHIFT; + gpadl_header->range[0].pfn_array[i] = virt_to_hvpfn( + kbuffer + PAGE_SIZE * i); *msginfo = msgheader; } @@ -558,11 +571,8 @@ static void reset_channel_cb(void *arg) channel->onchannel_callback = NULL; } -static int vmbus_close_internal(struct vmbus_channel *channel) +void vmbus_reset_channel_cb(struct vmbus_channel *channel) { - struct vmbus_channel_close_channel *msg; - int ret; - /* * vmbus_on_event(), running in the per-channel tasklet, can race * with vmbus_close_internal() in the case of SMP guest, e.g., when @@ -572,6 +582,29 @@ static int vmbus_close_internal(struct vmbus_channel *channel) */ tasklet_disable(&channel->callback_event); + channel->sc_creation_callback = NULL; + + /* Stop the callback asap */ + if (channel->target_cpu != get_cpu()) { + put_cpu(); + smp_call_function_single(channel->target_cpu, reset_channel_cb, + channel, true); + } else { + reset_channel_cb(channel); + put_cpu(); + } + + /* Re-enable tasklet for use on re-open */ + tasklet_enable(&channel->callback_event); +} + +static int vmbus_close_internal(struct vmbus_channel *channel) +{ + struct vmbus_channel_close_channel *msg; + int ret; + + vmbus_reset_channel_cb(channel); + /* * In case a device driver's probe() fails (e.g., * util_probe() -> vmbus_open() returns -ENOMEM) and the device is @@ -585,16 +618,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel) } channel->state = CHANNEL_OPEN_STATE; - channel->sc_creation_callback = NULL; - /* Stop callback and cancel the timer asap */ - if (channel->target_cpu != get_cpu()) { - put_cpu(); - smp_call_function_single(channel->target_cpu, reset_channel_cb, - channel, true); - } else { - reset_channel_cb(channel); - put_cpu(); - } /* Send a closing message */ @@ -639,8 +662,6 @@ static int vmbus_close_internal(struct vmbus_channel *channel) get_order(channel->ringbuffer_pagecount * PAGE_SIZE)); out: - /* re-enable tasklet for use on re-open */ - tasklet_enable(&channel->callback_event); return ret; } diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index ecc2bd275a73..0f0e091c117c 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -527,10 +527,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel) struct hv_device *dev = newchannel->primary_channel->device_obj; - if (vmbus_add_channel_kobj(dev, newchannel)) { - atomic_dec(&vmbus_connection.offer_in_progress); + if (vmbus_add_channel_kobj(dev, newchannel)) goto err_free_chan; - } if (channel->sc_creation_callback != NULL) channel->sc_creation_callback(newchannel); @@ -895,6 +893,12 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) } /* + * Before setting channel->rescind in vmbus_rescind_cleanup(), we + * should make sure the channel callback is not running any more. + */ + vmbus_reset_channel_cb(channel); + + /* * Now wait for offer handling to complete. */ vmbus_rescind_cleanup(channel); diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 658dc765753b..748a1c4172a6 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -64,7 +64,7 @@ int hv_init(void) return -ENOMEM; direct_mode_enabled = ms_hyperv.misc_features & - HV_X64_STIMER_DIRECT_MODE_AVAILABLE; + HV_STIMER_DIRECT_MODE_AVAILABLE; return 0; } @@ -127,14 +127,14 @@ static int hv_ce_set_next_event(unsigned long delta, current_tick = hyperv_cs->read(NULL); current_tick += delta; - hv_init_timer(HV_X64_MSR_STIMER0_COUNT, current_tick); + hv_init_timer(0, current_tick); return 0; } static int hv_ce_shutdown(struct clock_event_device *evt) { - hv_init_timer(HV_X64_MSR_STIMER0_COUNT, 0); - hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, 0); + hv_init_timer(0, 0); + hv_init_timer_config(0, 0); if (direct_mode_enabled) hv_disable_stimer0_percpu_irq(stimer0_irq); @@ -164,7 +164,7 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt) timer_cfg.direct_mode = 0; timer_cfg.sintx = VMBUS_MESSAGE_SINT; } - hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); + hv_init_timer_config(0, timer_cfg.as_uint64); return 0; } @@ -242,6 +242,10 @@ int hv_synic_alloc(void) return 0; err: + /* + * Any memory allocations that succeeded will be freed when + * the caller cleans up by calling hv_synic_free() + */ return -ENOMEM; } @@ -254,12 +258,10 @@ void hv_synic_free(void) struct hv_per_cpu_context *hv_cpu = per_cpu_ptr(hv_context.cpu_context, cpu); - if (hv_cpu->synic_event_page) - free_page((unsigned long)hv_cpu->synic_event_page); - if (hv_cpu->synic_message_page) - free_page((unsigned long)hv_cpu->synic_message_page); - if (hv_cpu->post_msg_page) - free_page((unsigned long)hv_cpu->post_msg_page); + kfree(hv_cpu->clk_evt); + free_page((unsigned long)hv_cpu->synic_event_page); + free_page((unsigned long)hv_cpu->synic_message_page); + free_page((unsigned long)hv_cpu->post_msg_page); } kfree(hv_context.hv_numa_map); @@ -298,18 +300,16 @@ int hv_synic_init(unsigned int cpu) hv_set_siefp(siefp.as_uint64); /* Setup the shared SINT. */ - hv_get_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, - shared_sint.as_uint64); + hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR; shared_sint.masked = false; - if (ms_hyperv.hints & HV_X64_DEPRECATING_AEOI_RECOMMENDED) + if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED) shared_sint.auto_eoi = false; else shared_sint.auto_eoi = true; - hv_set_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, - shared_sint.as_uint64); + hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); /* Enable the global synic bit */ hv_get_synic_state(sctrl.as_uint64); @@ -322,7 +322,7 @@ int hv_synic_init(unsigned int cpu) /* * Register the per-cpu clockevent source. */ - if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) + if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) clockevents_config_and_register(hv_cpu->clk_evt, HV_TIMER_FREQUENCY, HV_MIN_DELTA_TICKS, @@ -337,7 +337,7 @@ void hv_synic_clockevents_cleanup(void) { int cpu; - if (!(ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE)) + if (!(ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE)) return; if (direct_mode_enabled) @@ -396,7 +396,7 @@ int hv_synic_cleanup(unsigned int cpu) return -EBUSY; /* Turn off clockevent device */ - if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) { + if (ms_hyperv.features & HV_MSR_SYNTIMER_AVAILABLE) { struct hv_per_cpu_context *hv_cpu = this_cpu_ptr(hv_context.cpu_context); @@ -405,15 +405,13 @@ int hv_synic_cleanup(unsigned int cpu) put_cpu_ptr(hv_cpu); } - hv_get_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, - shared_sint.as_uint64); + hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); shared_sint.masked = 1; /* Need to correctly cleanup in the case of SMP!!! */ /* Disable the interrupt */ - hv_set_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, - shared_sint.as_uint64); + hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64); hv_get_simp(simp.as_uint64); simp.simp_enabled = 0; diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index b3e9f13f8bc3..b1b788082793 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1765,6 +1765,9 @@ static struct hv_driver balloon_drv = { .id_table = id_table, .probe = balloon_probe, .remove = balloon_remove, + .driver = { + .probe_type = PROBE_PREFER_ASYNCHRONOUS, + }, }; static int __init init_balloon_drv(void) diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 14dce25c104f..423205077bf6 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -487,6 +487,9 @@ static struct hv_driver util_drv = { .id_table = id_table, .probe = util_probe, .remove = util_remove, + .driver = { + .probe_type = PROBE_PREFER_ASYNCHRONOUS, + }, }; static int hv_ptp_enable(struct ptp_clock_info *info, diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index be3c8b10b84a..3e90eb91db45 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -431,7 +431,24 @@ static u32 hv_pkt_iter_bytes_read(const struct hv_ring_buffer_info *rbi, } /* - * Update host ring buffer after iterating over packets. + * Update host ring buffer after iterating over packets. If the host has + * stopped queuing new entries because it found the ring buffer full, and + * sufficient space is being freed up, signal the host. But be careful to + * only signal the host when necessary, both for performance reasons and + * because Hyper-V protects itself by throttling guests that signal + * inappropriately. + * + * Determining when to signal is tricky. There are three key data inputs + * that must be handled in this order to avoid race conditions: + * + * 1. Update the read_index + * 2. Read the pending_send_sz + * 3. Read the current write_index + * + * The interrupt_mask is not used to determine when to signal. The + * interrupt_mask is used only on the guest->host ring buffer when + * sending requests to the host. The host does not use it on the host-> + * guest ring buffer to indicate whether it should be signaled. */ void hv_pkt_iter_close(struct vmbus_channel *channel) { @@ -447,22 +464,30 @@ void hv_pkt_iter_close(struct vmbus_channel *channel) start_read_index = rbi->ring_buffer->read_index; rbi->ring_buffer->read_index = rbi->priv_read_index; + /* + * Older versions of Hyper-V (before WS2102 and Win8) do not + * implement pending_send_sz and simply poll if the host->guest + * ring buffer is full. No signaling is needed or expected. + */ if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz) return; /* * Issue a full memory barrier before making the signaling decision. - * Here is the reason for having this barrier: - * If the reading of the pend_sz (in this function) - * were to be reordered and read before we commit the new read - * index (in the calling function) we could - * have a problem. If the host were to set the pending_sz after we - * have sampled pending_sz and go to sleep before we commit the + * If reading pending_send_sz were to be reordered and happen + * before we commit the new read_index, a race could occur. If the + * host were to set the pending_send_sz after we have sampled + * pending_send_sz, and the ring buffer blocks before we commit the * read index, we could miss sending the interrupt. Issue a full * memory barrier to address this. */ virt_mb(); + /* + * If the pending_send_sz is zero, then the ring buffer is not + * blocked and there is no need to signal. This is far by the + * most common case, so exit quickly for best performance. + */ pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz); if (!pending_sz) return; @@ -476,14 +501,32 @@ void hv_pkt_iter_close(struct vmbus_channel *channel) bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index); /* - * If there was space before we began iteration, - * then host was not blocked. + * We want to signal the host only if we're transitioning + * from a "not enough free space" state to a "enough free + * space" state. For example, it's possible that this function + * could run and free up enough space to signal the host, and then + * run again and free up additional space before the host has a + * chance to clear the pending_send_sz. The 2nd invocation would + * be a null transition from "enough free space" to "enough free + * space", which doesn't warrant a signal. + * + * Exactly filling the ring buffer is treated as "not enough + * space". The ring buffer always must have at least one byte + * empty so the empty and full conditions are distinguishable. + * hv_get_bytes_to_write() doesn't fully tell the truth in + * this regard. + * + * So first check if we were in the "enough free space" state + * before we began the iteration. If so, the host was not + * blocked, and there's no need to signal. */ - if (curr_write_sz - bytes_read > pending_sz) return; - /* If pending write will not fit, don't give false hope. */ + /* + * Similarly, if the new state is "not enough space", then + * there's no need to signal. + */ if (curr_write_sz <= pending_sz) return; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index b10fe26c4891..b1b548a21f91 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -56,6 +56,8 @@ static struct completion probe_event; static int hyperv_cpuhp_online; +static void *hv_panic_page; + static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, void *args) { @@ -208,6 +210,20 @@ static ssize_t modalias_show(struct device *dev, } static DEVICE_ATTR_RO(modalias); +#ifdef CONFIG_NUMA +static ssize_t numa_node_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + + if (!hv_dev->channel) + return -ENODEV; + + return sprintf(buf, "%d\n", hv_dev->channel->numa_node); +} +static DEVICE_ATTR_RO(numa_node); +#endif + static ssize_t server_monitor_pending_show(struct device *dev, struct device_attribute *dev_attr, char *buf) @@ -490,6 +506,9 @@ static struct attribute *vmbus_dev_attrs[] = { &dev_attr_class_id.attr, &dev_attr_device_id.attr, &dev_attr_modalias.attr, +#ifdef CONFIG_NUMA + &dev_attr_numa_node.attr, +#endif &dev_attr_server_monitor_pending.attr, &dev_attr_client_monitor_pending.attr, &dev_attr_server_monitor_latency.attr, @@ -1018,6 +1037,72 @@ static void vmbus_isr(void) add_interrupt_randomness(HYPERVISOR_CALLBACK_VECTOR, 0); } +/* + * Boolean to control whether to report panic messages over Hyper-V. + * + * It can be set via /proc/sys/kernel/hyperv/record_panic_msg + */ +static int sysctl_record_panic_msg = 1; + +/* + * Callback from kmsg_dump. Grab as much as possible from the end of the kmsg + * buffer and call into Hyper-V to transfer the data. + */ +static void hv_kmsg_dump(struct kmsg_dumper *dumper, + enum kmsg_dump_reason reason) +{ + size_t bytes_written; + phys_addr_t panic_pa; + + /* We are only interested in panics. */ + if ((reason != KMSG_DUMP_PANIC) || (!sysctl_record_panic_msg)) + return; + + panic_pa = virt_to_phys(hv_panic_page); + + /* + * Write dump contents to the page. No need to synchronize; panic should + * be single-threaded. + */ + kmsg_dump_get_buffer(dumper, true, hv_panic_page, PAGE_SIZE, + &bytes_written); + if (bytes_written) + hyperv_report_panic_msg(panic_pa, bytes_written); +} + +static struct kmsg_dumper hv_kmsg_dumper = { + .dump = hv_kmsg_dump, +}; + +static struct ctl_table_header *hv_ctl_table_hdr; +static int zero; +static int one = 1; + +/* + * sysctl option to allow the user to control whether kmsg data should be + * reported to Hyper-V on panic. + */ +static struct ctl_table hv_ctl_table[] = { + { + .procname = "hyperv_record_panic_msg", + .data = &sysctl_record_panic_msg, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one + }, + {} +}; + +static struct ctl_table hv_root_table[] = { + { + .procname = "kernel", + .mode = 0555, + .child = hv_ctl_table + }, + {} +}; /* * vmbus_bus_init -Main vmbus driver initialization routine. @@ -1065,6 +1150,32 @@ static int vmbus_bus_init(void) * Only register if the crash MSRs are available */ if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + u64 hyperv_crash_ctl; + /* + * Sysctl registration is not fatal, since by default + * reporting is enabled. + */ + hv_ctl_table_hdr = register_sysctl_table(hv_root_table); + if (!hv_ctl_table_hdr) + pr_err("Hyper-V: sysctl table register error"); + + /* + * Register for panic kmsg callback only if the right + * capability is supported by the hypervisor. + */ + hv_get_crash_ctl(hyperv_crash_ctl); + if (hyperv_crash_ctl & HV_CRASH_CTL_CRASH_NOTIFY_MSG) { + hv_panic_page = (void *)get_zeroed_page(GFP_KERNEL); + if (hv_panic_page) { + ret = kmsg_dump_register(&hv_kmsg_dumper); + if (ret) + pr_err("Hyper-V: kmsg dump register " + "error 0x%x\n", ret); + } else + pr_err("Hyper-V: panic message page memory " + "allocation failed"); + } + register_die_notifier(&hyperv_die_block); atomic_notifier_chain_register(&panic_notifier_list, &hyperv_panic_block); @@ -1081,7 +1192,9 @@ err_alloc: hv_remove_vmbus_irq(); bus_unregister(&hv_bus); - + free_page((unsigned long)hv_panic_page); + unregister_sysctl_table(hv_ctl_table_hdr); + hv_ctl_table_hdr = NULL; return ret; } @@ -1785,10 +1898,15 @@ static void __exit vmbus_exit(void) vmbus_free_channels(); if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + kmsg_dump_unregister(&hv_kmsg_dumper); unregister_die_notifier(&hyperv_die_block); atomic_notifier_chain_unregister(&panic_notifier_list, &hyperv_panic_block); } + + free_page((unsigned long)hv_panic_page); + unregister_sysctl_table(hv_ctl_table_hdr); + hv_ctl_table_hdr = NULL; bus_unregister(&hv_bus); cpuhp_remove_state(hyperv_cpuhp_online); |