From 87712bf81dd092821c406ea3fb47a07222484a64 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sun, 14 Dec 2014 23:34:51 -0800 Subject: Drivers: hv: vmbus: Use get_cpu() to get the current CPU Replace calls for smp_processor_id() to get_cpu() to get the CPU ID of the current CPU. In these instances, there is no correctness issue with regards to preemption, we just need the current CPU ID. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 4 +++- drivers/hv/connection.c | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 2c59f030546b..1d7df2576b1c 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -815,7 +815,7 @@ cleanup: struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary) { struct list_head *cur, *tmp; - int cur_cpu = hv_context.vp_index[smp_processor_id()]; + int cur_cpu; struct vmbus_channel *cur_channel; struct vmbus_channel *outgoing_channel = primary; int cpu_distance, new_cpu_distance; @@ -823,6 +823,8 @@ struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary) if (list_empty(&primary->sc_list)) return outgoing_channel; + cur_cpu = hv_context.vp_index[get_cpu()]; + put_cpu(); list_for_each_safe(cur, tmp, &primary->sc_list) { cur_channel = list_entry(cur, struct vmbus_channel, sc_list); if (cur_channel->state != CHANNEL_OPENED_STATE) diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index e206619b946e..a63a795300b9 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -80,8 +80,10 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, msg->interrupt_page = virt_to_phys(vmbus_connection.int_page); msg->monitor_page1 = virt_to_phys(vmbus_connection.monitor_pages[0]); msg->monitor_page2 = virt_to_phys(vmbus_connection.monitor_pages[1]); - if (version == VERSION_WIN8_1) - msg->target_vcpu = hv_context.vp_index[smp_processor_id()]; + if (version == VERSION_WIN8_1) { + msg->target_vcpu = hv_context.vp_index[get_cpu()]; + put_cpu(); + } /* * Add to list before we send the request since we may -- cgit v1.2.3 From 79208c57da5311860f165b613c89b3f647e357cd Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 9 Jan 2015 23:54:29 -0800 Subject: Drivers: hv: hv_balloon: Make adjustments in computing the floor Make adjustments in computing the balloon floor. The current computation of the balloon floor was not appropriate for virtual machines with more than 10 GB of assigned memory - we would get into situations where the host would agressively balloon down the guest and leave the guest in an unusable state. This patch fixes the issue by raising the floor. Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index b958ded8ac7e..9cbbb831778a 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -928,9 +928,8 @@ static unsigned long compute_balloon_floor(void) * 128 72 (1/2) * 512 168 (1/4) * 2048 360 (1/8) - * 8192 552 (1/32) - * 32768 1320 - * 131072 4392 + * 8192 768 (1/16) + * 32768 1536 (1/32) */ if (totalram_pages < MB2PAGES(128)) min_pages = MB2PAGES(8) + (totalram_pages >> 1); @@ -938,8 +937,10 @@ static unsigned long compute_balloon_floor(void) min_pages = MB2PAGES(40) + (totalram_pages >> 2); else if (totalram_pages < MB2PAGES(2048)) min_pages = MB2PAGES(104) + (totalram_pages >> 3); + else if (totalram_pages < MB2PAGES(8192)) + min_pages = MB2PAGES(256) + (totalram_pages >> 4); else - min_pages = MB2PAGES(296) + (totalram_pages >> 5); + min_pages = MB2PAGES(512) + (totalram_pages >> 5); #undef MB2PAGES return min_pages; } -- cgit v1.2.3 From 22f88475b62ac826acae2f77c3e1bd9543e87b2a Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 9 Jan 2015 23:54:30 -0800 Subject: Drivers: hv: hv_balloon: Fix a locking bug in the balloon driver We support memory hot-add in the Hyper-V balloon driver by hot adding an appropriately sized and aligned region and controlling the on-lining of pages within that region based on the pages that the host wants us to online. We do this because the granularity and alignment requirements in Linux are different from what Windows expects. The state to manage the onlining of pages needs to be correctly protected. Fix this bug. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 68 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 5 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 9cbbb831778a..8e30415b0eb7 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -533,6 +533,9 @@ struct hv_dynmem_device { */ struct task_struct *thread; + struct mutex ha_region_mutex; + struct completion waiter_event; + /* * A list of hot-add regions. 
*/ @@ -549,7 +552,59 @@ struct hv_dynmem_device { static struct hv_dynmem_device dm_device; static void post_status(struct hv_dynmem_device *dm); + #ifdef CONFIG_MEMORY_HOTPLUG +static void acquire_region_mutex(bool trylock) +{ + if (trylock) { + reinit_completion(&dm_device.waiter_event); + while (!mutex_trylock(&dm_device.ha_region_mutex)) + wait_for_completion(&dm_device.waiter_event); + } else { + mutex_lock(&dm_device.ha_region_mutex); + } +} + +static void release_region_mutex(bool trylock) +{ + if (trylock) { + mutex_unlock(&dm_device.ha_region_mutex); + } else { + mutex_unlock(&dm_device.ha_region_mutex); + complete(&dm_device.waiter_event); + } +} + +static int hv_memory_notifier(struct notifier_block *nb, unsigned long val, + void *v) +{ + switch (val) { + case MEM_GOING_ONLINE: + acquire_region_mutex(true); + break; + + case MEM_ONLINE: + case MEM_CANCEL_ONLINE: + release_region_mutex(true); + if (dm_device.ha_waiting) { + dm_device.ha_waiting = false; + complete(&dm_device.ol_waitevent); + } + break; + + case MEM_GOING_OFFLINE: + case MEM_OFFLINE: + case MEM_CANCEL_OFFLINE: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block hv_memory_nb = { + .notifier_call = hv_memory_notifier, + .priority = 0 +}; + static void hv_bring_pgs_online(unsigned long start_pfn, unsigned long size) { @@ -591,6 +646,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, init_completion(&dm_device.ol_waitevent); dm_device.ha_waiting = true; + release_region_mutex(false); nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn)); ret = add_memory(nid, PFN_PHYS((start_pfn)), (HA_CHUNK << PAGE_SHIFT)); @@ -619,6 +675,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size, * have not been "onlined" within the allowed time. 
*/ wait_for_completion_timeout(&dm_device.ol_waitevent, 5*HZ); + acquire_region_mutex(false); post_status(&dm_device); } @@ -632,11 +689,6 @@ static void hv_online_page(struct page *pg) unsigned long cur_start_pgp; unsigned long cur_end_pgp; - if (dm_device.ha_waiting) { - dm_device.ha_waiting = false; - complete(&dm_device.ol_waitevent); - } - list_for_each(cur, &dm_device.ha_region_list) { has = list_entry(cur, struct hv_hotadd_state, list); cur_start_pgp = (unsigned long) @@ -834,6 +886,7 @@ static void hot_add_req(struct work_struct *dummy) resp.hdr.size = sizeof(struct dm_hot_add_response); #ifdef CONFIG_MEMORY_HOTPLUG + acquire_region_mutex(false); pg_start = dm->ha_wrk.ha_page_range.finfo.start_page; pfn_cnt = dm->ha_wrk.ha_page_range.finfo.page_cnt; @@ -865,6 +918,7 @@ static void hot_add_req(struct work_struct *dummy) if (do_hot_add) resp.page_count = process_hot_add(pg_start, pfn_cnt, rg_start, rg_sz); + release_region_mutex(false); #endif /* * The result field of the response structure has the @@ -1388,7 +1442,9 @@ static int balloon_probe(struct hv_device *dev, dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7; init_completion(&dm_device.host_event); init_completion(&dm_device.config_event); + init_completion(&dm_device.waiter_event); INIT_LIST_HEAD(&dm_device.ha_region_list); + mutex_init(&dm_device.ha_region_mutex); INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up); INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req); dm_device.host_specified_ha_region = false; @@ -1402,6 +1458,7 @@ static int balloon_probe(struct hv_device *dev, #ifdef CONFIG_MEMORY_HOTPLUG set_online_page_callback(&hv_online_page); + register_memory_notifier(&hv_memory_nb); #endif hv_set_drvdata(dev, &dm_device); @@ -1520,6 +1577,7 @@ static int balloon_remove(struct hv_device *dev) kfree(send_buffer); #ifdef CONFIG_MEMORY_HOTPLUG restore_online_page_callback(&hv_online_page); + unregister_memory_notifier(&hv_memory_nb); #endif list_for_each_safe(cur, tmp, &dm->ha_region_list) { has = list_entry(cur, struct hv_hotadd_state, list); -- cgit v1.2.3 From ab3de22bb4a3d4bda2d0ec8bebcb76a40f1cbf9b Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 9 Jan 2015 23:54:31 -0800 Subject: Drivers: hv: hv_balloon: Don't post pressure status from interrupt context We currently release memory (balloon down) in the interrupt context and we also post memory status while releasing memory. Rather than posting the status in the interrupt context, wakeup the status posting thread to post the status. This will address the inconsistent lock state that Sitsofe Wheeler reported: http://lkml.iu.edu/hypermail/linux/kernel/1411.1/00075.html Signed-off-by: K. Y. 
Srinivasan Reported-by: Sitsofe Wheeler Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_balloon.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 8e30415b0eb7..ff169386b2c7 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -1226,7 +1226,7 @@ static void balloon_down(struct hv_dynmem_device *dm, for (i = 0; i < range_count; i++) { free_balloon_pages(dm, &range_array[i]); - post_status(&dm_device); + complete(&dm_device.config_event); } if (req->more_pages == 1) @@ -1250,19 +1250,16 @@ static void balloon_onchannelcallback(void *context); static int dm_thread_func(void *dm_dev) { struct hv_dynmem_device *dm = dm_dev; - int t; while (!kthread_should_stop()) { - t = wait_for_completion_interruptible_timeout( + wait_for_completion_interruptible_timeout( &dm_device.config_event, 1*HZ); /* * The host expects us to post information on the memory * pressure every second. */ - - if (t == 0) - post_status(dm); - + reinit_completion(&dm_device.config_event); + post_status(dm); } return 0; -- cgit v1.2.3 From 4061ed9e2aaac31daef44f06e9b83143c78b24b2 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 9 Jan 2015 23:54:32 -0800 Subject: Drivers: hv: vmbus: Implement a clockevent device Implement a clockevent device based on the timer support available on Hyper-V. In this version of the patch I have addressed Jason's review comments. Signed-off-by: K. Y. Srinivasan Reviewed-by: Jason Wang Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/uapi/asm/hyperv.h | 11 ++++++ drivers/hv/hv.c | 78 ++++++++++++++++++++++++++++++++++++++ drivers/hv/hyperv_vmbus.h | 21 ++++++++++ drivers/hv/vmbus_drv.c | 37 +++++++++++++++++- 4 files changed, 145 insertions(+), 2 deletions(-) (limited to 'drivers/hv') diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 462efe746d77..90c458e66e13 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -187,6 +187,17 @@ #define HV_X64_MSR_SINT14 0x4000009E #define HV_X64_MSR_SINT15 0x4000009F +/* + * Synthetic Timer MSRs. Four timers per vcpu. 
+ */ +#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0 +#define HV_X64_MSR_STIMER0_COUNT 0x400000B1 +#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2 +#define HV_X64_MSR_STIMER1_COUNT 0x400000B3 +#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4 +#define HV_X64_MSR_STIMER2_COUNT 0x400000B5 +#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6 +#define HV_X64_MSR_STIMER3_COUNT 0x400000B7 #define HV_X64_MSR_HYPERCALL_ENABLE 0x00000001 #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT 12 diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 3e4235c7a47f..50e51a51ff8b 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -28,7 +28,9 @@ #include #include #include +#include #include +#include #include "hyperv_vmbus.h" /* The one and only */ @@ -37,6 +39,10 @@ struct hv_context hv_context = { .hypercall_page = NULL, }; +#define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */ +#define HV_MAX_MAX_DELTA_TICKS 0xffffffff +#define HV_MIN_DELTA_TICKS 1 + /* * query_hypervisor_info - Get version info of the windows hypervisor */ @@ -144,6 +150,8 @@ int hv_init(void) sizeof(int) * NR_CPUS); memset(hv_context.event_dpc, 0, sizeof(void *) * NR_CPUS); + memset(hv_context.clk_evt, 0, + sizeof(void *) * NR_CPUS); max_leaf = query_hypervisor_info(); @@ -258,10 +266,63 @@ u16 hv_signal_event(void *con_id) return status; } +static int hv_ce_set_next_event(unsigned long delta, + struct clock_event_device *evt) +{ + cycle_t current_tick; + + WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); + + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); + current_tick += delta; + wrmsrl(HV_X64_MSR_STIMER0_COUNT, current_tick); + return 0; +} + +static void hv_ce_setmode(enum clock_event_mode mode, + struct clock_event_device *evt) +{ + union hv_timer_config timer_cfg; + + switch (mode) { + case CLOCK_EVT_MODE_PERIODIC: + /* unsupported */ + break; + + case CLOCK_EVT_MODE_ONESHOT: + timer_cfg.enable = 1; + timer_cfg.auto_enable = 1; + timer_cfg.sintx = VMBUS_MESSAGE_SINT; + wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); + break; + + case CLOCK_EVT_MODE_UNUSED: + case CLOCK_EVT_MODE_SHUTDOWN: + wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0); + wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0); + break; + case CLOCK_EVT_MODE_RESUME: + break; + } +} + +static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu) +{ + dev->name = "Hyper-V clockevent"; + dev->features = CLOCK_EVT_FEAT_ONESHOT; + dev->cpumask = cpumask_of(cpu); + dev->rating = 1000; + dev->owner = THIS_MODULE; + + dev->set_mode = hv_ce_setmode; + dev->set_next_event = hv_ce_set_next_event; +} + int hv_synic_alloc(void) { size_t size = sizeof(struct tasklet_struct); + size_t ced_size = sizeof(struct clock_event_device); int cpu; for_each_online_cpu(cpu) { @@ -272,6 +333,13 @@ int hv_synic_alloc(void) } tasklet_init(hv_context.event_dpc[cpu], vmbus_on_event, cpu); + hv_context.clk_evt[cpu] = kzalloc(ced_size, GFP_ATOMIC); + if (hv_context.clk_evt[cpu] == NULL) { + pr_err("Unable to allocate clock event device\n"); + goto err; + } + hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu); + hv_context.synic_message_page[cpu] = (void *)get_zeroed_page(GFP_ATOMIC); @@ -305,6 +373,7 @@ err: static void hv_synic_free_cpu(int cpu) { kfree(hv_context.event_dpc[cpu]); + kfree(hv_context.clk_evt[cpu]); if (hv_context.synic_event_page[cpu]) free_page((unsigned long)hv_context.synic_event_page[cpu]); if (hv_context.synic_message_page[cpu]) @@ -388,6 +457,15 @@ void hv_synic_init(void *arg) hv_context.vp_index[cpu] = (u32)vp_index; INIT_LIST_HEAD(&hv_context.percpu_list[cpu]); + + /* + * 
Register the per-cpu clockevent source. + */ + if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) + clockevents_config_and_register(hv_context.clk_evt[cpu], + HV_TIMER_FREQUENCY, + HV_MIN_DELTA_TICKS, + HV_MAX_MAX_DELTA_TICKS); return; } diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index c386d8dc7223..44b1c9424712 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -178,6 +178,23 @@ struct hv_message_header { }; }; +/* + * Timer configuration register. + */ +union hv_timer_config { + u64 as_uint64; + struct { + u64 enable:1; + u64 periodic:1; + u64 lazy:1; + u64 auto_enable:1; + u64 reserved_z0:12; + u64 sintx:4; + u64 reserved_z1:44; + }; +}; + + /* Define timer message payload structure. */ struct hv_timer_message_payload { u32 timer_index; @@ -519,6 +536,10 @@ struct hv_context { * buffer to post messages to the host. */ void *post_msg_page[NR_CPUS]; + /* + * Support PV clockevent device. + */ + struct clock_event_device *clk_evt[NR_CPUS]; }; extern struct hv_context hv_context; diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 4d6b26979fbd..7488111ec057 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -578,6 +579,34 @@ static void vmbus_onmessage_work(struct work_struct *work) kfree(ctx); } +void hv_process_timer_expiration(struct hv_message *msg, int cpu) +{ + struct clock_event_device *dev = hv_context.clk_evt[cpu]; + + if (dev->event_handler) + dev->event_handler(dev); + + msg->header.message_type = HVMSG_NONE; + + /* + * Make sure the write to MessageType (ie set to + * HVMSG_NONE) happens before we read the + * MessagePending and EOMing. Otherwise, the EOMing + * will not deliver any more messages since there is + * no empty slot + */ + mb(); + + if (msg->header.message_flags.msg_pending) { + /* + * This will cause message queue rescan to + * possibly deliver another msg from the + * hypervisor + */ + wrmsrl(HV_X64_MSR_EOM, 0); + } +} + static void vmbus_on_msg_dpc(unsigned long data) { int cpu = smp_processor_id(); @@ -667,8 +696,12 @@ static void vmbus_isr(void) msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT; /* Check if there are actual msgs to be processed */ - if (msg->header.message_type != HVMSG_NONE) - tasklet_schedule(&msg_dpc); + if (msg->header.message_type != HVMSG_NONE) { + if (msg->header.message_type == HVMSG_TIMER_EXPIRED) + hv_process_timer_expiration(msg, cpu); + else + tasklet_schedule(&msg_dpc); + } } /* -- cgit v1.2.3 From 9f52a1630922bcdab75fc72e59ed58db8e164314 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 9 Jan 2015 23:54:33 -0800 Subject: Drivers: hv: vmbus: Fix a bug in vmbus_establish_gpadl() Correctly compute the local (gpadl) handle. I would like to thank Michael Brown for seeing this bug. Signed-off-by: K. Y. 
Srinivasan Reported-by: Michael Brown Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 433f72a1c006..c76ffbe59f65 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -366,8 +366,8 @@ int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, unsigned long flags; int ret = 0; - next_gpadl_handle = atomic_read(&vmbus_connection.next_gpadl_handle); - atomic_inc(&vmbus_connection.next_gpadl_handle); + next_gpadl_handle = + (atomic_inc_return(&vmbus_connection.next_gpadl_handle) - 1); ret = create_gpadl_header(kbuffer, size, &msginfo, &msgcount); if (ret) -- cgit v1.2.3 From d61031ee8df6214d58371a1cc36a0591e242fba0 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Fri, 9 Jan 2015 23:54:34 -0800 Subject: Drivers: hv: vmbus: Support a vmbus API for efficiently sending page arrays Currently, the API for sending a multi-page buffer over VMBUS is limited to a maximum pfn array of MAX_MULTIPAGE_BUFFER_COUNT. This limitation is not imposed by the host and unnecessarily limits the maximum payload that can be sent. Implement an API that does not have this restriction. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/hyperv.h | 31 +++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) (limited to 'drivers/hv') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index c76ffbe59f65..18c4f23dacf1 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -683,6 +683,50 @@ int vmbus_sendpacket_pagebuffer(struct vmbus_channel *channel, } EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer); +/* + * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet + * using a GPADL Direct packet type. + * The buffer includes the vmbus descriptor. + */ +int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, + struct vmbus_packet_mpb_array *desc, + u32 desc_size, + void *buffer, u32 bufferlen, u64 requestid) +{ + int ret; + u32 packetlen; + u32 packetlen_aligned; + struct kvec bufferlist[3]; + u64 aligned_data = 0; + bool signal = false; + + packetlen = desc_size + bufferlen; + packetlen_aligned = ALIGN(packetlen, sizeof(u64)); + + /* Setup the descriptor */ + desc->type = VM_PKT_DATA_USING_GPA_DIRECT; + desc->flags = VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED; + desc->dataoffset8 = desc_size >> 3; /* in 8-bytes grandularity */ + desc->length8 = (u16)(packetlen_aligned >> 3); + desc->transactionid = requestid; + desc->rangecount = 1; + + bufferlist[0].iov_base = desc; + bufferlist[0].iov_len = desc_size; + bufferlist[1].iov_base = buffer; + bufferlist[1].iov_len = bufferlen; + bufferlist[2].iov_base = &aligned_data; + bufferlist[2].iov_len = (packetlen_aligned - packetlen); + + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + + if (ret == 0 && signal) + vmbus_setevent(channel); + + return ret; +} +EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc); + /* * vmbus_sendpacket_multipagebuffer - Send a multi-page buffer packet * using a GPADL Direct packet type. 
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 476c685ca6f9..259023a34bec 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -57,6 +57,18 @@ struct hv_multipage_buffer { u64 pfn_array[MAX_MULTIPAGE_BUFFER_COUNT]; }; +/* + * Multiple-page buffer array; the pfn array is variable size: + * The number of entries in the PFN array is determined by + * "len" and "offset". + */ +struct hv_mpb_array { + /* Length and Offset determines the # of pfns in the array */ + u32 len; + u32 offset; + u64 pfn_array[]; +}; + /* 0x18 includes the proprietary packet header */ #define MAX_PAGE_BUFFER_PACKET (0x18 + \ (sizeof(struct hv_page_buffer) * \ @@ -814,6 +826,18 @@ struct vmbus_channel_packet_multipage_buffer { struct hv_multipage_buffer range; } __packed; +/* The format must be the same as struct vmdata_gpa_direct */ +struct vmbus_packet_mpb_array { + u16 type; + u16 dataoffset8; + u16 length8; + u16 flags; + u64 transactionid; + u32 reserved; + u32 rangecount; /* Always 1 in this case */ + struct hv_mpb_array range; +} __packed; + extern int vmbus_open(struct vmbus_channel *channel, u32 send_ringbuffersize, @@ -845,6 +869,13 @@ extern int vmbus_sendpacket_multipagebuffer(struct vmbus_channel *channel, u32 bufferlen, u64 requestid); +extern int vmbus_sendpacket_mpb_desc(struct vmbus_channel *channel, + struct vmbus_packet_mpb_array *mpb, + u32 desc_size, + void *buffer, + u32 bufferlen, + u64 requestid); + extern int vmbus_establish_gpadl(struct vmbus_channel *channel, void *kbuffer, u32 size, -- cgit v1.2.3 From d9b1652947c695d247b5e4603a16213ec55661ed Mon Sep 17 00:00:00 2001 From: Dexuan Cui Date: Wed, 14 Jan 2015 01:55:10 -0800 Subject: hv: hv_fcopy: drop the obsolete message on transfer failure In the case the user-space daemon crashes, hangs or is killed, we need to down the semaphore, otherwise, after the daemon starts next time, the obsolete data in fcopy_transaction.message or fcopy_transaction.fcopy_msg will be used immediately. Cc: Jason Wang Cc: Vitaly Kuznetsov Signed-off-by: Dexuan Cui Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/hv_fcopy.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'drivers/hv') diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index 23b2ce294c4c..cd453e4b2a07 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -86,6 +86,18 @@ static void fcopy_work_func(struct work_struct *dummy) * process the pending transaction. */ fcopy_respond_to_host(HV_E_FAIL); + + /* In the case the user-space daemon crashes, hangs or is killed, we + * need to down the semaphore, otherwise, after the daemon starts next + * time, the obsolete data in fcopy_transaction.message or + * fcopy_transaction.fcopy_msg will be used immediately. + * + * NOTE: fcopy_read() happens to get the semaphore (very rare)? We're + * still OK, because we've reported the failure to the host. + */ + if (down_trylock(&fcopy_transaction.read_sema)) + ; + } static int fcopy_handle_handshake(u32 version) @@ -344,6 +356,14 @@ static int fcopy_open(struct inode *inode, struct file *f) return 0; } +/* XXX: there are still some tricky corner cases, e.g., + * 1) In a SMP guest, when fcopy_release() runs between + * schedule_delayed_work() and fcopy_send_data(), there is + * still a chance an obsolete message will be queued. + * + * 2) When the fcopy daemon is running, if we unload the driver, + * we'll notice a kernel oops when we kill the daemon later. 
+ */ static int fcopy_release(struct inode *inode, struct file *f) { /* @@ -351,6 +371,13 @@ static int fcopy_release(struct inode *inode, struct file *f) */ in_hand_shake = true; opened = false; + + if (cancel_delayed_work_sync(&fcopy_work)) { + /* We haven't up()-ed the semaphore(very rare)? */ + if (down_trylock(&fcopy_transaction.read_sema)) + ; + fcopy_respond_to_host(HV_E_FAIL); + } return 0; } -- cgit v1.2.3 From 9c3a6f7e476fc4961297fc66b1177f9f8c8dd238 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 20 Jan 2015 16:45:04 +0100 Subject: Drivers: hv: check vmbus_device_create() return value in vmbus_process_offer() vmbus_device_create() result is not being checked in vmbus_process_offer() and it can fail if kzalloc() fails. Add the check and do minor cleanup to avoid additional duplication of "free_channel(); return;" block. Reported-by: Jason Wang Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Acked-by: Jason Wang Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 1d7df2576b1c..704c0e00f8d2 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -341,11 +341,10 @@ static void vmbus_process_offer(struct work_struct *work) if (channel->sc_creation_callback != NULL) channel->sc_creation_callback(newchannel); - return; + goto out; } - free_channel(newchannel); - return; + goto err_free_chan; } /* @@ -364,6 +363,8 @@ static void vmbus_process_offer(struct work_struct *work) &newchannel->offermsg.offer.if_type, &newchannel->offermsg.offer.if_instance, newchannel); + if (!newchannel->device_obj) + goto err_free_chan; /* * Add the new device to the bus. This will kick off device-driver @@ -379,9 +380,12 @@ static void vmbus_process_offer(struct work_struct *work) list_del(&newchannel->listentry); spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); kfree(newchannel->device_obj); - - free_channel(newchannel); + goto err_free_chan; } +out: + return; +err_free_chan: + free_channel(newchannel); } enum { -- cgit v1.2.3 From 67fae053bfc6e84144150e4c6c62670abb215c33 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 20 Jan 2015 16:45:05 +0100 Subject: Drivers: hv: rename sc_lock to the more generic lock sc_lock spinlock in struct vmbus_channel is being used to not only protect the sc_list field, e.g. vmbus_open() function uses it to implement test-and-set access to the state field. Rename it to the more generic 'lock' and add the description. Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. 
Srinivasan Acked-by: Jason Wang Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel.c | 6 +++--- drivers/hv/channel_mgmt.c | 10 +++++----- include/linux/hyperv.h | 7 ++++++- 3 files changed, 14 insertions(+), 9 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 18c4f23dacf1..2978f5ee8d2a 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -73,14 +73,14 @@ int vmbus_open(struct vmbus_channel *newchannel, u32 send_ringbuffer_size, unsigned long flags; int ret, t, err = 0; - spin_lock_irqsave(&newchannel->sc_lock, flags); + spin_lock_irqsave(&newchannel->lock, flags); if (newchannel->state == CHANNEL_OPEN_STATE) { newchannel->state = CHANNEL_OPENING_STATE; } else { - spin_unlock_irqrestore(&newchannel->sc_lock, flags); + spin_unlock_irqrestore(&newchannel->lock, flags); return -EINVAL; } - spin_unlock_irqrestore(&newchannel->sc_lock, flags); + spin_unlock_irqrestore(&newchannel->lock, flags); newchannel->onchannel_callback = onchannelcallback; newchannel->channel_callback_context = context; diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 704c0e00f8d2..1e0b996ed643 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -146,7 +146,7 @@ static struct vmbus_channel *alloc_channel(void) return NULL; spin_lock_init(&channel->inbound_lock); - spin_lock_init(&channel->sc_lock); + spin_lock_init(&channel->lock); INIT_LIST_HEAD(&channel->sc_list); INIT_LIST_HEAD(&channel->percpu_list); @@ -246,9 +246,9 @@ static void vmbus_process_rescind_offer(struct work_struct *work) spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags); } else { primary_channel = channel->primary_channel; - spin_lock_irqsave(&primary_channel->sc_lock, flags); + spin_lock_irqsave(&primary_channel->lock, flags); list_del(&channel->sc_list); - spin_unlock_irqrestore(&primary_channel->sc_lock, flags); + spin_unlock_irqrestore(&primary_channel->lock, flags); } free_channel(channel); } @@ -323,9 +323,9 @@ static void vmbus_process_offer(struct work_struct *work) * Process the sub-channel. */ newchannel->primary_channel = channel; - spin_lock_irqsave(&channel->sc_lock, flags); + spin_lock_irqsave(&channel->lock, flags); list_add_tail(&newchannel->sc_list, &channel->sc_list); - spin_unlock_irqrestore(&channel->sc_lock, flags); + spin_unlock_irqrestore(&channel->lock, flags); if (newchannel->target_cpu != get_cpu()) { put_cpu(); diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 259023a34bec..5a2ba674795e 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -734,7 +734,12 @@ struct vmbus_channel { */ void (*sc_creation_callback)(struct vmbus_channel *new_sc); - spinlock_t sc_lock; + /* + * The spinlock to protect the structure. It is being used to protect + * test-and-set access to various attributes of the structure as well + * as all sc_list operations. + */ + spinlock_t lock; /* * All Sub-channels of a primary channel are linked here. */ -- cgit v1.2.3 From d7f2fbafb4f84306436277664cf28042beaf252a Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Tue, 20 Jan 2015 16:45:06 +0100 Subject: Drivers: hv: vmbus: serialize Offer and Rescind offer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4b2f9abea52a ("staging: hv: convert channel_mgmt.c to not call osd_schedule_callback")' was written under an assumption that we never receive Rescind offer while we're still processing the initial Offer request. 
However, the issue we fixed in 04a258c162a8 could be caused by this assumption not always being true. In particular, we need to protect against the following: 1) Receiving a Rescind offer after we do queue_work() for processing an Offer request and before we actually enter vmbus_process_offer(). work.func points to vmbus_process_offer() at this moment and in vmbus_onoffer_rescind() we do another queue_work() without a check so we'll enter vmbus_process_offer() twice. 2) Receiving a Rescind offer after we enter vmbus_process_offer() and especially after we set >state = CHANNEL_OPEN_STATE. Many things can go wrong in that case, e.g. we can call free_channel() while we're still using it. Implement the required protection by changing work->func at the very end of vmbus_process_offer() and checking work->func in vmbus_onoffer_rescind(). In case we receive rescind offer during or before vmbus_process_offer() is done we set rescind flag to true and we check it at the end of vmbus_process_offer() so such offer will not get lost. Suggested-by: Radim Krčmář Signed-off-by: Vitaly Kuznetsov Acked-by: Jason Wang Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- drivers/hv/channel_mgmt.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'drivers/hv') diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 1e0b996ed643..3736f71bdec5 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -279,9 +279,6 @@ static void vmbus_process_offer(struct work_struct *work) int ret; unsigned long flags; - /* The next possible work is rescind handling */ - INIT_WORK(&newchannel->work, vmbus_process_rescind_offer); - /* Make sure this is a new offer */ spin_lock_irqsave(&vmbus_connection.channel_lock, flags); @@ -341,7 +338,7 @@ static void vmbus_process_offer(struct work_struct *work) if (channel->sc_creation_callback != NULL) channel->sc_creation_callback(newchannel); - goto out; + goto done_init_rescind; } goto err_free_chan; @@ -382,7 +379,14 @@ static void vmbus_process_offer(struct work_struct *work) kfree(newchannel->device_obj); goto err_free_chan; } -out: +done_init_rescind: + spin_lock_irqsave(&newchannel->lock, flags); + /* The next possible work is rescind handling */ + INIT_WORK(&newchannel->work, vmbus_process_rescind_offer); + /* Check if rescind offer was already received */ + if (newchannel->rescind) + queue_work(newchannel->controlwq, &newchannel->work); + spin_unlock_irqrestore(&newchannel->lock, flags); return; err_free_chan: free_channel(newchannel); @@ -520,6 +524,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) { struct vmbus_channel_rescind_offer *rescind; struct vmbus_channel *channel; + unsigned long flags; rescind = (struct vmbus_channel_rescind_offer *)hdr; channel = relid2channel(rescind->child_relid); @@ -528,11 +533,20 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr) /* Just return here, no channel found */ return; + spin_lock_irqsave(&channel->lock, flags); channel->rescind = true; + /* + * channel->work.func != vmbus_process_rescind_offer means we are still + * processing offer request and the rescind offer processing should be + * postponed. It will be done at the very end of vmbus_process_offer() + * as rescind flag is being checked there. 
+ */ + if (channel->work.func == vmbus_process_rescind_offer) + /* work is initialized for vmbus_process_rescind_offer() from + * vmbus_process_offer() where the channel got created */ + queue_work(channel->controlwq, &channel->work); - /* work is initialized for vmbus_process_rescind_offer() from - * vmbus_process_offer() where the channel got created */ - queue_work(channel->controlwq, &channel->work); + spin_unlock_irqrestore(&channel->lock, flags); } /* -- cgit v1.2.3 From d8a60e000c951f845d9a5fb3e67853e0e63a5659 Mon Sep 17 00:00:00 2001 From: kbuild test robot Date: Mon, 26 Jan 2015 01:17:54 +0800 Subject: Drivers: hv: vmbus: hv_process_timer_expiration() can be static drivers/hv/vmbus_drv.c:582:6: sparse: symbol 'hv_process_timer_expiration' was not declared. Should it be static? Signed-off-by: Fengguang Wu Cc: Vitaly Kuznetsov Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Signed-off-by: Greg Kroah-Hartman --- drivers/hv/vmbus_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/hv') diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 7488111ec057..35e3f422ad7b 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -579,7 +579,7 @@ static void vmbus_onmessage_work(struct work_struct *work) kfree(ctx); } -void hv_process_timer_expiration(struct hv_message *msg, int cpu) +static void hv_process_timer_expiration(struct hv_message *msg, int cpu) { struct clock_event_device *dev = hv_context.clk_evt[cpu]; -- cgit v1.2.3
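
A note on the smp_processor_id() -> get_cpu() change at the top of this series ("Use get_cpu() to get the current CPU"): the commit message stresses that preemption is not a correctness issue here, so the likely motivation is the warning that smp_processor_id() emits from preemptible context when CONFIG_DEBUG_PREEMPT is enabled. Below is a minimal sketch of the resulting pattern, reusing the hv_context.vp_index[] table shown in the diffs; the helper name is made up for illustration and is not part of the driver.

/*
 * Hypothetical helper showing the pattern the patch adopts: get_cpu()
 * disables preemption for the duration of the per-cpu table lookup and
 * put_cpu() re-enables it.  Any CPU that was current at the time of the
 * read is acceptable, so the window is kept as small as possible.
 */
static u32 current_vp_index(void)
{
        int cpu = get_cpu();
        u32 vp = hv_context.vp_index[cpu];

        put_cpu();
        return vp;
}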
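
On the compute_balloon_floor() change ("Make adjustments in computing the floor"): the new piecewise formula can be checked in isolation. The stand-alone user-space sketch below works in megabytes instead of pages, which preserves the arithmetic because MB2PAGES() is linear; balloon_floor_mb() and totalram_mb are illustrative stand-ins, not driver code.

#include <stdio.h>

/*
 * Mirrors the thresholds in the patch: 1/2 of RAM below 128 MB, 1/4
 * below 512 MB, 1/8 below 2 GB, 1/16 below 8 GB, 1/32 above that.
 */
static unsigned long balloon_floor_mb(unsigned long totalram_mb)
{
        unsigned long min_mb;

        if (totalram_mb < 128)
                min_mb = 8 + (totalram_mb >> 1);
        else if (totalram_mb < 512)
                min_mb = 40 + (totalram_mb >> 2);
        else if (totalram_mb < 2048)
                min_mb = 104 + (totalram_mb >> 3);
        else if (totalram_mb < 8192)
                min_mb = 256 + (totalram_mb >> 4);
        else
                min_mb = 512 + (totalram_mb >> 5);

        return min_mb;
}

int main(void)
{
        /*
         * Sample points: a 32 GB guest now keeps a floor of
         * 512 + 32768/32 = 1536 MB, where the old formula gave
         * 296 + 32768/32 = 1320 MB.
         */
        unsigned long sizes[] = { 100, 1024, 4096, 32768 };

        for (int i = 0; i < 4; i++)
                printf("%lu MB of RAM -> floor %lu MB\n",
                       sizes[i], balloon_floor_mb(sizes[i]));
        return 0;
}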
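
On the vmbus_establish_gpadl() fix ("Correctly compute the local (gpadl) handle"): the underlying bug is that atomic_read() followed by atomic_inc() is two separate operations, so concurrent callers could observe the same next_gpadl_handle. The sketch below re-expresses the old and new patterns with C11 atomics in user space; the names are illustrative, not the driver's.

#include <stdatomic.h>
#include <stdio.h>

static atomic_uint next_handle = 1;

/* What the old code did: read, then increment -- not one atomic step. */
static unsigned int alloc_handle_racy(void)
{
        unsigned int h = atomic_load(&next_handle);

        atomic_fetch_add(&next_handle, 1);
        return h;
}

/*
 * What the patch does, expressed as a single fetch-and-add.  The kernel's
 * atomic_inc_return() returns the *new* value, hence the "- 1" in the
 * patch; C11's atomic_fetch_add() already returns the old value.
 */
static unsigned int alloc_handle(void)
{
        return atomic_fetch_add(&next_handle, 1);
}

int main(void)
{
        printf("racy: %u, fixed: %u %u\n",
               alloc_handle_racy(), alloc_handle(), alloc_handle());
        return 0;
}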