diff options
Diffstat (limited to 'drivers/hv')
-rw-r--r-- | drivers/hv/channel.c | 4 | ||||
-rw-r--r-- | drivers/hv/channel_mgmt.c | 34 | ||||
-rw-r--r-- | drivers/hv/hv.c | 152 | ||||
-rw-r--r-- | drivers/hv/hv_balloon.c | 26 | ||||
-rw-r--r-- | drivers/hv/hv_fcopy.c | 21 | ||||
-rw-r--r-- | drivers/hv/hv_kvp.c | 3 | ||||
-rw-r--r-- | drivers/hv/hv_utils_transport.c | 2 | ||||
-rw-r--r-- | drivers/hv/hyperv_vmbus.h | 16 | ||||
-rw-r--r-- | drivers/hv/ring_buffer.c | 14 | ||||
-rw-r--r-- | drivers/hv/vmbus_drv.c | 353 |
10 files changed, 488 insertions, 137 deletions
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 603ce97e9027..c4dcab048cb8 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -601,6 +601,7 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, u64 aligned_data = 0; int ret; bool signal = false; + int num_vecs = ((bufferlen != 0) ? 3 : 1); /* Setup the descriptor */ @@ -618,7 +619,8 @@ int vmbus_sendpacket_ctl(struct vmbus_channel *channel, void *buffer, bufferlist[2].iov_base = &aligned_data; bufferlist[2].iov_len = (packetlen_aligned - packetlen); - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, &signal); + ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, + &signal); /* * Signalling the host is conditional on many factors: diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 4506a6623618..2f9aead4ecfc 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -347,6 +347,7 @@ enum { IDE = 0, SCSI, NIC, + ND_NIC, MAX_PERF_CHN, }; @@ -391,6 +392,7 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui struct vmbus_channel *primary = channel->primary_channel; int next_node; struct cpumask available_mask; + struct cpumask *alloced_mask; for (i = IDE; i < MAX_PERF_CHN; i++) { if (!memcmp(type_guid->b, hp_devs[i].guid, @@ -408,7 +410,6 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui * channel, bind it to cpu 0. */ channel->numa_node = 0; - cpumask_set_cpu(0, &channel->alloced_cpus_in_node); channel->target_cpu = 0; channel->target_vp = hv_context.vp_index[0]; return; @@ -433,21 +434,38 @@ static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_gui channel->numa_node = next_node; primary = channel; } + alloced_mask = &hv_context.hv_numa_map[primary->numa_node]; - if (cpumask_weight(&primary->alloced_cpus_in_node) == + if (cpumask_weight(alloced_mask) == cpumask_weight(cpumask_of_node(primary->numa_node))) { /* * We have cycled through all the CPUs in the node; * reset the alloced map. */ - cpumask_clear(&primary->alloced_cpus_in_node); + cpumask_clear(alloced_mask); } - cpumask_xor(&available_mask, &primary->alloced_cpus_in_node, + cpumask_xor(&available_mask, alloced_mask, cpumask_of_node(primary->numa_node)); - cur_cpu = cpumask_next(-1, &available_mask); - cpumask_set_cpu(cur_cpu, &primary->alloced_cpus_in_node); + cur_cpu = -1; + while (true) { + cur_cpu = cpumask_next(cur_cpu, &available_mask); + if (cur_cpu >= nr_cpu_ids) { + cur_cpu = -1; + cpumask_copy(&available_mask, + cpumask_of_node(primary->numa_node)); + continue; + } + + if (!cpumask_test_cpu(cur_cpu, + &primary->alloced_cpus_in_node)) { + cpumask_set_cpu(cur_cpu, + &primary->alloced_cpus_in_node); + cpumask_set_cpu(cur_cpu, alloced_mask); + break; + } + } channel->target_cpu = cur_cpu; channel->target_vp = hv_context.vp_index[cur_cpu]; @@ -469,6 +487,10 @@ void vmbus_initiate_unload(void) { struct vmbus_channel_message_header hdr; + /* Pre-Win2012R2 hosts don't support reconnect */ + if (vmbus_proto_version < VERSION_WIN8_1) + return; + init_completion(&vmbus_connection.unload_event); memset(&hdr, 0, sizeof(struct vmbus_channel_message_header)); hdr.msgtype = CHANNELMSG_UNLOAD; diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index d3943bceecc3..6341be8739ae 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -93,11 +93,14 @@ static int query_hypervisor_info(void) */ static u64 do_hypercall(u64 control, void *input, void *output) { -#ifdef CONFIG_X86_64 - u64 hv_status = 0; u64 input_address = (input) ? virt_to_phys(input) : 0; u64 output_address = (output) ? virt_to_phys(output) : 0; void *hypercall_page = hv_context.hypercall_page; +#ifdef CONFIG_X86_64 + u64 hv_status = 0; + + if (!hypercall_page) + return (u64)ULLONG_MAX; __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8"); __asm__ __volatile__("call *%3" : "=a" (hv_status) : @@ -112,13 +115,13 @@ static u64 do_hypercall(u64 control, void *input, void *output) u32 control_lo = control & 0xFFFFFFFF; u32 hv_status_hi = 1; u32 hv_status_lo = 1; - u64 input_address = (input) ? virt_to_phys(input) : 0; u32 input_address_hi = input_address >> 32; u32 input_address_lo = input_address & 0xFFFFFFFF; - u64 output_address = (output) ? virt_to_phys(output) : 0; u32 output_address_hi = output_address >> 32; u32 output_address_lo = output_address & 0xFFFFFFFF; - void *hypercall_page = hv_context.hypercall_page; + + if (!hypercall_page) + return (u64)ULLONG_MAX; __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi), "=a"(hv_status_lo) : "d" (control_hi), @@ -130,6 +133,56 @@ static u64 do_hypercall(u64 control, void *input, void *output) #endif /* !x86_64 */ } +#ifdef CONFIG_X86_64 +static cycle_t read_hv_clock_tsc(struct clocksource *arg) +{ + cycle_t current_tick; + struct ms_hyperv_tsc_page *tsc_pg = hv_context.tsc_page; + + if (tsc_pg->tsc_sequence != -1) { + /* + * Use the tsc page to compute the value. + */ + + while (1) { + cycle_t tmp; + u32 sequence = tsc_pg->tsc_sequence; + u64 cur_tsc; + u64 scale = tsc_pg->tsc_scale; + s64 offset = tsc_pg->tsc_offset; + + rdtscll(cur_tsc); + /* current_tick = ((cur_tsc *scale) >> 64) + offset */ + asm("mulq %3" + : "=d" (current_tick), "=a" (tmp) + : "a" (cur_tsc), "r" (scale)); + + current_tick += offset; + if (tsc_pg->tsc_sequence == sequence) + return current_tick; + + if (tsc_pg->tsc_sequence != -1) + continue; + /* + * Fallback using MSR method. + */ + break; + } + } + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); + return current_tick; +} + +static struct clocksource hyperv_cs_tsc = { + .name = "hyperv_clocksource_tsc_page", + .rating = 425, + .read = read_hv_clock_tsc, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; +#endif + + /* * hv_init - Main initialization routine. * @@ -139,7 +192,9 @@ int hv_init(void) { int max_leaf; union hv_x64_msr_hypercall_contents hypercall_msr; + union hv_x64_msr_hypercall_contents tsc_msr; void *virtaddr = NULL; + void *va_tsc = NULL; memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS); memset(hv_context.synic_message_page, 0, @@ -183,6 +238,22 @@ int hv_init(void) hv_context.hypercall_page = virtaddr; +#ifdef CONFIG_X86_64 + if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { + va_tsc = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + if (!va_tsc) + goto cleanup; + hv_context.tsc_page = va_tsc; + + rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); + + tsc_msr.enable = 1; + tsc_msr.guest_physical_address = vmalloc_to_pfn(va_tsc); + + wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); + clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); + } +#endif return 0; cleanup: @@ -216,6 +287,21 @@ void hv_cleanup(void) vfree(hv_context.hypercall_page); hv_context.hypercall_page = NULL; } + +#ifdef CONFIG_X86_64 + /* + * Cleanup the TSC page based CS. + */ + if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { + clocksource_change_rating(&hyperv_cs_tsc, 10); + clocksource_unregister(&hyperv_cs_tsc); + + hypercall_msr.as_uint64 = 0; + wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); + vfree(hv_context.tsc_page); + hv_context.tsc_page = NULL; + } +#endif } /* @@ -271,7 +357,7 @@ static int hv_ce_set_next_event(unsigned long delta, { cycle_t current_tick; - WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); + WARN_ON(!clockevent_state_oneshot(evt)); rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); current_tick += delta; @@ -279,31 +365,24 @@ static int hv_ce_set_next_event(unsigned long delta, return 0; } -static void hv_ce_setmode(enum clock_event_mode mode, - struct clock_event_device *evt) +static int hv_ce_shutdown(struct clock_event_device *evt) +{ + wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0); + wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0); + + return 0; +} + +static int hv_ce_set_oneshot(struct clock_event_device *evt) { union hv_timer_config timer_cfg; - switch (mode) { - case CLOCK_EVT_MODE_PERIODIC: - /* unsupported */ - break; - - case CLOCK_EVT_MODE_ONESHOT: - timer_cfg.enable = 1; - timer_cfg.auto_enable = 1; - timer_cfg.sintx = VMBUS_MESSAGE_SINT; - wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); - break; - - case CLOCK_EVT_MODE_UNUSED: - case CLOCK_EVT_MODE_SHUTDOWN: - wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0); - wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0); - break; - case CLOCK_EVT_MODE_RESUME: - break; - } + timer_cfg.enable = 1; + timer_cfg.auto_enable = 1; + timer_cfg.sintx = VMBUS_MESSAGE_SINT; + wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); + + return 0; } static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu) @@ -318,7 +397,8 @@ static void hv_init_clockevent_device(struct clock_event_device *dev, int cpu) * references to the hv_vmbus module making it impossible to unload. */ - dev->set_mode = hv_ce_setmode; + dev->set_state_shutdown = hv_ce_shutdown; + dev->set_state_oneshot = hv_ce_set_oneshot; dev->set_next_event = hv_ce_set_next_event; } @@ -329,6 +409,13 @@ int hv_synic_alloc(void) size_t ced_size = sizeof(struct clock_event_device); int cpu; + hv_context.hv_numa_map = kzalloc(sizeof(struct cpumask) * nr_node_ids, + GFP_ATOMIC); + if (hv_context.hv_numa_map == NULL) { + pr_err("Unable to allocate NUMA map\n"); + goto err; + } + for_each_online_cpu(cpu) { hv_context.event_dpc[cpu] = kmalloc(size, GFP_ATOMIC); if (hv_context.event_dpc[cpu] == NULL) { @@ -342,6 +429,7 @@ int hv_synic_alloc(void) pr_err("Unable to allocate clock event device\n"); goto err; } + hv_init_clockevent_device(hv_context.clk_evt[cpu], cpu); hv_context.synic_message_page[cpu] = @@ -390,6 +478,7 @@ void hv_synic_free(void) { int cpu; + kfree(hv_context.hv_numa_map); for_each_online_cpu(cpu) hv_synic_free_cpu(cpu); } @@ -503,8 +592,7 @@ void hv_synic_cleanup(void *arg) /* Turn off clockevent device */ if (ms_hyperv.features & HV_X64_MSR_SYNTIMER_AVAILABLE) - hv_ce_setmode(CLOCK_EVT_MODE_SHUTDOWN, - hv_context.clk_evt[cpu]); + hv_ce_shutdown(hv_context.clk_evt[cpu]); rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); @@ -530,6 +618,4 @@ void hv_synic_cleanup(void *arg) rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); sctrl.enable = 0; wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); - - hv_synic_free_cpu(cpu); } diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c index 8a725cd69ad7..b853b4b083bd 100644 --- a/drivers/hv/hv_balloon.c +++ b/drivers/hv/hv_balloon.c @@ -62,11 +62,13 @@ enum { DYNMEM_PROTOCOL_VERSION_1 = DYNMEM_MAKE_VERSION(0, 3), DYNMEM_PROTOCOL_VERSION_2 = DYNMEM_MAKE_VERSION(1, 0), + DYNMEM_PROTOCOL_VERSION_3 = DYNMEM_MAKE_VERSION(2, 0), DYNMEM_PROTOCOL_VERSION_WIN7 = DYNMEM_PROTOCOL_VERSION_1, DYNMEM_PROTOCOL_VERSION_WIN8 = DYNMEM_PROTOCOL_VERSION_2, + DYNMEM_PROTOCOL_VERSION_WIN10 = DYNMEM_PROTOCOL_VERSION_3, - DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN8 + DYNMEM_PROTOCOL_VERSION_CURRENT = DYNMEM_PROTOCOL_VERSION_WIN10 }; @@ -1296,13 +1298,25 @@ static void version_resp(struct hv_dynmem_device *dm, if (dm->next_version == 0) goto version_error; - dm->next_version = 0; memset(&version_req, 0, sizeof(struct dm_version_request)); version_req.hdr.type = DM_VERSION_REQUEST; version_req.hdr.size = sizeof(struct dm_version_request); version_req.hdr.trans_id = atomic_inc_return(&trans_id); - version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN7; - version_req.is_last_attempt = 1; + version_req.version.version = dm->next_version; + + /* + * Set the next version to try in case current version fails. + * Win7 protocol ought to be the last one to try. + */ + switch (version_req.version.version) { + case DYNMEM_PROTOCOL_VERSION_WIN8: + dm->next_version = DYNMEM_PROTOCOL_VERSION_WIN7; + version_req.is_last_attempt = 0; + break; + default: + dm->next_version = 0; + version_req.is_last_attempt = 1; + } ret = vmbus_sendpacket(dm->dev->channel, &version_req, sizeof(struct dm_version_request), @@ -1442,7 +1456,7 @@ static int balloon_probe(struct hv_device *dev, dm_device.dev = dev; dm_device.state = DM_INITIALIZING; - dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN7; + dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8; init_completion(&dm_device.host_event); init_completion(&dm_device.config_event); INIT_LIST_HEAD(&dm_device.ha_region_list); @@ -1474,7 +1488,7 @@ static int balloon_probe(struct hv_device *dev, version_req.hdr.type = DM_VERSION_REQUEST; version_req.hdr.size = sizeof(struct dm_version_request); version_req.hdr.trans_id = atomic_inc_return(&trans_id); - version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN8; + version_req.version.version = DYNMEM_PROTOCOL_VERSION_WIN10; version_req.is_last_attempt = 0; ret = vmbus_sendpacket(dev->channel, &version_req, diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c index b50dd330cf31..db4b887b889d 100644 --- a/drivers/hv/hv_fcopy.c +++ b/drivers/hv/hv_fcopy.c @@ -116,7 +116,7 @@ static int fcopy_handle_handshake(u32 version) static void fcopy_send_data(struct work_struct *dummy) { - struct hv_start_fcopy smsg_out; + struct hv_start_fcopy *smsg_out = NULL; int operation = fcopy_transaction.fcopy_msg->operation; struct hv_start_fcopy *smsg_in; void *out_src; @@ -136,21 +136,24 @@ static void fcopy_send_data(struct work_struct *dummy) switch (operation) { case START_FILE_COPY: out_len = sizeof(struct hv_start_fcopy); - memset(&smsg_out, 0, out_len); - smsg_out.hdr.operation = operation; + smsg_out = kzalloc(sizeof(*smsg_out), GFP_KERNEL); + if (!smsg_out) + return; + + smsg_out->hdr.operation = operation; smsg_in = (struct hv_start_fcopy *)fcopy_transaction.fcopy_msg; utf16s_to_utf8s((wchar_t *)smsg_in->file_name, W_MAX_PATH, UTF16_LITTLE_ENDIAN, - (__u8 *)&smsg_out.file_name, W_MAX_PATH - 1); + (__u8 *)&smsg_out->file_name, W_MAX_PATH - 1); utf16s_to_utf8s((wchar_t *)smsg_in->path_name, W_MAX_PATH, UTF16_LITTLE_ENDIAN, - (__u8 *)&smsg_out.path_name, W_MAX_PATH - 1); + (__u8 *)&smsg_out->path_name, W_MAX_PATH - 1); - smsg_out.copy_flags = smsg_in->copy_flags; - smsg_out.file_size = smsg_in->file_size; - out_src = &smsg_out; + smsg_out->copy_flags = smsg_in->copy_flags; + smsg_out->file_size = smsg_in->file_size; + out_src = smsg_out; break; default: @@ -168,6 +171,8 @@ static void fcopy_send_data(struct work_struct *dummy) fcopy_transaction.state = HVUTIL_READY; } } + kfree(smsg_out); + return; } diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c index d85798d5992c..74c38a9f34a6 100644 --- a/drivers/hv/hv_kvp.c +++ b/drivers/hv/hv_kvp.c @@ -353,6 +353,9 @@ kvp_send_key(struct work_struct *dummy) return; message = kzalloc(sizeof(*message), GFP_KERNEL); + if (!message) + return; + message->kvp_hdr.operation = operation; message->kvp_hdr.pool = pool; in_msg = kvp_transaction.kvp_msg; diff --git a/drivers/hv/hv_utils_transport.c b/drivers/hv/hv_utils_transport.c index ea7ba5ef16a9..6a9d80a5332d 100644 --- a/drivers/hv/hv_utils_transport.c +++ b/drivers/hv/hv_utils_transport.c @@ -186,7 +186,7 @@ int hvutil_transport_send(struct hvutil_transport *hvt, void *msg, int len) return -EINVAL; } else if (hvt->mode == HVUTIL_TRANSPORT_NETLINK) { cn_msg = kzalloc(sizeof(*cn_msg) + len, GFP_ATOMIC); - if (!msg) + if (!cn_msg) return -ENOMEM; cn_msg->id.idx = hvt->cn_id.idx; cn_msg->id.val = hvt->cn_id.val; diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index cddc0c9f6bf9..3d70e36c918e 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -141,7 +141,7 @@ struct hv_port_info { struct { u32 target_sint; u32 target_vp; - u16 base_flag_bumber; + u16 base_flag_number; u16 flag_count; u32 rsvdz; } event_port_info; @@ -517,6 +517,7 @@ struct hv_context { u64 guestid; void *hypercall_page; + void *tsc_page; bool synic_initialized; @@ -551,10 +552,23 @@ struct hv_context { * Support PV clockevent device. */ struct clock_event_device *clk_evt[NR_CPUS]; + /* + * To manage allocations in a NUMA node. + * Array indexed by numa node ID. + */ + struct cpumask *hv_numa_map; }; extern struct hv_context hv_context; +struct ms_hyperv_tsc_page { + volatile u32 tsc_sequence; + u32 reserved1; + volatile u64 tsc_scale; + volatile s64 tsc_offset; + u64 reserved2[509]; +}; + struct hv_ring_buffer_debug_info { u32 current_interrupt_mask; u32 current_read_index; diff --git a/drivers/hv/ring_buffer.c b/drivers/hv/ring_buffer.c index 6361d124f67d..70a1a9a22f87 100644 --- a/drivers/hv/ring_buffer.c +++ b/drivers/hv/ring_buffer.c @@ -103,10 +103,9 @@ static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi) * there is room for the producer to send the pending packet. */ -static bool hv_need_to_signal_on_read(u32 old_rd, - struct hv_ring_buffer_info *rbi) +static bool hv_need_to_signal_on_read(u32 prev_write_sz, + struct hv_ring_buffer_info *rbi) { - u32 prev_write_sz; u32 cur_write_sz; u32 r_size; u32 write_loc = rbi->ring_buffer->write_index; @@ -123,10 +122,6 @@ static bool hv_need_to_signal_on_read(u32 old_rd, cur_write_sz = write_loc >= read_loc ? r_size - (write_loc - read_loc) : read_loc - write_loc; - prev_write_sz = write_loc >= old_rd ? r_size - (write_loc - old_rd) : - old_rd - write_loc; - - if ((prev_write_sz < pending_sz) && (cur_write_sz >= pending_sz)) return true; @@ -517,7 +512,6 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, u32 next_read_location = 0; u64 prev_indices = 0; unsigned long flags; - u32 old_read; if (buflen <= 0) return -EINVAL; @@ -528,8 +522,6 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, &bytes_avail_toread, &bytes_avail_towrite); - old_read = bytes_avail_toread; - /* Make sure there is something to read */ if (bytes_avail_toread < buflen) { spin_unlock_irqrestore(&inring_info->ring_lock, flags); @@ -560,7 +552,7 @@ int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, void *buffer, spin_unlock_irqrestore(&inring_info->ring_lock, flags); - *signal = hv_need_to_signal_on_read(old_read, inring_info); + *signal = hv_need_to_signal_on_read(bytes_avail_towrite, inring_info); return 0; } diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index cf204005ee78..f19b6f7a467a 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -39,6 +39,8 @@ #include <asm/mshyperv.h> #include <linux/notifier.h> #include <linux/ptrace.h> +#include <linux/screen_info.h> +#include <linux/kdebug.h> #include "hyperv_vmbus.h" static struct acpi_device *hv_acpi_dev; @@ -48,12 +50,18 @@ static struct completion probe_event; static int irq; -static int hyperv_panic_event(struct notifier_block *nb, - unsigned long event, void *ptr) +static void hyperv_report_panic(struct pt_regs *regs) { - struct pt_regs *regs; + static bool panic_reported; - regs = current_pt_regs(); + /* + * We prefer to report panic on 'die' chain as we have proper + * registers to report, but if we miss it (e.g. on BUG()) we need + * to report it on 'panic'. + */ + if (panic_reported) + return; + panic_reported = true; wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip); wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax); @@ -65,18 +73,37 @@ static int hyperv_panic_event(struct notifier_block *nb, * Let Hyper-V know there is crash data available */ wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); +} + +static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, + void *args) +{ + struct pt_regs *regs; + + regs = current_pt_regs(); + + hyperv_report_panic(regs); return NOTIFY_DONE; } +static int hyperv_die_event(struct notifier_block *nb, unsigned long val, + void *args) +{ + struct die_args *die = (struct die_args *)args; + struct pt_regs *regs = die->regs; + + hyperv_report_panic(regs); + return NOTIFY_DONE; +} + +static struct notifier_block hyperv_die_block = { + .notifier_call = hyperv_die_event, +}; static struct notifier_block hyperv_panic_block = { .notifier_call = hyperv_panic_event, }; -struct resource hyperv_mmio = { - .name = "hyperv mmio", - .flags = IORESOURCE_MEM, -}; -EXPORT_SYMBOL_GPL(hyperv_mmio); +struct resource *hyperv_mmio; static int vmbus_exists(void) { @@ -414,6 +441,43 @@ static ssize_t in_write_bytes_avail_show(struct device *dev, } static DEVICE_ATTR_RO(in_write_bytes_avail); +static ssize_t channel_vp_mapping_show(struct device *dev, + struct device_attribute *dev_attr, + char *buf) +{ + struct hv_device *hv_dev = device_to_hv_device(dev); + struct vmbus_channel *channel = hv_dev->channel, *cur_sc; + unsigned long flags; + int buf_size = PAGE_SIZE, n_written, tot_written; + struct list_head *cur; + + if (!channel) + return -ENODEV; + + tot_written = snprintf(buf, buf_size, "%u:%u\n", + channel->offermsg.child_relid, channel->target_cpu); + + spin_lock_irqsave(&channel->lock, flags); + + list_for_each(cur, &channel->sc_list) { + if (tot_written >= buf_size - 1) + break; + + cur_sc = list_entry(cur, struct vmbus_channel, sc_list); + n_written = scnprintf(buf + tot_written, + buf_size - tot_written, + "%u:%u\n", + cur_sc->offermsg.child_relid, + cur_sc->target_cpu); + tot_written += n_written; + } + + spin_unlock_irqrestore(&channel->lock, flags); + + return tot_written; +} +static DEVICE_ATTR_RO(channel_vp_mapping); + /* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */ static struct attribute *vmbus_attrs[] = { &dev_attr_id.attr, @@ -438,6 +502,7 @@ static struct attribute *vmbus_attrs[] = { &dev_attr_in_write_index.attr, &dev_attr_in_read_bytes_avail.attr, &dev_attr_in_write_bytes_avail.attr, + &dev_attr_channel_vp_mapping.attr, NULL, }; ATTRIBUTE_GROUPS(vmbus); @@ -763,38 +828,6 @@ static void vmbus_isr(void) } } -#ifdef CONFIG_HOTPLUG_CPU -static int hyperv_cpu_disable(void) -{ - return -ENOSYS; -} - -static void hv_cpu_hotplug_quirk(bool vmbus_loaded) -{ - static void *previous_cpu_disable; - - /* - * Offlining a CPU when running on newer hypervisors (WS2012R2, Win8, - * ...) is not supported at this moment as channel interrupts are - * distributed across all of them. - */ - - if ((vmbus_proto_version == VERSION_WS2008) || - (vmbus_proto_version == VERSION_WIN7)) - return; - - if (vmbus_loaded) { - previous_cpu_disable = smp_ops.cpu_disable; - smp_ops.cpu_disable = hyperv_cpu_disable; - pr_notice("CPU offlining is not supported by hypervisor\n"); - } else if (previous_cpu_disable) - smp_ops.cpu_disable = previous_cpu_disable; -} -#else -static void hv_cpu_hotplug_quirk(bool vmbus_loaded) -{ -} -#endif /* * vmbus_bus_init -Main vmbus driver initialization routine. @@ -836,12 +869,14 @@ static int vmbus_bus_init(int irq) if (ret) goto err_alloc; - hv_cpu_hotplug_quirk(true); + if (vmbus_proto_version > VERSION_WIN7) + cpu_hotplug_disable(); /* * Only register if the crash MSRs are available */ - if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + register_die_notifier(&hyperv_die_block); atomic_notifier_chain_register(&panic_notifier_list, &hyperv_panic_block); } @@ -863,8 +898,8 @@ err_cleanup: } /** - * __vmbus_child_driver_register - Register a vmbus's driver - * @drv: Pointer to driver structure you want to register + * __vmbus_child_driver_register() - Register a vmbus's driver + * @hv_driver: Pointer to driver structure you want to register * @owner: owner module of the drv * @mod_name: module name string * @@ -896,7 +931,8 @@ EXPORT_SYMBOL_GPL(__vmbus_driver_register); /** * vmbus_driver_unregister() - Unregister a vmbus's driver - * @drv: Pointer to driver structure you want to un-register + * @hv_driver: Pointer to driver structure you want to + * un-register * * Un-register the given driver that was previous registered with a call to * vmbus_driver_register() @@ -982,30 +1018,184 @@ void vmbus_device_unregister(struct hv_device *device_obj) /* - * VMBUS is an acpi enumerated device. Get the the information we + * VMBUS is an acpi enumerated device. Get the information we * need from DSDT. */ - +#define VTPM_BASE_ADDRESS 0xfed40000 static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx) { + resource_size_t start = 0; + resource_size_t end = 0; + struct resource *new_res; + struct resource **old_res = &hyperv_mmio; + struct resource **prev_res = NULL; + switch (res->type) { case ACPI_RESOURCE_TYPE_IRQ: irq = res->data.irq.interrupts[0]; + return AE_OK; + + /* + * "Address" descriptors are for bus windows. Ignore + * "memory" descriptors, which are for registers on + * devices. + */ + case ACPI_RESOURCE_TYPE_ADDRESS32: + start = res->data.address32.address.minimum; + end = res->data.address32.address.maximum; break; case ACPI_RESOURCE_TYPE_ADDRESS64: - hyperv_mmio.start = res->data.address64.address.minimum; - hyperv_mmio.end = res->data.address64.address.maximum; + start = res->data.address64.address.minimum; + end = res->data.address64.address.maximum; break; + + default: + /* Unused resource type */ + return AE_OK; + } + /* + * Ignore ranges that are below 1MB, as they're not + * necessary or useful here. + */ + if (end < 0x100000) + return AE_OK; + + new_res = kzalloc(sizeof(*new_res), GFP_ATOMIC); + if (!new_res) + return AE_NO_MEMORY; + + /* If this range overlaps the virtual TPM, truncate it. */ + if (end > VTPM_BASE_ADDRESS && start < VTPM_BASE_ADDRESS) + end = VTPM_BASE_ADDRESS; + + new_res->name = "hyperv mmio"; + new_res->flags = IORESOURCE_MEM; + new_res->start = start; + new_res->end = end; + + do { + if (!*old_res) { + *old_res = new_res; + break; + } + + if ((*old_res)->end < new_res->start) { + new_res->sibling = *old_res; + if (prev_res) + (*prev_res)->sibling = new_res; + *old_res = new_res; + break; + } + + prev_res = old_res; + old_res = &(*old_res)->sibling; + + } while (1); return AE_OK; } +static int vmbus_acpi_remove(struct acpi_device *device) +{ + struct resource *cur_res; + struct resource *next_res; + + if (hyperv_mmio) { + for (cur_res = hyperv_mmio; cur_res; cur_res = next_res) { + next_res = cur_res->sibling; + kfree(cur_res); + } + } + + return 0; +} + +/** + * vmbus_allocate_mmio() - Pick a memory-mapped I/O range. + * @new: If successful, supplied a pointer to the + * allocated MMIO space. + * @device_obj: Identifies the caller + * @min: Minimum guest physical address of the + * allocation + * @max: Maximum guest physical address + * @size: Size of the range to be allocated + * @align: Alignment of the range to be allocated + * @fb_overlap_ok: Whether this allocation can be allowed + * to overlap the video frame buffer. + * + * This function walks the resources granted to VMBus by the + * _CRS object in the ACPI namespace underneath the parent + * "bridge" whether that's a root PCI bus in the Generation 1 + * case or a Module Device in the Generation 2 case. It then + * attempts to allocate from the global MMIO pool in a way that + * matches the constraints supplied in these parameters and by + * that _CRS. + * + * Return: 0 on success, -errno on failure + */ +int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, + resource_size_t min, resource_size_t max, + resource_size_t size, resource_size_t align, + bool fb_overlap_ok) +{ + struct resource *iter; + resource_size_t range_min, range_max, start, local_min, local_max; + const char *dev_n = dev_name(&device_obj->device); + u32 fb_end = screen_info.lfb_base + (screen_info.lfb_size << 1); + int i; + + for (iter = hyperv_mmio; iter; iter = iter->sibling) { + if ((iter->start >= max) || (iter->end <= min)) + continue; + + range_min = iter->start; + range_max = iter->end; + + /* If this range overlaps the frame buffer, split it into + two tries. */ + for (i = 0; i < 2; i++) { + local_min = range_min; + local_max = range_max; + if (fb_overlap_ok || (range_min >= fb_end) || + (range_max <= screen_info.lfb_base)) { + i++; + } else { + if ((range_min <= screen_info.lfb_base) && + (range_max >= screen_info.lfb_base)) { + /* + * The frame buffer is in this window, + * so trim this into the part that + * preceeds the frame buffer. + */ + local_max = screen_info.lfb_base - 1; + range_min = fb_end; + } else { + range_min = fb_end; + continue; + } + } + + start = (local_min + align - 1) & ~(align - 1); + for (; start + size - 1 <= local_max; start += align) { + *new = request_mem_region_exclusive(start, size, + dev_n); + if (*new) + return 0; + } + } + } + + return -ENXIO; +} +EXPORT_SYMBOL_GPL(vmbus_allocate_mmio); + static int vmbus_acpi_add(struct acpi_device *device) { acpi_status result; int ret_val = -ENODEV; + struct acpi_device *ancestor; hv_acpi_dev = device; @@ -1015,35 +1205,27 @@ static int vmbus_acpi_add(struct acpi_device *device) if (ACPI_FAILURE(result)) goto acpi_walk_err; /* - * The parent of the vmbus acpi device (Gen2 firmware) is the VMOD that - * has the mmio ranges. Get that. + * Some ancestor of the vmbus acpi device (Gen1 or Gen2 + * firmware) is the VMOD that has the mmio ranges. Get that. */ - if (device->parent) { - result = acpi_walk_resources(device->parent->handle, - METHOD_NAME__CRS, - vmbus_walk_resources, NULL); + for (ancestor = device->parent; ancestor; ancestor = ancestor->parent) { + result = acpi_walk_resources(ancestor->handle, METHOD_NAME__CRS, + vmbus_walk_resources, NULL); if (ACPI_FAILURE(result)) - goto acpi_walk_err; - if (hyperv_mmio.start && hyperv_mmio.end) - request_resource(&iomem_resource, &hyperv_mmio); + continue; + if (hyperv_mmio) + break; } ret_val = 0; acpi_walk_err: complete(&probe_event); + if (ret_val) + vmbus_acpi_remove(device); return ret_val; } -static int vmbus_acpi_remove(struct acpi_device *device) -{ - int ret = 0; - - if (hyperv_mmio.start && hyperv_mmio.end) - ret = release_resource(&hyperv_mmio); - return ret; -} - static const struct acpi_device_id vmbus_acpi_device_ids[] = { {"VMBUS", 0}, {"VMBus", 0}, @@ -1060,6 +1242,29 @@ static struct acpi_driver vmbus_acpi_driver = { }, }; +static void hv_kexec_handler(void) +{ + int cpu; + + hv_synic_clockevents_cleanup(); + vmbus_initiate_unload(); + for_each_online_cpu(cpu) + smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); + hv_cleanup(); +}; + +static void hv_crash_handler(struct pt_regs *regs) +{ + vmbus_initiate_unload(); + /* + * In crash handler we can't schedule synic cleanup for all CPUs, + * doing the cleanup for current CPU only. This should be sufficient + * for kdump. + */ + hv_synic_cleanup(NULL); + hv_cleanup(); +}; + static int __init hv_acpi_init(void) { int ret, t; @@ -1092,6 +1297,9 @@ static int __init hv_acpi_init(void) if (ret) goto cleanup; + hv_setup_kexec_handler(hv_kexec_handler); + hv_setup_crash_handler(hv_crash_handler); + return 0; cleanup: @@ -1104,13 +1312,16 @@ static void __exit vmbus_exit(void) { int cpu; + hv_remove_kexec_handler(); + hv_remove_crash_handler(); vmbus_connection.conn_state = DISCONNECTED; hv_synic_clockevents_cleanup(); vmbus_disconnect(); hv_remove_vmbus_irq(); tasklet_kill(&msg_dpc); vmbus_free_channels(); - if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) { + unregister_die_notifier(&hyperv_die_block); atomic_notifier_chain_unregister(&panic_notifier_list, &hyperv_panic_block); } @@ -1120,8 +1331,10 @@ static void __exit vmbus_exit(void) tasklet_kill(hv_context.event_dpc[cpu]); smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1); } + hv_synic_free(); acpi_bus_unregister_driver(&vmbus_acpi_driver); - hv_cpu_hotplug_quirk(false); + if (vmbus_proto_version > VERSION_WIN7) + cpu_hotplug_enable(); } |