Diffstat (limited to 'drivers/hv')
-rw-r--r--  drivers/hv/channel.c      |   8
-rw-r--r--  drivers/hv/channel_mgmt.c |  69
-rw-r--r--  drivers/hv/connection.c   |  11
-rw-r--r--  drivers/hv/hv.c           |   9
-rw-r--r--  drivers/hv/hv_kvp.c       |  14
-rw-r--r--  drivers/hv/hv_util.c      | 164
-rw-r--r--  drivers/hv/hyperv_vmbus.h |  11
-rw-r--r--  drivers/hv/vmbus_drv.c    |  80
8 files changed, 184 insertions, 182 deletions
diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c
index 736ac76d2a6a..e9bf0bb87ac4 100644
--- a/drivers/hv/channel.c
+++ b/drivers/hv/channel.c
@@ -630,9 +630,13 @@ void vmbus_close(struct vmbus_channel *channel)
 	 */
 	list_for_each_safe(cur, tmp, &channel->sc_list) {
 		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
-		if (cur_channel->state != CHANNEL_OPENED_STATE)
-			continue;
 		vmbus_close_internal(cur_channel);
+		if (cur_channel->rescind) {
+			mutex_lock(&vmbus_connection.channel_mutex);
+			hv_process_channel_removal(cur_channel,
+					   cur_channel->offermsg.child_relid);
+			mutex_unlock(&vmbus_connection.channel_mutex);
+		}
 	}
 	/*
 	 * Now close the primary.
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index 735f9363f2e4..0fabd410efd9 100644
--- a/drivers/hv/channel_mgmt.c
+++ b/drivers/hv/channel_mgmt.c
@@ -428,7 +428,6 @@ void vmbus_free_channels(void)
 {
 	struct vmbus_channel *channel, *tmp;
 
-	mutex_lock(&vmbus_connection.channel_mutex);
 	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
 		listentry) {
 		/* hv_process_channel_removal() needs this */
@@ -436,7 +435,6 @@ void vmbus_free_channels(void)
 
 		vmbus_device_unregister(channel->device_obj);
 	}
-	mutex_unlock(&vmbus_connection.channel_mutex);
 }
 
 /*
@@ -483,8 +481,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 			list_add_tail(&newchannel->sc_list, &channel->sc_list);
 			channel->num_sc++;
 			spin_unlock_irqrestore(&channel->lock, flags);
-		} else
+		} else {
+			atomic_dec(&vmbus_connection.offer_in_progress);
 			goto err_free_chan;
+		}
 	}
 
 	dev_type = hv_get_dev_type(newchannel);
@@ -511,6 +511,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 	if (!fnew) {
 		if (channel->sc_creation_callback != NULL)
 			channel->sc_creation_callback(newchannel);
+		atomic_dec(&vmbus_connection.offer_in_progress);
 		return;
 	}
 
@@ -532,9 +533,7 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 	 * binding which eventually invokes the device driver's AddDevice()
 	 * method.
 	 */
-	mutex_lock(&vmbus_connection.channel_mutex);
 	ret = vmbus_device_register(newchannel->device_obj);
-	mutex_unlock(&vmbus_connection.channel_mutex);
 
 	if (ret != 0) {
 		pr_err("unable to add child device object (relid %d)\n",
@@ -542,6 +541,8 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 		kfree(newchannel->device_obj);
 		goto err_deq_chan;
 	}
+
+	atomic_dec(&vmbus_connection.offer_in_progress);
 	return;
 
 err_deq_chan:
@@ -797,6 +798,7 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 	newchannel = alloc_channel();
 	if (!newchannel) {
 		vmbus_release_relid(offer->child_relid);
+		atomic_dec(&vmbus_connection.offer_in_progress);
 		pr_err("Unable to allocate channel object\n");
 		return;
 	}
@@ -843,16 +845,38 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 
 	rescind = (struct vmbus_channel_rescind_offer *)hdr;
 
+	/*
+	 * The offer msg and the corresponding rescind msg
+	 * from the host are guaranteed to be ordered -
+	 * offer comes in first and then the rescind.
+	 * Since we process these events in work elements,
+	 * and with preemption, we may end up processing
+	 * the events out of order. Given that we handle these
+	 * work elements on the same CPU, this is possible only
+	 * in the case of preemption. In any case wait here
+	 * until the offer processing has moved beyond the
+	 * point where the channel is discoverable.
+	 */
+
+	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
+		/*
+		 * Wait here while any channel offer is
+		 * being processed.
+		 */
+		msleep(1);
+	}
+
 	mutex_lock(&vmbus_connection.channel_mutex);
 	channel = relid2channel(rescind->child_relid);
+	mutex_unlock(&vmbus_connection.channel_mutex);
 
 	if (channel == NULL) {
 		/*
-		 * This is very impossible, because in
-		 * vmbus_process_offer(), we have already invoked
-		 * vmbus_release_relid() on error.
+		 * We failed in processing the offer message;
+		 * we would have cleaned up the relid in that
+		 * failure path.
 		 */
-		goto out;
+		return;
 	}
 
 	spin_lock_irqsave(&channel->lock, flags);
@@ -864,7 +888,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 	if (channel->device_obj) {
 		if (channel->chn_rescind_callback) {
 			channel->chn_rescind_callback(channel);
-			goto out;
+			return;
 		}
 		/*
 		 * We will have to unregister this device from the
@@ -875,13 +899,26 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
 			vmbus_device_unregister(channel->device_obj);
 			put_device(dev);
 		}
-	} else {
-		hv_process_channel_removal(channel,
-			channel->offermsg.child_relid);
 	}
-
-out:
-	mutex_unlock(&vmbus_connection.channel_mutex);
+	if (channel->primary_channel != NULL) {
+		/*
+		 * Sub-channel is being rescinded. Following is the channel
+		 * close sequence when initiated from the driver (refer to
+		 * vmbus_close() for details):
+		 * 1. Close all sub-channels first
+		 * 2. Then close the primary channel.
+		 */
+		if (channel->state == CHANNEL_OPEN_STATE) {
+			/*
+			 * The channel is currently not open;
+			 * it is safe for us to clean up the channel.
+			 */
+			mutex_lock(&vmbus_connection.channel_mutex);
+			hv_process_channel_removal(channel,
+					   channel->offermsg.child_relid);
+			mutex_unlock(&vmbus_connection.channel_mutex);
+		}
+	}
 }
 
 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
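The offer/rescind serialization above reduces to a counter that the offer path raises before the channel becomes discoverable and drops on every exit path, while the rescind path polls it with a 1 ms sleep. Below is a minimal userspace sketch of that pattern using C11 atomics and POSIX threads; every name in it (offer_worker, rescind_worker, the counter) is an illustrative stand-in, not driver code.

/*
 * Sketch of the offer_in_progress handshake: the dispatcher raises the
 * counter before queuing offer work, the offer side drops it once the
 * channel is published, and the rescind side spins (with a short sleep,
 * the counterpart of msleep(1)) until no offer is in flight.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <time.h>

static atomic_int offer_in_progress;

static void *offer_worker(void *arg)
{
        struct timespec ts = { .tv_sec = 0, .tv_nsec = 5 * 1000 * 1000 };

        nanosleep(&ts, NULL);           /* simulate channel setup work */
        puts("offer: channel is now discoverable");
        atomic_fetch_sub(&offer_in_progress, 1);
        return NULL;
}

static void *rescind_worker(void *arg)
{
        struct timespec ms = { .tv_sec = 0, .tv_nsec = 1 * 1000 * 1000 };

        /* Wait until offer processing has published the channel. */
        while (atomic_load(&offer_in_progress) != 0)
                nanosleep(&ms, NULL);
        puts("rescind: safe to look up and tear down the channel");
        return NULL;
}

int main(void)
{
        pthread_t offer, rescind;

        /* The dispatcher raises the counter before queuing the offer. */
        atomic_fetch_add(&offer_in_progress, 1);
        pthread_create(&offer, NULL, offer_worker, NULL);
        pthread_create(&rescind, NULL, rescind_worker, NULL);
        pthread_join(offer, NULL);
        pthread_join(rescind, NULL);
        return 0;
}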
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index fce27fb141cc..59c11ff90d12 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -93,10 +93,13 @@ static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
 	 * all the CPUs. This is needed for kexec to work correctly where
 	 * the CPU attempting to connect may not be CPU 0.
 	 */
-	if (version >= VERSION_WIN8_1)
+	if (version >= VERSION_WIN8_1) {
 		msg->target_vcpu = hv_context.vp_index[smp_processor_id()];
-	else
+		vmbus_connection.connect_cpu = smp_processor_id();
+	} else {
 		msg->target_vcpu = 0;
+		vmbus_connection.connect_cpu = 0;
+	}
 
 	/*
 	 * Add to list before we send the request since we may
@@ -370,7 +373,7 @@ int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep)
 			break;
 		case HV_STATUS_INSUFFICIENT_MEMORY:
 		case HV_STATUS_INSUFFICIENT_BUFFERS:
-			ret = -ENOMEM;
+			ret = -ENOBUFS;
 			break;
 		case HV_STATUS_SUCCESS:
 			return ret;
@@ -387,7 +390,7 @@ int vmbus_post_msg(void *buffer, size_t buflen, bool can_sleep)
 		else
 			mdelay(usec / 1000);
 
-		if (usec < 256000)
+		if (retries < 22)
 			usec *= 2;
 	}
 	return ret;
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 12e7baecb84e..2ea12207caa0 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -82,10 +82,15 @@ int hv_post_message(union hv_connection_id connection_id,
 	aligned_msg->message_type = message_type;
 	aligned_msg->payload_size = payload_size;
 	memcpy((void *)aligned_msg->payload, payload, payload_size);
-	put_cpu_ptr(hv_cpu);
 
 	status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL);
 
+	/* Preemption must remain disabled until after the hypercall
+	 * so some other thread can't get scheduled onto this cpu and
+	 * corrupt the per-cpu post_msg_page
+	 */
+	put_cpu_ptr(hv_cpu);
+
	return status & 0xFFFF;
 }
 
@@ -96,7 +101,7 @@ static int hv_ce_set_next_event(unsigned long delta,
 
 	WARN_ON(!clockevent_state_oneshot(evt));
 
-	hv_get_current_tick(current_tick);
+	current_tick = hyperv_cs->read(NULL);
 	current_tick += delta;
 	hv_init_timer(HV_X64_MSR_STIMER0_COUNT, current_tick);
 	return 0;
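The vmbus_post_msg() change above swaps the backoff cap from an absolute delay (usec < 256000) to a retry count (retries < 22), so the per-attempt delay keeps doubling for the first 22 attempts instead of freezing at 256 ms. A small userspace sketch of that capped exponential backoff follows; try_post() is a made-up stand-in for the hypercall, and the 1-microsecond starting delay is illustrative.

/*
 * Capped exponential backoff: double the delay on each failed attempt,
 * but only for the first 22 retries, matching the new condition above.
 */
#include <stdbool.h>
#include <stdio.h>

static bool try_post(int attempt)
{
        return attempt >= 5;            /* pretend the 6th attempt succeeds */
}

int main(void)
{
        unsigned long usec = 1;         /* illustrative initial delay */
        int retries;

        for (retries = 0; retries < 100; retries++) {
                if (try_post(retries)) {
                        printf("posted after %d retries\n", retries);
                        return 0;
                }
                printf("retry %2d: sleeping %lu usec\n", retries, usec);
                /* usleep(usec) would go here in a real loop */
                if (retries < 22)       /* cap growth by retry count, */
                        usec *= 2;      /* not by absolute delay      */
        }
        return 1;
}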
diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index e99ff2ddad40..9a90b915b5be 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c
@@ -112,7 +112,7 @@ static void kvp_poll_wrapper(void *channel)
 {
 	/* Transaction is finished, reset the state here to avoid races. */
 	kvp_transaction.state = HVUTIL_READY;
-	hv_kvp_onchannelcallback(channel);
+	tasklet_schedule(&((struct vmbus_channel *)channel)->callback_event);
 }
 
 static void kvp_register_done(void)
@@ -159,7 +159,7 @@ static void kvp_timeout_func(struct work_struct *dummy)
 
 static void kvp_host_handshake_func(struct work_struct *dummy)
 {
-	hv_poll_channel(kvp_transaction.recv_channel, hv_kvp_onchannelcallback);
+	tasklet_schedule(&kvp_transaction.recv_channel->callback_event);
 }
 
 static int kvp_handle_handshake(struct hv_kvp_msg *msg)
@@ -625,16 +625,17 @@ void hv_kvp_onchannelcallback(void *context)
 		       NEGO_IN_PROGRESS,
 		       NEGO_FINISHED} host_negotiatied = NEGO_NOT_STARTED;
 
-	if (host_negotiatied == NEGO_NOT_STARTED &&
-	    kvp_transaction.state < HVUTIL_READY) {
+	if (kvp_transaction.state < HVUTIL_READY) {
 		/*
 		 * If userspace daemon is not connected and host is asking
 		 * us to negotiate we need to delay to not lose messages.
 		 * This is important for Failover IP setting.
 		 */
-		host_negotiatied = NEGO_IN_PROGRESS;
-		schedule_delayed_work(&kvp_host_handshake_work,
+		if (host_negotiatied == NEGO_NOT_STARTED) {
+			host_negotiatied = NEGO_IN_PROGRESS;
+			schedule_delayed_work(&kvp_host_handshake_work,
 				      HV_UTIL_NEGO_TIMEOUT * HZ);
+		}
 		return;
 	}
 	if (kvp_transaction.state > HVUTIL_READY)
@@ -702,6 +703,7 @@ void hv_kvp_onchannelcallback(void *context)
 				       VM_PKT_DATA_INBAND, 0);
 
 		host_negotiatied = NEGO_FINISHED;
+		hv_poll_channel(kvp_transaction.recv_channel, kvp_poll_wrapper);
 	}
 
 }
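The hv_kvp.c hunk above moves the NEGO_NOT_STARTED check inside the state test so the delayed handshake work is armed exactly once while the userspace daemon is still absent, and every later host poke is a no-op until negotiation finishes. Here is a compact userspace sketch of that schedule-once guard; the state names mirror the driver (with the identifier's spelling normalized), everything else is hypothetical.

/*
 * Schedule-once guard: the first callback while the daemon is absent
 * arms the handshake; repeat callbacks do nothing until it completes.
 */
#include <stdio.h>

enum nego_state { NEGO_NOT_STARTED, NEGO_IN_PROGRESS, NEGO_FINISHED };

static enum nego_state host_negotiated = NEGO_NOT_STARTED;
static int daemon_ready;                /* kvp_transaction.state analogue */

static void schedule_handshake_work(void)
{
        puts("handshake work scheduled (once)");
}

static void on_channel_callback(void)
{
        if (!daemon_ready) {
                /* Delay negotiation so the host's messages are not lost. */
                if (host_negotiated == NEGO_NOT_STARTED) {
                        host_negotiated = NEGO_IN_PROGRESS;
                        schedule_handshake_work();
                }
                return;
        }
        puts("processing host message");
        host_negotiated = NEGO_FINISHED;
}

int main(void)
{
        on_channel_callback();          /* arms the delayed handshake */
        on_channel_callback();          /* no-op: already in progress */
        daemon_ready = 1;
        on_channel_callback();          /* normal processing resumes */
        return 0;
}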
diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c
index 186b10083c55..14dce25c104f 100644
--- a/drivers/hv/hv_util.c
+++ b/drivers/hv/hv_util.c
@@ -202,27 +202,39 @@ static void shutdown_onchannelcallback(void *context)
 /*
  * Set the host time in a process context.
  */
+static struct work_struct adj_time_work;
 
-struct adj_time_work {
-	struct work_struct work;
-	u64	host_time;
-	u64	ref_time;
-	u8	flags;
-};
+/*
+ * The last time sample, received from the host. PTP device responds to
+ * requests by using this data and the current partition-wide time reference
+ * count.
+ */
+static struct {
+	u64				host_time;
+	u64				ref_time;
+	spinlock_t			lock;
+} host_ts;
 
-static void hv_set_host_time(struct work_struct *work)
+static struct timespec64 hv_get_adj_host_time(void)
 {
-	struct adj_time_work *wrk;
-	struct timespec64 host_ts;
-	u64 reftime, newtime;
-
-	wrk = container_of(work, struct adj_time_work, work);
+	struct timespec64 ts;
+	u64 newtime, reftime;
+	unsigned long flags;
 
+	spin_lock_irqsave(&host_ts.lock, flags);
 	reftime = hyperv_cs->read(hyperv_cs);
-	newtime = wrk->host_time + (reftime - wrk->ref_time);
-	host_ts = ns_to_timespec64((newtime - WLTIMEDELTA) * 100);
+	newtime = host_ts.host_time + (reftime - host_ts.ref_time);
+	ts = ns_to_timespec64((newtime - WLTIMEDELTA) * 100);
+	spin_unlock_irqrestore(&host_ts.lock, flags);
 
-	do_settimeofday64(&host_ts);
+	return ts;
+}
+
+static void hv_set_host_time(struct work_struct *work)
+{
+	struct timespec64 ts = hv_get_adj_host_time();
+
+	do_settimeofday64(&ts);
 }
 
 /*
@@ -238,62 +250,35 @@ static void hv_set_host_time(struct work_struct *work)
  * typically used as a hint to the guest. The guest is under no obligation
  * to discipline the clock.
 */
-static struct adj_time_work  wrk;
-
-/*
- * The last time sample, received from the host. PTP device responds to
- * requests by using this data and the current partition-wide time reference
- * count.
- */
-static struct {
-	u64				host_time;
-	u64				ref_time;
-	struct system_time_snapshot	snap;
-	spinlock_t			lock;
-} host_ts;
-
 static inline void adj_guesttime(u64 hosttime, u64 reftime, u8 adj_flags)
 {
 	unsigned long flags;
 	u64 cur_reftime;
 
 	/*
-	 * This check is safe since we are executing in the
-	 * interrupt context and time synch messages are always
-	 * delivered on the same CPU.
+	 * Save the adjusted time sample from the host and the snapshot
+	 * of the current system time.
 	 */
-	if (adj_flags & ICTIMESYNCFLAG_SYNC) {
-		/* Queue a job to do do_settimeofday64() */
-		if (work_pending(&wrk.work))
-			return;
-
-		wrk.host_time = hosttime;
-		wrk.ref_time = reftime;
-		wrk.flags = adj_flags;
-		schedule_work(&wrk.work);
-	} else {
-		/*
-		 * Save the adjusted time sample from the host and the snapshot
-		 * of the current system time for PTP device.
-		 */
-		spin_lock_irqsave(&host_ts.lock, flags);
-
-		cur_reftime = hyperv_cs->read(hyperv_cs);
-		host_ts.host_time = hosttime;
-		host_ts.ref_time = cur_reftime;
-		ktime_get_snapshot(&host_ts.snap);
-
-		/*
-		 * TimeSync v4 messages contain reference time (guest's Hyper-V
-		 * clocksource read when the time sample was generated), we can
-		 * improve the precision by adding the delta between now and the
-		 * time of generation.
-		 */
-		if (ts_srv_version > TS_VERSION_3)
-			host_ts.host_time += (cur_reftime - reftime);
-
-		spin_unlock_irqrestore(&host_ts.lock, flags);
-	}
+	spin_lock_irqsave(&host_ts.lock, flags);
+
+	cur_reftime = hyperv_cs->read(hyperv_cs);
+	host_ts.host_time = hosttime;
+	host_ts.ref_time = cur_reftime;
+
+	/*
+	 * TimeSync v4 messages contain reference time (guest's Hyper-V
+	 * clocksource read when the time sample was generated), we can
+	 * improve the precision by adding the delta between now and the
+	 * time of generation. For older protocols we set
+	 * reftime == cur_reftime on call.
+	 */
+	host_ts.host_time += (cur_reftime - reftime);
+
+	spin_unlock_irqrestore(&host_ts.lock, flags);
+
+	/* Schedule work to do do_settimeofday64() */
+	if (adj_flags & ICTIMESYNCFLAG_SYNC)
+		schedule_work(&adj_time_work);
 }
 
 /*
@@ -341,8 +326,8 @@ static void timesync_onchannelcallback(void *context)
 					sizeof(struct vmbuspipe_hdr) +
 					sizeof(struct icmsg_hdr)];
 			adj_guesttime(timedatap->parenttime,
-				      0,
-				      timedatap->flags);
+				      hyperv_cs->read(hyperv_cs),
+				      timedatap->flags);
 		}
 	}
 
@@ -526,58 +511,17 @@ static int hv_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 
 static int hv_ptp_gettime(struct ptp_clock_info *info, struct timespec64 *ts)
 {
-	unsigned long flags;
-	u64 newtime, reftime;
-
-	spin_lock_irqsave(&host_ts.lock, flags);
-	reftime = hyperv_cs->read(hyperv_cs);
-	newtime = host_ts.host_time + (reftime - host_ts.ref_time);
-	*ts = ns_to_timespec64((newtime - WLTIMEDELTA) * 100);
-	spin_unlock_irqrestore(&host_ts.lock, flags);
+	*ts = hv_get_adj_host_time();
 
 	return 0;
 }
 
-static int hv_ptp_get_syncdevicetime(ktime_t *device,
-				     struct system_counterval_t *system,
-				     void *ctx)
-{
-	system->cs = hyperv_cs;
-	system->cycles = host_ts.ref_time;
-	*device = ns_to_ktime((host_ts.host_time - WLTIMEDELTA) * 100);
-
-	return 0;
-}
-
-static int hv_ptp_getcrosststamp(struct ptp_clock_info *ptp,
-				 struct system_device_crosststamp *xtstamp)
-{
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&host_ts.lock, flags);
-
-	/*
-	 * host_ts contains the last time sample from the host and the snapshot
-	 * of system time. We don't need to calculate the time delta between
-	 * the reception and now as get_device_system_crosststamp() does the
-	 * required interpolation.
-	 */
-	ret = get_device_system_crosststamp(hv_ptp_get_syncdevicetime,
-					    NULL, &host_ts.snap, xtstamp);
-
-	spin_unlock_irqrestore(&host_ts.lock, flags);
-
-	return ret;
-}
-
 static struct ptp_clock_info ptp_hyperv_info = {
 	.name		= "hyperv",
 	.enable         = hv_ptp_enable,
 	.adjtime        = hv_ptp_adjtime,
 	.adjfreq        = hv_ptp_adjfreq,
 	.gettime64      = hv_ptp_gettime,
-	.getcrosststamp = hv_ptp_getcrosststamp,
 	.settime64      = hv_ptp_settime,
 	.owner		= THIS_MODULE,
 };
@@ -592,7 +536,7 @@ static int hv_timesync_init(struct hv_util_service *srv)
 
 	spin_lock_init(&host_ts.lock);
 
-	INIT_WORK(&wrk.work, hv_set_host_time);
+	INIT_WORK(&adj_time_work, hv_set_host_time);
 
 	/*
 	 * ptp_clock_register() returns NULL when CONFIG_PTP_1588_CLOCK is
@@ -613,7 +557,7 @@ static void hv_timesync_deinit(void)
 {
 	if (hv_ptp_clock)
 		ptp_clock_unregister(hv_ptp_clock);
-	cancel_work_sync(&wrk.work);
+	cancel_work_sync(&adj_time_work);
 }
 
 static int __init init_hyperv_utils(void)
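The new hv_get_adj_host_time() computes guest wall time by advancing the saved host sample by the elapsed reference-counter ticks and rebasing from the Windows epoch: ns = (host_time + (now - ref_time) - WLTIMEDELTA) * 100, since the Hyper-V reference counter ticks in 100 ns units and WLTIMEDELTA (11644473600 seconds in those units) shifts 1601-01-01 to the Unix epoch. A self-contained sketch of that arithmetic with made-up sample values:

/*
 * Userspace sketch of the time math in hv_get_adj_host_time().
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define WLTIMEDELTA 116444736000000000ULL      /* 1601 -> 1970, 100 ns units */

struct host_sample {
        uint64_t host_time;     /* host wall time, 100 ns units since 1601 */
        uint64_t ref_time;      /* reference counter when sampled */
};

static uint64_t adj_host_time_ns(const struct host_sample *s, uint64_t now_ref)
{
        /* Advance the sample by the reference ticks elapsed since capture. */
        uint64_t newtime = s->host_time + (now_ref - s->ref_time);

        /* Rebase to the Unix epoch; widen 100 ns units to nanoseconds. */
        return (newtime - WLTIMEDELTA) * 100;
}

int main(void)
{
        /* Sample: host said "1.5 s past the Unix epoch" at ref tick 1000. */
        struct host_sample s = {
                .host_time = WLTIMEDELTA + 15000000,
                .ref_time  = 1000,
        };

        /* 2000 ticks (200 us) later: prints 1500200000 ns. */
        printf("%" PRIu64 " ns\n", adj_host_time_ns(&s, 3000));
        return 0;
}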
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 6113e915c50e..1b6a5e0dfa75 100644
--- a/drivers/hv/hyperv_vmbus.h
+++ b/drivers/hv/hyperv_vmbus.h
@@ -303,6 +303,13 @@ enum vmbus_connect_state {
 #define MAX_SIZE_CHANNEL_MESSAGE	HV_MESSAGE_PAYLOAD_BYTE_COUNT
 
 struct vmbus_connection {
+	/*
+	 * CPU on which the initial host contact was made.
+	 */
+	int connect_cpu;
+
+	atomic_t offer_in_progress;
+
 	enum vmbus_connect_state conn_state;
 
 	atomic_t next_gpadl_handle;
@@ -411,6 +418,10 @@ static inline void hv_poll_channel(struct vmbus_channel *channel,
 	if (!channel)
 		return;
 
+	if (in_interrupt() && (channel->target_cpu == smp_processor_id())) {
+		cb(channel);
+		return;
+	}
 	smp_call_function_single(channel->target_cpu, cb, channel, true);
 }
 
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 0087b49095eb..ed84e96715a0 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -608,40 +608,6 @@ static void vmbus_free_dynids(struct hv_driver *drv)
 	spin_unlock(&drv->dynids.lock);
 }
 
-/* Parse string of form: 1b4e28ba-2fa1-11d2-883f-b9a761bde3f */
-static int get_uuid_le(const char *str, uuid_le *uu)
-{
-	unsigned int b[16];
-	int i;
-
-	if (strlen(str) < 37)
-		return -1;
-
-	for (i = 0; i < 36; i++) {
-		switch (i) {
-		case 8: case 13: case 18: case 23:
-			if (str[i] != '-')
-				return -1;
-			break;
-		default:
-			if (!isxdigit(str[i]))
-				return -1;
-		}
-	}
-
-	/* unparse little endian output byte order */
-	if (sscanf(str,
-	    "%2x%2x%2x%2x-%2x%2x-%2x%2x-%2x%2x-%2x%2x%2x%2x%2x%2x",
-		   &b[3], &b[2], &b[1], &b[0],
-		   &b[5], &b[4], &b[7], &b[6], &b[8], &b[9],
-		   &b[10], &b[11], &b[12], &b[13], &b[14], &b[15]) != 16)
-		return -1;
-
-	for (i = 0; i < 16; i++)
-		uu->b[i] = b[i];
-	return 0;
-}
-
 /*
  * store_new_id - sysfs frontend to vmbus_add_dynid()
  *
@@ -651,11 +617,12 @@ static ssize_t new_id_store(struct device_driver *driver, const char *buf,
 			    size_t count)
 {
 	struct hv_driver *drv = drv_to_hv_drv(driver);
-	uuid_le guid = NULL_UUID_LE;
+	uuid_le guid;
 	ssize_t retval;
 
-	if (get_uuid_le(buf, &guid) != 0)
-		return -EINVAL;
+	retval = uuid_le_to_bin(buf, &guid);
+	if (retval)
+		return retval;
 
 	if (hv_vmbus_get_id(drv, &guid))
 		return -EEXIST;
@@ -677,12 +644,14 @@ static ssize_t remove_id_store(struct device_driver *driver, const char *buf,
 {
 	struct hv_driver *drv = drv_to_hv_drv(driver);
 	struct vmbus_dynid *dynid, *n;
-	uuid_le guid = NULL_UUID_LE;
-	size_t retval = -ENODEV;
+	uuid_le guid;
+	ssize_t retval;
 
-	if (get_uuid_le(buf, &guid))
-		return -EINVAL;
+	retval = uuid_le_to_bin(buf, &guid);
+	if (retval)
+		return retval;
 
+	retval = -ENODEV;
 	spin_lock(&drv->dynids.lock);
 	list_for_each_entry_safe(dynid, n, &drv->dynids.list, node) {
 		struct hv_vmbus_device_id *id = &dynid->id;
@@ -798,8 +767,10 @@ static void vmbus_device_release(struct device *device)
 	struct hv_device *hv_dev = device_to_hv_device(device);
 	struct vmbus_channel *channel = hv_dev->channel;
 
+	mutex_lock(&vmbus_connection.channel_mutex);
 	hv_process_channel_removal(channel,
 				   channel->offermsg.child_relid);
+	mutex_unlock(&vmbus_connection.channel_mutex);
 	kfree(hv_dev);
 
 }
@@ -877,7 +848,32 @@ void vmbus_on_msg_dpc(unsigned long data)
 		INIT_WORK(&ctx->work, vmbus_onmessage_work);
 		memcpy(&ctx->msg, msg, sizeof(*msg));
 
-		queue_work(vmbus_connection.work_queue, &ctx->work);
+		/*
+		 * The host can generate a rescind message while we
+		 * may still be handling the original offer. We deal with
+		 * this condition by ensuring the processing is done on the
+		 * same CPU.
+		 */
+		switch (hdr->msgtype) {
+		case CHANNELMSG_RESCIND_CHANNELOFFER:
+			/*
+			 * If we are handling the rescind message,
+			 * schedule the work on the global work queue.
+			 */
+			schedule_work_on(vmbus_connection.connect_cpu,
+					 &ctx->work);
+			break;
+
+		case CHANNELMSG_OFFERCHANNEL:
+			atomic_inc(&vmbus_connection.offer_in_progress);
+			queue_work_on(vmbus_connection.connect_cpu,
+				      vmbus_connection.work_queue,
+				      &ctx->work);
+			break;
+
+		default:
+			queue_work(vmbus_connection.work_queue, &ctx->work);
+		}
 	} else
 		entry->message_handler(hdr);
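Finally, the vmbus_on_msg_dpc() switch above funnels both offers and rescinds to the same CPU precisely because a single FIFO consumer cannot reorder them: a rescind can never overtake its offer. The toy program below demonstrates that property with one worker thread draining a mutex-protected queue; only the message names are borrowed from the driver, the queue itself is purely illustrative.

/*
 * One FIFO, one consumer: messages are handled strictly in post order,
 * mirroring why offer and rescind are pinned to the connect_cpu queue.
 */
#include <pthread.h>
#include <stdio.h>

#define QMAX 8
static const char *q[QMAX];
static int q_head, q_tail;
static pthread_mutex_t q_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t q_cond = PTHREAD_COND_INITIALIZER;

static void post(const char *msg)
{
        pthread_mutex_lock(&q_lock);
        q[q_tail++ % QMAX] = msg;
        pthread_cond_signal(&q_cond);
        pthread_mutex_unlock(&q_lock);
}

static void *worker(void *arg)
{
        for (int handled = 0; handled < 2; handled++) {
                pthread_mutex_lock(&q_lock);
                while (q_head == q_tail)
                        pthread_cond_wait(&q_cond, &q_lock);
                const char *msg = q[q_head++ % QMAX];
                pthread_mutex_unlock(&q_lock);
                printf("handling %s\n", msg);   /* always offer, then rescind */
        }
        return NULL;
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL); /* the "connect_cpu" worker */
        post("CHANNELMSG_OFFERCHANNEL");        /* host guarantees this order */
        post("CHANNELMSG_RESCIND_CHANNELOFFER");/* one consumer preserves it */
        pthread_join(t, NULL);
        return 0;
}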