From 9c17d96500f78d7ecdb71ca6942830158bc75a2b Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Tue, 10 Nov 2015 15:10:33 -0500
Subject: xen/gntdev: Grant maps should not be subject to NUMA balancing

Doing so will cause the grant to be unmapped and then, during
fault handling, the fault to be mistakenly treated as NUMA hint
fault.

In addition, even if those maps could partcipate in NUMA
balancing, it wouldn't provide any benefit since we are unable
to determine physical page's node (even if/when VNUMA is
implemented).

Marking grant maps' VMAs as VM_IO will exclude them from being
part of NUMA balancing.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: stable@vger.kernel.org
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 drivers/xen/gntdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 2ea0b3b2a91d..1be5dd048622 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -804,7 +804,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 
 	vma->vm_ops = &gntdev_vmops;
 
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_IO;
 
 	if (use_ptemod)
 		vma->vm_flags |= VM_DONTCOPY;
-- 
cgit v1.2.3


From b4ff8389ed14b849354b59ce9b360bdefcdbf99c Mon Sep 17 00:00:00 2001
From: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Date: Fri, 20 Nov 2015 11:25:04 -0500
Subject: xen/events: Always allocate legacy interrupts on PV guests

After commit 8c058b0b9c34 ("x86/irq: Probe for PIC presence before
allocating descs for legacy IRQs") early_irq_init() will no longer
preallocate descriptors for legacy interrupts if PIC does not
exist, which is the case for Xen PV guests.

Therefore we may need to allocate those descriptors ourselves.

Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
---
 arch/arm/include/asm/irq.h       | 5 +++++
 arch/arm64/include/asm/irq.h     | 5 +++++
 drivers/xen/events/events_base.c | 5 +++--
 3 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'drivers/xen')

diff --git a/arch/arm/include/asm/irq.h b/arch/arm/include/asm/irq.h
index be1d07d59ee9..1bd9510de1b9 100644
--- a/arch/arm/include/asm/irq.h
+++ b/arch/arm/include/asm/irq.h
@@ -40,6 +40,11 @@ extern void arch_trigger_all_cpu_backtrace(bool);
 #define arch_trigger_all_cpu_backtrace(x) arch_trigger_all_cpu_backtrace(x)
 #endif
 
+static inline int nr_legacy_irqs(void)
+{
+	return NR_IRQS_LEGACY;
+}
+
 #endif
 
 #endif
diff --git a/arch/arm64/include/asm/irq.h b/arch/arm64/include/asm/irq.h
index bbb251b14746..8b9bf54105b3 100644
--- a/arch/arm64/include/asm/irq.h
+++ b/arch/arm64/include/asm/irq.h
@@ -21,4 +21,9 @@ static inline void acpi_irq_init(void)
 }
 #define acpi_irq_init acpi_irq_init
 
+static inline int nr_legacy_irqs(void)
+{
+	return 0;
+}
+
 #endif
diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c
index 849500e4e14d..524c22146429 100644
--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -39,6 +39,7 @@
 #include <asm/irq.h>
 #include <asm/idle.h>
 #include <asm/io_apic.h>
+#include <asm/i8259.h>
 #include <asm/xen/pci.h>
 #endif
 #include <asm/sync_bitops.h>
@@ -420,7 +421,7 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
 		return xen_allocate_irq_dynamic();
 
 	/* Legacy IRQ descriptors are already allocated by the arch. */
-	if (gsi < NR_IRQS_LEGACY)
+	if (gsi < nr_legacy_irqs())
 		irq = gsi;
 	else
 		irq = irq_alloc_desc_at(gsi, -1);
@@ -446,7 +447,7 @@ static void xen_free_irq(unsigned irq)
 	kfree(info);
 
 	/* Legacy IRQ descriptors are managed by the arch. */
-	if (irq < NR_IRQS_LEGACY)
+	if (irq < nr_legacy_irqs())
 		return;
 
 	irq_free_desc(irq);
-- 
cgit v1.2.3


From 8620015499101090ae275bf11e9bc2f9febfdf08 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Thu, 26 Nov 2015 16:14:35 +0000
Subject: xen/evtchn: dynamically grow pending event channel ring

If more than 1024 event channels are bound to a evtchn device then it
possible (even with well behaved applications) for the ring to
overflow and events to be lost (reported as an -EFBIG error).

Dynamically increase the size of the ring so there is always enough
space for all bound events.  Well behaved applicables that only unmask
events after draining them from the ring can thus no longer lose
events.

However, an application could unmask an event before draining it,
allowing multiple entries per port to accumulate in the ring, and a
overflow could still occur.  So the overflow detection and reporting
is retained.

The ring size is initially only 64 entries so the common use case of
an application only binding a few events will use less memory than
before.  The ring size may grow to 512 KiB (enough for all 2^17
possible channels).  This order 7 kmalloc() may fail due to memory
fragmentation, so we fall back to trying vmalloc().

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>
---
 drivers/xen/evtchn.c | 123 ++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 107 insertions(+), 16 deletions(-)

(limited to 'drivers/xen')

diff --git a/drivers/xen/evtchn.c b/drivers/xen/evtchn.c
index 00f40f051d95..38272ad24551 100644
--- a/drivers/xen/evtchn.c
+++ b/drivers/xen/evtchn.c
@@ -49,6 +49,8 @@
 #include <linux/init.h>
 #include <linux/mutex.h>
 #include <linux/cpu.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
 
 #include <xen/xen.h>
 #include <xen/events.h>
@@ -58,10 +60,10 @@
 struct per_user_data {
 	struct mutex bind_mutex; /* serialize bind/unbind operations */
 	struct rb_root evtchns;
+	unsigned int nr_evtchns;
 
 	/* Notification ring, accessed via /dev/xen/evtchn. */
-#define EVTCHN_RING_SIZE     (PAGE_SIZE / sizeof(evtchn_port_t))
-#define EVTCHN_RING_MASK(_i) ((_i)&(EVTCHN_RING_SIZE-1))
+	unsigned int ring_size;
 	evtchn_port_t *ring;
 	unsigned int ring_cons, ring_prod, ring_overflow;
 	struct mutex ring_cons_mutex; /* protect against concurrent readers */
@@ -80,10 +82,41 @@ struct user_evtchn {
 	bool enabled;
 };
 
+static evtchn_port_t *evtchn_alloc_ring(unsigned int size)
+{
+	evtchn_port_t *ring;
+	size_t s = size * sizeof(*ring);
+
+	ring = kmalloc(s, GFP_KERNEL);
+	if (!ring)
+		ring = vmalloc(s);
+
+	return ring;
+}
+
+static void evtchn_free_ring(evtchn_port_t *ring)
+{
+	kvfree(ring);
+}
+
+static unsigned int evtchn_ring_offset(struct per_user_data *u,
+				       unsigned int idx)
+{
+	return idx & (u->ring_size - 1);
+}
+
+static evtchn_port_t *evtchn_ring_entry(struct per_user_data *u,
+					unsigned int idx)
+{
+	return u->ring + evtchn_ring_offset(u, idx);
+}
+
 static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
 	struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL;
 
+	u->nr_evtchns++;
+
 	while (*new) {
 		struct user_evtchn *this;
 
@@ -107,6 +140,7 @@ static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 
 static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn)
 {
+	u->nr_evtchns--;
 	rb_erase(&evtchn->node, &u->evtchns);
 	kfree(evtchn);
 }
@@ -144,8 +178,8 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
 
 	spin_lock(&u->ring_prod_lock);
 
-	if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) {
-		u->ring[EVTCHN_RING_MASK(u->ring_prod)] = evtchn->port;
+	if ((u->ring_prod - u->ring_cons) < u->ring_size) {
+		*evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
 		wmb(); /* Ensure ring contents visible */
 		if (u->ring_cons == u->ring_prod++) {
 			wake_up_interruptible(&u->evtchn_wait);
@@ -200,10 +234,10 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
 	}
 
 	/* Byte lengths of two chunks. Chunk split (if any) is at ring wrap. */
-	if (((c ^ p) & EVTCHN_RING_SIZE) != 0) {
-		bytes1 = (EVTCHN_RING_SIZE - EVTCHN_RING_MASK(c)) *
+	if (((c ^ p) & u->ring_size) != 0) {
+		bytes1 = (u->ring_size - evtchn_ring_offset(u, c)) *
 			sizeof(evtchn_port_t);
-		bytes2 = EVTCHN_RING_MASK(p) * sizeof(evtchn_port_t);
+		bytes2 = evtchn_ring_offset(u, p) * sizeof(evtchn_port_t);
 	} else {
 		bytes1 = (p - c) * sizeof(evtchn_port_t);
 		bytes2 = 0;
@@ -219,7 +253,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
 
 	rc = -EFAULT;
 	rmb(); /* Ensure that we see the port before we copy it. */
-	if (copy_to_user(buf, &u->ring[EVTCHN_RING_MASK(c)], bytes1) ||
+	if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
 	    ((bytes2 != 0) &&
 	     copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
 		goto unlock_out;
@@ -278,6 +312,66 @@ static ssize_t evtchn_write(struct file *file, const char __user *buf,
 	return rc;
 }
 
+static int evtchn_resize_ring(struct per_user_data *u)
+{
+	unsigned int new_size;
+	evtchn_port_t *new_ring, *old_ring;
+	unsigned int p, c;
+
+	/*
+	 * Ensure the ring is large enough to capture all possible
+	 * events. i.e., one free slot for each bound event.
+	 */
+	if (u->nr_evtchns <= u->ring_size)
+		return 0;
+
+	if (u->ring_size == 0)
+		new_size = 64;
+	else
+		new_size = 2 * u->ring_size;
+
+	new_ring = evtchn_alloc_ring(new_size);
+	if (!new_ring)
+		return -ENOMEM;
+
+	old_ring = u->ring;
+
+	/*
+	 * Access to the ring contents is serialized by either the
+	 * prod /or/ cons lock so take both when resizing.
+	 */
+	mutex_lock(&u->ring_cons_mutex);
+	spin_lock_irq(&u->ring_prod_lock);
+
+	/*
+	 * Copy the old ring contents to the new ring.
+	 *
+	 * If the ring contents crosses the end of the current ring,
+	 * it needs to be copied in two chunks.
+	 *
+	 * +---------+    +------------------+
+	 * |34567  12| -> |       1234567    |
+	 * +-----p-c-+    +------------------+
+	 */
+	p = evtchn_ring_offset(u, u->ring_prod);
+	c = evtchn_ring_offset(u, u->ring_cons);
+	if (p < c) {
+		memcpy(new_ring + c, u->ring + c, (u->ring_size - c) * sizeof(*u->ring));
+		memcpy(new_ring + u->ring_size, u->ring, p * sizeof(*u->ring));
+	} else
+		memcpy(new_ring + c, u->ring + c, (p - c) * sizeof(*u->ring));
+
+	u->ring = new_ring;
+	u->ring_size = new_size;
+
+	spin_unlock_irq(&u->ring_prod_lock);
+	mutex_unlock(&u->ring_cons_mutex);
+
+	evtchn_free_ring(old_ring);
+
+	return 0;
+}
+
 static int evtchn_bind_to_user(struct per_user_data *u, int port)
 {
 	struct user_evtchn *evtchn;
@@ -305,6 +399,10 @@ static int evtchn_bind_to_user(struct per_user_data *u, int port)
 	if (rc < 0)
 		goto err;
 
+	rc = evtchn_resize_ring(u);
+	if (rc < 0)
+		goto err;
+
 	rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, 0,
 				       u->name, evtchn);
 	if (rc < 0)
@@ -503,13 +601,6 @@ static int evtchn_open(struct inode *inode, struct file *filp)
 
 	init_waitqueue_head(&u->evtchn_wait);
 
-	u->ring = (evtchn_port_t *)__get_free_page(GFP_KERNEL);
-	if (u->ring == NULL) {
-		kfree(u->name);
-		kfree(u);
-		return -ENOMEM;
-	}
-
 	mutex_init(&u->bind_mutex);
 	mutex_init(&u->ring_cons_mutex);
 	spin_lock_init(&u->ring_prod_lock);
@@ -532,7 +623,7 @@ static int evtchn_release(struct inode *inode, struct file *filp)
 		evtchn_unbind_from_user(u, evtchn);
 	}
 
-	free_page((unsigned long)u->ring);
+	evtchn_free_ring(u->ring);
 	kfree(u->name);
 	kfree(u);
 
-- 
cgit v1.2.3