From 9594a4986192f99c01a7c0a1779b5ac0eff8e208 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:53:25 +0900
Subject: KVM: MMU: Use __gfn_to_rmap() to clean up kvm_handle_hva()

We can treat every level uniformly.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 28c8fbcc6763..2beb95b57cbe 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1281,14 +1281,14 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
 			gfn_t gfn = memslot->base_gfn + gfn_offset;
 
-			ret = handler(kvm, &memslot->rmap[gfn_offset], data);
+			ret = 0;
 
-			for (j = 0; j < KVM_NR_PAGE_SIZES - 1; ++j) {
-				struct kvm_lpage_info *linfo;
+			for (j = PT_PAGE_TABLE_LEVEL;
+			     j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
+				unsigned long *rmapp;
 
-				linfo = lpage_info_slot(gfn, memslot,
-							PT_DIRECTORY_LEVEL + j);
-				ret |= handler(kvm, &linfo->rmap_pde, data);
+				rmapp = __gfn_to_rmap(gfn, j, memslot);
+				ret |= handler(kvm, rmapp, data);
 			}
 			trace_kvm_age_page(hva, memslot, ret);
 			retval |= ret;
-- 
cgit v1.2.3


From d19a748b1c42b133e9263e9023c1d162efa6f4ad Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:54:30 +0900
Subject: KVM: Introduce hva_to_gfn_memslot() for kvm_handle_hva()

This restricts hva handling in mmu code and makes it easier to extend
kvm_handle_hva() so that it can treat a range of addresses later in this
patch series.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Cc: Alexander Graf <agraf@suse.de>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 6 +++---
 arch/x86/kvm/mmu.c                  | 3 +--
 include/linux/kvm_host.h            | 8 ++++++++
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d03eb6f7b058..37037553fe60 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -772,10 +772,10 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
-			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
+			gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
+			gfn_t gfn_offset = gfn - memslot->base_gfn;
 
-			ret = handler(kvm, &memslot->rmap[gfn_offset],
-				      memslot->base_gfn + gfn_offset);
+			ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
 			retval |= ret;
 		}
 	}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 2beb95b57cbe..170a632d9d34 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1278,8 +1278,7 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
 		end = start + (memslot->npages << PAGE_SHIFT);
 		if (hva >= start && hva < end) {
-			gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT;
-			gfn_t gfn = memslot->base_gfn + gfn_offset;
+			gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
 
 			ret = 0;
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e3c86f8c86c9..6f6c18a03c50 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -740,6 +740,14 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level)
 		(base_gfn >> KVM_HPAGE_GFN_SHIFT(level));
 }
 
+static inline gfn_t
+hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
+{
+	gfn_t gfn_offset = (hva - slot->userspace_addr) >> PAGE_SHIFT;
+
+	return slot->base_gfn + gfn_offset;
+}
+
 static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
 					       gfn_t gfn)
 {
-- 
cgit v1.2.3


From 84504ef38673fa021b3d8f3da2b79cf878b33315 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:55:48 +0900
Subject: KVM: MMU: Make kvm_handle_hva() handle range of addresses

When guest's memory is backed by THP pages, MMU notifier needs to call
kvm_unmap_hva(), which in turn leads to kvm_handle_hva(), in a loop to
invalidate a range of pages which constitute one huge page:

  for each page
    for each memslot
      if page is in memslot
        unmap using rmap

This means although every page in that range is expected to be found in
the same memslot, we are forced to check unrelated memslots many times.
If the guest has more memslots, the situation will become worse.

Furthermore, if the range does not include any pages in the guest's
memory, the loop over the pages will just consume extra time.

This patch, together with the following patches, solves this problem by
introducing kvm_handle_hva_range() which makes the loop look like this:

  for each memslot
    for each page in memslot
      unmap using rmap

In this new processing, the actual work is converted to a loop over rmap
which is much more cache friendly than before.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Cc: Alexander Graf <agraf@suse.de>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 36 ++++++++++++++++++++++++-------
 arch/x86/kvm/mmu.c                  | 42 ++++++++++++++++++++++++++++---------
 2 files changed, 60 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 37037553fe60..1a470bc28763 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -756,9 +756,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 	goto out_put;
 }
 
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
-					 unsigned long gfn))
+static int kvm_handle_hva_range(struct kvm *kvm,
+				unsigned long start,
+				unsigned long end,
+				int (*handler)(struct kvm *kvm,
+					       unsigned long *rmapp,
+					       unsigned long gfn))
 {
 	int ret;
 	int retval = 0;
@@ -767,12 +770,22 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 
 	slots = kvm_memslots(kvm);
 	kvm_for_each_memslot(memslot, slots) {
-		unsigned long start = memslot->userspace_addr;
-		unsigned long end;
+		unsigned long hva_start, hva_end;
+		gfn_t gfn, gfn_end;
+
+		hva_start = max(start, memslot->userspace_addr);
+		hva_end = min(end, memslot->userspace_addr +
+					(memslot->npages << PAGE_SHIFT));
+		if (hva_start >= hva_end)
+			continue;
+		/*
+		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
+		 * {gfn, gfn+1, ..., gfn_end-1}.
+		 */
+		gfn = hva_to_gfn_memslot(hva_start, memslot);
+		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 
-		end = start + (memslot->npages << PAGE_SHIFT);
-		if (hva >= start && hva < end) {
-			gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
+		for (; gfn < gfn_end; ++gfn) {
 			gfn_t gfn_offset = gfn - memslot->base_gfn;
 
 			ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
@@ -783,6 +796,13 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 	return retval;
 }
 
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 unsigned long gfn))
+{
+	return kvm_handle_hva_range(kvm, hva, hva + 1, handler);
+}
+
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			   unsigned long gfn)
 {
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 170a632d9d34..7235b0c9587d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1259,10 +1259,13 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	return 0;
 }
 
-static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
-			  unsigned long data,
-			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
-					 unsigned long data))
+static int kvm_handle_hva_range(struct kvm *kvm,
+				unsigned long start,
+				unsigned long end,
+				unsigned long data,
+				int (*handler)(struct kvm *kvm,
+					       unsigned long *rmapp,
+					       unsigned long data))
 {
 	int j;
 	int ret;
@@ -1273,13 +1276,22 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 	slots = kvm_memslots(kvm);
 
 	kvm_for_each_memslot(memslot, slots) {
-		unsigned long start = memslot->userspace_addr;
-		unsigned long end;
+		unsigned long hva_start, hva_end;
+		gfn_t gfn, gfn_end;
 
-		end = start + (memslot->npages << PAGE_SHIFT);
-		if (hva >= start && hva < end) {
-			gfn_t gfn = hva_to_gfn_memslot(hva, memslot);
+		hva_start = max(start, memslot->userspace_addr);
+		hva_end = min(end, memslot->userspace_addr +
+					(memslot->npages << PAGE_SHIFT));
+		if (hva_start >= hva_end)
+			continue;
+		/*
+		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
+		 * {gfn, gfn+1, ..., gfn_end-1}.
+		 */
+		gfn = hva_to_gfn_memslot(hva_start, memslot);
+		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 
+		for (; gfn < gfn_end; ++gfn) {
 			ret = 0;
 
 			for (j = PT_PAGE_TABLE_LEVEL;
@@ -1289,7 +1301,9 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 				rmapp = __gfn_to_rmap(gfn, j, memslot);
 				ret |= handler(kvm, rmapp, data);
 			}
-			trace_kvm_age_page(hva, memslot, ret);
+			trace_kvm_age_page(memslot->userspace_addr +
+					(gfn - memslot->base_gfn) * PAGE_SIZE,
+					memslot, ret);
 			retval |= ret;
 		}
 	}
@@ -1297,6 +1311,14 @@ static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 	return retval;
 }
 
+static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
+			  unsigned long data,
+			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 unsigned long data))
+{
+	return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
+}
+
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 {
 	return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
-- 
cgit v1.2.3


From b3ae2096974b12c3af2ad1a4e7716b084949867f Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:56:33 +0900
Subject: KVM: Introduce kvm_unmap_hva_range() for
 kvm_mmu_notifier_invalidate_range_start()

When we tested KVM under memory pressure, with THP enabled on the host,
we noticed that MMU notifier took a long time to invalidate huge pages.

Since the invalidation was done with mmu_lock held, it not only wasted
the CPU but also made the host harder to respond.

This patch mitigates this by using kvm_handle_hva_range().

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Cc: Alexander Graf <agraf@suse.de>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/include/asm/kvm_host.h | 2 ++
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 7 +++++++
 arch/x86/include/asm/kvm_host.h     | 1 +
 arch/x86/kvm/mmu.c                  | 5 +++++
 virt/kvm/kvm_main.c                 | 3 +--
 5 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 50ea12fd7bf5..572ad0141268 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -52,6 +52,8 @@
 
 struct kvm;
 extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+extern int kvm_unmap_hva_range(struct kvm *kvm,
+			       unsigned long start, unsigned long end);
 extern int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 1a470bc28763..3c635c0616b0 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -870,6 +870,13 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 	return 0;
 }
 
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+	if (kvm->arch.using_mmu_notifiers)
+		kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+	return 0;
+}
+
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 			 unsigned long gfn)
 {
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a3e9409e90b6..d4aab865606c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -944,6 +944,7 @@ extern bool kvm_rebooting;
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva);
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);
 int kvm_age_hva(struct kvm *kvm, unsigned long hva);
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 7235b0c9587d..d2855f895fde 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1324,6 +1324,11 @@ int kvm_unmap_hva(struct kvm *kvm, unsigned long hva)
 	return kvm_handle_hva(kvm, hva, 0, kvm_unmap_rmapp);
 }
 
+int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end)
+{
+	return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp);
+}
+
 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
 	kvm_handle_hva(kvm, hva, (unsigned long)&pte, kvm_set_pte_rmapp);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index b3ce91c623e2..e2b1a159e5df 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -332,8 +332,7 @@ static void kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
 	 * count is also read inside the mmu_lock critical section.
 	 */
 	kvm->mmu_notifier_count++;
-	for (; start < end; start += PAGE_SIZE)
-		need_tlb_flush |= kvm_unmap_hva(kvm, start);
+	need_tlb_flush = kvm_unmap_hva_range(kvm, start, end);
 	need_tlb_flush |= kvm->tlbs_dirty;
 	/* we've to flush the tlb before the pages can be freed */
 	if (need_tlb_flush)
-- 
cgit v1.2.3


From 77d11309b3a10e1ce112058ec2c9b7b979bcf311 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:57:17 +0900
Subject: KVM: Separate rmap_pde from kvm_lpage_info->write_count

This makes it possible to loop over rmap_pde arrays in the same way as
we do over rmap so that we can optimize kvm_handle_hva_range() easily in
the following patch.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 +-
 arch/x86/kvm/mmu.c              |  6 +++---
 arch/x86/kvm/x86.c              | 11 +++++++++++
 3 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d4aab865606c..4f98da9243fc 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -500,11 +500,11 @@ struct kvm_vcpu_arch {
 };
 
 struct kvm_lpage_info {
-	unsigned long rmap_pde;
 	int write_count;
 };
 
 struct kvm_arch_memory_slot {
+	unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1];
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d2855f895fde..3b3f5ae5da6a 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -960,13 +960,13 @@ static void pte_list_walk(unsigned long *pte_list, pte_list_walk_fn fn)
 static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
 				    struct kvm_memory_slot *slot)
 {
-	struct kvm_lpage_info *linfo;
+	unsigned long idx;
 
 	if (likely(level == PT_PAGE_TABLE_LEVEL))
 		return &slot->rmap[gfn - slot->base_gfn];
 
-	linfo = lpage_info_slot(gfn, slot, level);
-	return &linfo->rmap_pde;
+	idx = gfn_to_index(gfn, slot->base_gfn, level);
+	return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx];
 }
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59b59508ff07..829b4e972558 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6314,6 +6314,10 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 	int i;
 
 	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+		if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) {
+			kvm_kvfree(free->arch.rmap_pde[i]);
+			free->arch.rmap_pde[i] = NULL;
+		}
 		if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
 			kvm_kvfree(free->arch.lpage_info[i]);
 			free->arch.lpage_info[i] = NULL;
@@ -6333,6 +6337,11 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 		lpages = gfn_to_index(slot->base_gfn + npages - 1,
 				      slot->base_gfn, level) + 1;
 
+		slot->arch.rmap_pde[i] =
+			kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i]));
+		if (!slot->arch.rmap_pde[i])
+			goto out_free;
+
 		slot->arch.lpage_info[i] =
 			kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
 		if (!slot->arch.lpage_info[i])
@@ -6361,7 +6370,9 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 
 out_free:
 	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+		kvm_kvfree(slot->arch.rmap_pde[i]);
 		kvm_kvfree(slot->arch.lpage_info[i]);
+		slot->arch.rmap_pde[i] = NULL;
 		slot->arch.lpage_info[i] = NULL;
 	}
 	return -ENOMEM;
-- 
cgit v1.2.3


From 048212d0bc0b1769a4bbecd7ace8c8d237577d1b Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:57:59 +0900
Subject: KVM: MMU: Add memslot parameter to hva handlers

This is needed to push trace_kvm_age_page() into kvm_age_rmapp() in the
following patch.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3b3f5ae5da6a..dfd7a9a31154 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1200,7 +1200,7 @@ static bool rmap_write_protect(struct kvm *kvm, u64 gfn)
 }
 
 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			   unsigned long data)
+			   struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1218,7 +1218,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			     unsigned long data)
+			     struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1265,6 +1265,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 				unsigned long data,
 				int (*handler)(struct kvm *kvm,
 					       unsigned long *rmapp,
+					       struct kvm_memory_slot *slot,
 					       unsigned long data))
 {
 	int j;
@@ -1299,7 +1300,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 				unsigned long *rmapp;
 
 				rmapp = __gfn_to_rmap(gfn, j, memslot);
-				ret |= handler(kvm, rmapp, data);
+				ret |= handler(kvm, rmapp, memslot, data);
 			}
 			trace_kvm_age_page(memslot->userspace_addr +
 					(gfn - memslot->base_gfn) * PAGE_SIZE,
@@ -1314,6 +1315,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
 			  unsigned long data,
 			  int (*handler)(struct kvm *kvm, unsigned long *rmapp,
+					 struct kvm_memory_slot *slot,
 					 unsigned long data))
 {
 	return kvm_handle_hva_range(kvm, hva, hva + 1, data, handler);
@@ -1335,7 +1337,7 @@ void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte)
 }
 
 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			 unsigned long data)
+			 struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
 	struct rmap_iterator uninitialized_var(iter);
@@ -1350,7 +1352,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	 * out actively used pages or breaking up actively used hugepages.
 	 */
 	if (!shadow_accessed_mask)
-		return kvm_unmap_rmapp(kvm, rmapp, data);
+		return kvm_unmap_rmapp(kvm, rmapp, slot, data);
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
@@ -1367,7 +1369,7 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 }
 
 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
-			      unsigned long data)
+			      struct kvm_memory_slot *slot, unsigned long data)
 {
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -1405,7 +1407,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
 	rmapp = gfn_to_rmap(vcpu->kvm, gfn, sp->role.level);
 
-	kvm_unmap_rmapp(vcpu->kvm, rmapp, 0);
+	kvm_unmap_rmapp(vcpu->kvm, rmapp, NULL, 0);
 	kvm_flush_remote_tlbs(vcpu->kvm);
 }
 
-- 
cgit v1.2.3


From f395302e09ef783b8f82d1160510a95aa8c66dbc Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:58:48 +0900
Subject: KVM: MMU: Push trace_kvm_age_page() into kvm_age_rmapp()

This restricts the tracing to page aging and makes it possible to
optimize kvm_handle_hva_range() further in the following patch.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index dfd7a9a31154..58adec384489 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1269,8 +1269,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 					       unsigned long data))
 {
 	int j;
-	int ret;
-	int retval = 0;
+	int ret = 0;
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
 
@@ -1293,8 +1292,6 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 
 		for (; gfn < gfn_end; ++gfn) {
-			ret = 0;
-
 			for (j = PT_PAGE_TABLE_LEVEL;
 			     j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
 				unsigned long *rmapp;
@@ -1302,14 +1299,10 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 				rmapp = __gfn_to_rmap(gfn, j, memslot);
 				ret |= handler(kvm, rmapp, memslot, data);
 			}
-			trace_kvm_age_page(memslot->userspace_addr +
-					(gfn - memslot->base_gfn) * PAGE_SIZE,
-					memslot, ret);
-			retval |= ret;
 		}
 	}
 
-	return retval;
+	return ret;
 }
 
 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva,
@@ -1351,8 +1344,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 	 * This has some overhead, but not as much as the cost of swapping
 	 * out actively used pages or breaking up actively used hugepages.
 	 */
-	if (!shadow_accessed_mask)
-		return kvm_unmap_rmapp(kvm, rmapp, slot, data);
+	if (!shadow_accessed_mask) {
+		young = kvm_unmap_rmapp(kvm, rmapp, slot, data);
+		goto out;
+	}
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
@@ -1364,7 +1359,9 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
 				 (unsigned long *)sptep);
 		}
 	}
-
+out:
+	/* @data has hva passed to kvm_age_hva(). */
+	trace_kvm_age_page(data, slot, young);
 	return young;
 }
 
@@ -1413,7 +1410,7 @@ static void rmap_recycle(struct kvm_vcpu *vcpu, u64 *spte, gfn_t gfn)
 
 int kvm_age_hva(struct kvm *kvm, unsigned long hva)
 {
-	return kvm_handle_hva(kvm, hva, 0, kvm_age_rmapp);
+	return kvm_handle_hva(kvm, hva, hva, kvm_age_rmapp);
 }
 
 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
-- 
cgit v1.2.3


From bcd3ef58283a471d6b65855b83f78bd39eb55391 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Mon, 2 Jul 2012 17:59:33 +0900
Subject: KVM: MMU: Avoid handling same rmap_pde in kvm_handle_hva_range()

When we invalidate a THP page, we call the handler with the same
rmap_pde argument 512 times in the following loop:

  for each guest page in the range
    for each level
      unmap using rmap

This patch avoids these extra handler calls by changing the loop order
like this:

  for each level
    for each rmap in the range
      unmap using rmap

With the preceding patches in the patch series, this made THP page
invalidation more than 5 times faster on our x86 host: the host became
more responsive during swapping the guest's memory as a result.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 58adec384489..a5d6ef785b7e 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1277,7 +1277,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 
 	kvm_for_each_memslot(memslot, slots) {
 		unsigned long hva_start, hva_end;
-		gfn_t gfn, gfn_end;
+		gfn_t gfn_start, gfn_end;
 
 		hva_start = max(start, memslot->userspace_addr);
 		hva_end = min(end, memslot->userspace_addr +
@@ -1286,19 +1286,27 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 			continue;
 		/*
 		 * {gfn(page) | page intersects with [hva_start, hva_end)} =
-		 * {gfn, gfn+1, ..., gfn_end-1}.
+		 * {gfn_start, gfn_start+1, ..., gfn_end-1}.
 		 */
-		gfn = hva_to_gfn_memslot(hva_start, memslot);
+		gfn_start = hva_to_gfn_memslot(hva_start, memslot);
 		gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot);
 
-		for (; gfn < gfn_end; ++gfn) {
-			for (j = PT_PAGE_TABLE_LEVEL;
-			     j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
-				unsigned long *rmapp;
+		for (j = PT_PAGE_TABLE_LEVEL;
+		     j < PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES; ++j) {
+			unsigned long idx, idx_end;
+			unsigned long *rmapp;
 
-				rmapp = __gfn_to_rmap(gfn, j, memslot);
-				ret |= handler(kvm, rmapp, memslot, data);
-			}
+			/*
+			 * {idx(page_j) | page_j intersects with
+			 *  [hva_start, hva_end)} = {idx, idx+1, ..., idx_end}.
+			 */
+			idx = gfn_to_index(gfn_start, memslot->base_gfn, j);
+			idx_end = gfn_to_index(gfn_end - 1, memslot->base_gfn, j);
+
+			rmapp = __gfn_to_rmap(gfn_start, j, memslot);
+
+			for (; idx <= idx_end; ++idx)
+				ret |= handler(kvm, rmapp++, memslot, data);
 		}
 	}
 
-- 
cgit v1.2.3


From 2f5f6ad9390c1ebbf738d130dbfe80b60eaa167e Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 8 Aug 2011 16:57:47 -0400
Subject: ftrace: Pass ftrace_ops as third parameter to function trace callback

Currently the function trace callback receives only the ip and parent_ip
of the function that it traced. It would be more powerful to also return
the ops that registered the function as well. This allows the same function
to act differently depending on what ftrace_ops registered it.

Link: http://lkml.kernel.org/r/20120612225424.267254552@goodmis.org

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/ftrace.h     |   4 ++
 arch/x86/kernel/entry_64.S        |   1 +
 include/linux/ftrace.h            |  16 +++++-
 kernel/trace/ftrace.c             | 101 ++++++++++++++++++++++++++------------
 kernel/trace/trace_event_perf.c   |   3 +-
 kernel/trace/trace_events.c       |   3 +-
 kernel/trace/trace_functions.c    |   9 ++--
 kernel/trace/trace_irqsoff.c      |   3 +-
 kernel/trace/trace_sched_wakeup.c |   2 +-
 kernel/trace/trace_selftest.c     |  15 ++++--
 kernel/trace/trace_stack.c        |   2 +-
 11 files changed, 113 insertions(+), 46 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b0767bc08740..783b107eacbc 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -32,6 +32,10 @@
 #define MCOUNT_ADDR		((long)(mcount))
 #define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */
 
+#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_X86_64)
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+#endif
+
 #ifndef __ASSEMBLY__
 extern void mcount(void);
 extern atomic_t modifying_ftrace_code;
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 7d65133b51be..2b4f94c5dc60 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -79,6 +79,7 @@ ENTRY(ftrace_caller)
 
 	MCOUNT_SAVE_FRAME
 
+	leaq function_trace_op, %rdx
 	movq 0x38(%rsp), %rdi
 	movq 8(%rbp), %rsi
 	subq $MCOUNT_INSN_SIZE, %rdi
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 55e6d63d46d0..2d5964119885 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -18,6 +18,15 @@
 
 #include <asm/ftrace.h>
 
+/*
+ * If the arch supports passing the variable contents of
+ * function_trace_op as the third parameter back from the
+ * mcount call, then the arch should define this as 1.
+ */
+#ifndef ARCH_SUPPORTS_FTRACE_OPS
+#define ARCH_SUPPORTS_FTRACE_OPS 0
+#endif
+
 struct module;
 struct ftrace_hash;
 
@@ -29,7 +38,10 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 		     void __user *buffer, size_t *lenp,
 		     loff_t *ppos);
 
-typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
+struct ftrace_ops;
+
+typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
+			      struct ftrace_ops *op);
 
 /*
  * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are
@@ -163,7 +175,7 @@ static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
 	return *this_cpu_ptr(ops->disabled);
 }
 
-extern void ftrace_stub(unsigned long a0, unsigned long a1);
+extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op);
 
 #else /* !CONFIG_FUNCTION_TRACER */
 /*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index b4f20fba09fc..4f2ab9352a68 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -64,12 +64,19 @@
 
 #define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
 
+static struct ftrace_ops ftrace_list_end __read_mostly = {
+	.func		= ftrace_stub,
+};
+
 /* ftrace_enabled is a method to turn ftrace on or off */
 int ftrace_enabled __read_mostly;
 static int last_ftrace_enabled;
 
 /* Quick disabling of function tracer. */
-int function_trace_stop;
+int function_trace_stop __read_mostly;
+
+/* Current function tracing op */
+struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
 
 /* List for set_ftrace_pid's pids. */
 LIST_HEAD(ftrace_pids);
@@ -86,10 +93,6 @@ static int ftrace_disabled __read_mostly;
 
 static DEFINE_MUTEX(ftrace_lock);
 
-static struct ftrace_ops ftrace_list_end __read_mostly = {
-	.func		= ftrace_stub,
-};
-
 static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
@@ -100,8 +103,14 @@ ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 static struct ftrace_ops global_ops;
 static struct ftrace_ops control_ops;
 
-static void
-ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
+#if ARCH_SUPPORTS_FTRACE_OPS
+static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
+				 struct ftrace_ops *op);
+#else
+/* See comment below, where ftrace_ops_list_func is defined */
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip);
+#define ftrace_ops_list_func ((ftrace_func_t)ftrace_ops_no_ops)
+#endif
 
 /*
  * Traverse the ftrace_global_list, invoking all entries.  The reason that we
@@ -112,29 +121,29 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
  *
  * Silly Alpha and silly pointer-speculation compiler optimizations!
  */
-static void ftrace_global_list_func(unsigned long ip,
-				    unsigned long parent_ip)
+static void
+ftrace_global_list_func(unsigned long ip, unsigned long parent_ip,
+			struct ftrace_ops *op)
 {
-	struct ftrace_ops *op;
-
 	if (unlikely(trace_recursion_test(TRACE_GLOBAL_BIT)))
 		return;
 
 	trace_recursion_set(TRACE_GLOBAL_BIT);
 	op = rcu_dereference_raw(ftrace_global_list); /*see above*/
 	while (op != &ftrace_list_end) {
-		op->func(ip, parent_ip);
+		op->func(ip, parent_ip, op);
 		op = rcu_dereference_raw(op->next); /*see above*/
 	};
 	trace_recursion_clear(TRACE_GLOBAL_BIT);
 }
 
-static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip)
+static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
+			    struct ftrace_ops *op)
 {
 	if (!test_tsk_trace_trace(current))
 		return;
 
-	ftrace_pid_function(ip, parent_ip);
+	ftrace_pid_function(ip, parent_ip, op);
 }
 
 static void set_ftrace_pid_function(ftrace_func_t func)
@@ -163,12 +172,13 @@ void clear_ftrace_function(void)
  * For those archs that do not test ftrace_trace_stop in their
  * mcount call site, we need to do it from C.
  */
-static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
+static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip,
+				  struct ftrace_ops *op)
 {
 	if (function_trace_stop)
 		return;
 
-	__ftrace_trace_function(ip, parent_ip);
+	__ftrace_trace_function(ip, parent_ip, op);
 }
 #endif
 
@@ -230,15 +240,24 @@ static void update_ftrace_function(void)
 
 	/*
 	 * If we are at the end of the list and this ops is
-	 * not dynamic, then have the mcount trampoline call
-	 * the function directly
+	 * not dynamic and the arch supports passing ops, then have the
+	 * mcount trampoline call the function directly.
 	 */
 	if (ftrace_ops_list == &ftrace_list_end ||
 	    (ftrace_ops_list->next == &ftrace_list_end &&
-	     !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC)))
+	     !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) &&
+	     ARCH_SUPPORTS_FTRACE_OPS)) {
+		/* Set the ftrace_ops that the arch callback uses */
+		if (ftrace_ops_list == &global_ops)
+			function_trace_op = ftrace_global_list;
+		else
+			function_trace_op = ftrace_ops_list;
 		func = ftrace_ops_list->func;
-	else
+	} else {
+		/* Just use the default ftrace_ops */
+		function_trace_op = &ftrace_list_end;
 		func = ftrace_ops_list_func;
+	}
 
 #ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	ftrace_trace_function = func;
@@ -773,7 +792,8 @@ ftrace_profile_alloc(struct ftrace_profile_stat *stat, unsigned long ip)
 }
 
 static void
-function_profile_call(unsigned long ip, unsigned long parent_ip)
+function_profile_call(unsigned long ip, unsigned long parent_ip,
+		      struct ftrace_ops *ops)
 {
 	struct ftrace_profile_stat *stat;
 	struct ftrace_profile *rec;
@@ -803,7 +823,7 @@ function_profile_call(unsigned long ip, unsigned long parent_ip)
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 static int profile_graph_entry(struct ftrace_graph_ent *trace)
 {
-	function_profile_call(trace->func, 0);
+	function_profile_call(trace->func, 0, NULL);
 	return 1;
 }
 
@@ -2790,8 +2810,8 @@ static int __init ftrace_mod_cmd_init(void)
 }
 device_initcall(ftrace_mod_cmd_init);
 
-static void
-function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
+static void function_trace_probe_call(unsigned long ip, unsigned long parent_ip,
+				      struct ftrace_ops *op)
 {
 	struct ftrace_func_probe *entry;
 	struct hlist_head *hhd;
@@ -3942,10 +3962,9 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 static void
-ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
+ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip,
+			struct ftrace_ops *op)
 {
-	struct ftrace_ops *op;
-
 	if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT)))
 		return;
 
@@ -3959,7 +3978,7 @@ ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
 	while (op != &ftrace_list_end) {
 		if (!ftrace_function_local_disabled(op) &&
 		    ftrace_ops_test(op, ip))
-			op->func(ip, parent_ip);
+			op->func(ip, parent_ip, op);
 
 		op = rcu_dereference_raw(op->next);
 	};
@@ -3971,8 +3990,9 @@ static struct ftrace_ops control_ops = {
 	.func = ftrace_ops_control_func,
 };
 
-static void
-ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
+static inline void
+__ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *ignored)
 {
 	struct ftrace_ops *op;
 
@@ -3988,13 +4008,32 @@ ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
 	op = rcu_dereference_raw(ftrace_ops_list);
 	while (op != &ftrace_list_end) {
 		if (ftrace_ops_test(op, ip))
-			op->func(ip, parent_ip);
+			op->func(ip, parent_ip, op);
 		op = rcu_dereference_raw(op->next);
 	};
 	preempt_enable_notrace();
 	trace_recursion_clear(TRACE_INTERNAL_BIT);
 }
 
+/*
+ * Some archs only support passing ip and parent_ip. Even though
+ * the list function ignores the op parameter, we do not want any
+ * C side effects, where a function is called without the caller
+ * sending a third parameter.
+ */
+#if ARCH_SUPPORTS_FTRACE_OPS
+static void ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
+				 struct ftrace_ops *op)
+{
+	__ftrace_ops_list_func(ip, parent_ip, NULL);
+}
+#else
+static void ftrace_ops_no_ops(unsigned long ip, unsigned long parent_ip)
+{
+	__ftrace_ops_list_func(ip, parent_ip, NULL);
+}
+#endif
+
 static void clear_ftrace_swapper(void)
 {
 	struct task_struct *p;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index fee3752ae8f6..a872a9a298a0 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -258,7 +258,8 @@ EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
 
 #ifdef CONFIG_FUNCTION_TRACER
 static void
-perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
+perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip,
+			  struct ftrace_ops *ops)
 {
 	struct ftrace_entry *entry;
 	struct hlist_head *head;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 29111da1d100..88daa5177bf4 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -1681,7 +1681,8 @@ static __init void event_trace_self_tests(void)
 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
 
 static void
-function_test_events_call(unsigned long ip, unsigned long parent_ip)
+function_test_events_call(unsigned long ip, unsigned long parent_ip,
+			  struct ftrace_ops *op)
 {
 	struct ring_buffer_event *event;
 	struct ring_buffer *buffer;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index c7b0c6a7db09..fceb7a9aa06d 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -48,7 +48,8 @@ static void function_trace_start(struct trace_array *tr)
 }
 
 static void
-function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
+function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip,
+				 struct ftrace_ops *op)
 {
 	struct trace_array *tr = func_trace;
 	struct trace_array_cpu *data;
@@ -75,7 +76,8 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
 }
 
 static void
-function_trace_call(unsigned long ip, unsigned long parent_ip)
+function_trace_call(unsigned long ip, unsigned long parent_ip,
+		    struct ftrace_ops *op)
 {
 	struct trace_array *tr = func_trace;
 	struct trace_array_cpu *data;
@@ -106,7 +108,8 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
 }
 
 static void
-function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
+function_stack_trace_call(unsigned long ip, unsigned long parent_ip,
+			  struct ftrace_ops *op)
 {
 	struct trace_array *tr = func_trace;
 	struct trace_array_cpu *data;
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 99d20e920368..2862c77f95d9 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -136,7 +136,8 @@ static int func_prolog_dec(struct trace_array *tr,
  * irqsoff uses its own tracer function to keep the overhead down:
  */
 static void
-irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip)
+irqsoff_tracer_call(unsigned long ip, unsigned long parent_ip,
+		    struct ftrace_ops *op)
 {
 	struct trace_array *tr = irqsoff_trace;
 	struct trace_array_cpu *data;
diff --git a/kernel/trace/trace_sched_wakeup.c b/kernel/trace/trace_sched_wakeup.c
index ff791ea48b57..0caf4f5da569 100644
--- a/kernel/trace/trace_sched_wakeup.c
+++ b/kernel/trace/trace_sched_wakeup.c
@@ -108,7 +108,7 @@ out_enable:
  * wakeup uses its own tracer function to keep the overhead down:
  */
 static void
-wakeup_tracer_call(unsigned long ip, unsigned long parent_ip)
+wakeup_tracer_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op)
 {
 	struct trace_array *tr = wakeup_trace;
 	struct trace_array_cpu *data;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 288541f977fb..9ae40c823af8 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -103,35 +103,40 @@ static inline void warn_failed_init_tracer(struct tracer *trace, int init_ret)
 
 static int trace_selftest_test_probe1_cnt;
 static void trace_selftest_test_probe1_func(unsigned long ip,
-					    unsigned long pip)
+					    unsigned long pip,
+					    struct ftrace_ops *op)
 {
 	trace_selftest_test_probe1_cnt++;
 }
 
 static int trace_selftest_test_probe2_cnt;
 static void trace_selftest_test_probe2_func(unsigned long ip,
-					    unsigned long pip)
+					    unsigned long pip,
+					    struct ftrace_ops *op)
 {
 	trace_selftest_test_probe2_cnt++;
 }
 
 static int trace_selftest_test_probe3_cnt;
 static void trace_selftest_test_probe3_func(unsigned long ip,
-					    unsigned long pip)
+					    unsigned long pip,
+					    struct ftrace_ops *op)
 {
 	trace_selftest_test_probe3_cnt++;
 }
 
 static int trace_selftest_test_global_cnt;
 static void trace_selftest_test_global_func(unsigned long ip,
-					    unsigned long pip)
+					    unsigned long pip,
+					    struct ftrace_ops *op)
 {
 	trace_selftest_test_global_cnt++;
 }
 
 static int trace_selftest_test_dyn_cnt;
 static void trace_selftest_test_dyn_func(unsigned long ip,
-					 unsigned long pip)
+					 unsigned long pip,
+					 struct ftrace_ops *op)
 {
 	trace_selftest_test_dyn_cnt++;
 }
diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c
index d4545f49242e..e20006d5fb6a 100644
--- a/kernel/trace/trace_stack.c
+++ b/kernel/trace/trace_stack.c
@@ -111,7 +111,7 @@ static inline void check_stack(void)
 }
 
 static void
-stack_trace_call(unsigned long ip, unsigned long parent_ip)
+stack_trace_call(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op)
 {
 	int cpu;
 
-- 
cgit v1.2.3


From 28fb5dfa783c25dbeeb25a72663f8066a3a517f5 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 10 Aug 2011 22:00:55 -0400
Subject: ftrace/x86_32: Push ftrace_ops in as 3rd parameter to function tracer

Add support of passing the current ftrace_ops into the 3rd parameter
of the callback to the function tracer.

Link: http://lkml.kernel.org/r/20120612225424.942411318@goodmis.org

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/ftrace.h | 2 +-
 arch/x86/kernel/entry_32.S    | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 783b107eacbc..b3bb1f3f2773 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -32,7 +32,7 @@
 #define MCOUNT_ADDR		((long)(mcount))
 #define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */
 
-#if defined(CONFIG_DYNAMIC_FTRACE) && defined(CONFIG_X86_64)
+#ifdef CONFIG_DYNAMIC_FTRACE
 #define ARCH_SUPPORTS_FTRACE_OPS 1
 #endif
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 623f28837476..e3e17a0b7ff8 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1111,6 +1111,7 @@ ENTRY(ftrace_caller)
 	pushl %edx
 	movl 0xc(%esp), %eax
 	movl 0x4(%ebp), %edx
+	leal function_trace_op, %ecx
 	subl $MCOUNT_INSN_SIZE, %eax
 
 .globl ftrace_call
-- 
cgit v1.2.3


From 08f6fba503111e0336f2b4d6915a4a18f9b60e51 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Mon, 30 Apr 2012 16:20:23 -0400
Subject: ftrace/x86: Add separate function to save regs

Add a way to have different functions calling different trampolines.
If a ftrace_ops wants regs saved on the return, then have only the
functions with ops registered to save regs. Functions registered by
other ops would not be affected, unless the functions overlap.

If one ftrace_ops registered functions A, B and C and another ops
registered fucntions to save regs on A, and D, then only functions
A and D would be saving regs. Function B and C would work as normal.
Although A is registered by both ops: normal and saves regs; this is fine
as saving the regs is needed to satisfy one of the ops that calls it
but the regs are ignored by the other ops function.

x86_64 implements the full regs saving, and i386 just passes a NULL
for regs to satisfy the ftrace_ops passing. Where an arch must supply
both regs and ftrace_ops parameters, even if regs is just NULL.

It is OK for an arch to pass NULL regs. All function trace users that
require regs passing must add the flag FTRACE_OPS_FL_SAVE_REGS when
registering the ftrace_ops. If the arch does not support saving regs
then the ftrace_ops will fail to register. The flag
FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED may be set that will prevent the
ftrace_ops from failing to register. In this case, the handler may
either check if regs is not NULL or check if ARCH_SUPPORTS_FTRACE_SAVE_REGS.
If the arch supports passing regs it will set this macro and pass regs
for ops that request them. All other archs will just pass NULL.

Link: Link: http://lkml.kernel.org/r/20120711195745.107705970@goodmis.org

Cc: Alexander van Heukelum <heukelum@fastmail.fm>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/ftrace.h |  47 +++++++++++--------
 arch/x86/kernel/entry_32.S    |   4 +-
 arch/x86/kernel/entry_64.S    |  94 +++++++++++++++++++++++++++++++++----
 arch/x86/kernel/ftrace.c      |  77 ++++++++++++++++++++++++++++--
 include/linux/ftrace.h        | 107 +++++++++++++++++++++++++++++++++++++++---
 kernel/trace/ftrace.c         |  91 +++++++++++++++++++++++++++++++----
 6 files changed, 373 insertions(+), 47 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index b3bb1f3f2773..a8475014c4ad 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -3,27 +3,33 @@
 
 #ifdef __ASSEMBLY__
 
-	.macro MCOUNT_SAVE_FRAME
-	/* taken from glibc */
-	subq $0x38, %rsp
-	movq %rax, (%rsp)
-	movq %rcx, 8(%rsp)
-	movq %rdx, 16(%rsp)
-	movq %rsi, 24(%rsp)
-	movq %rdi, 32(%rsp)
-	movq %r8, 40(%rsp)
-	movq %r9, 48(%rsp)
+	/* skip is set if the stack was already partially adjusted */
+	.macro MCOUNT_SAVE_FRAME skip=0
+	 /*
+	  * We add enough stack to save all regs.
+	  */
+	subq $(SS+8-\skip), %rsp
+	movq %rax, RAX(%rsp)
+	movq %rcx, RCX(%rsp)
+	movq %rdx, RDX(%rsp)
+	movq %rsi, RSI(%rsp)
+	movq %rdi, RDI(%rsp)
+	movq %r8, R8(%rsp)
+	movq %r9, R9(%rsp)
+	 /* Move RIP to its proper location */
+	movq SS+8(%rsp), %rdx
+	movq %rdx, RIP(%rsp)
 	.endm
 
-	.macro MCOUNT_RESTORE_FRAME
-	movq 48(%rsp), %r9
-	movq 40(%rsp), %r8
-	movq 32(%rsp), %rdi
-	movq 24(%rsp), %rsi
-	movq 16(%rsp), %rdx
-	movq 8(%rsp), %rcx
-	movq (%rsp), %rax
-	addq $0x38, %rsp
+	.macro MCOUNT_RESTORE_FRAME skip=0
+	movq R9(%rsp), %r9
+	movq R8(%rsp), %r8
+	movq RDI(%rsp), %rdi
+	movq RSI(%rsp), %rsi
+	movq RDX(%rsp), %rdx
+	movq RCX(%rsp), %rcx
+	movq RAX(%rsp), %rax
+	addq $(SS+8-\skip), %rsp
 	.endm
 
 #endif
@@ -34,6 +40,9 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define ARCH_SUPPORTS_FTRACE_OPS 1
+#ifdef CONFIG_X86_64
+#define ARCH_SUPPORTS_FTRACE_SAVE_REGS
+#endif
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index e3e17a0b7ff8..5da11d1b85ae 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1109,7 +1109,8 @@ ENTRY(ftrace_caller)
 	pushl %eax
 	pushl %ecx
 	pushl %edx
-	movl 0xc(%esp), %eax
+	pushl $0	/* Pass NULL as regs pointer */
+	movl 4*4(%esp), %eax
 	movl 0x4(%ebp), %edx
 	leal function_trace_op, %ecx
 	subl $MCOUNT_INSN_SIZE, %eax
@@ -1118,6 +1119,7 @@ ENTRY(ftrace_caller)
 ftrace_call:
 	call ftrace_stub
 
+	addl $4,%esp	/* skip NULL pointer */
 	popl %edx
 	popl %ecx
 	popl %eax
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 2b4f94c5dc60..52bda2e8f7aa 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -73,21 +73,34 @@ ENTRY(mcount)
 	retq
 END(mcount)
 
+/* skip is set if stack has been adjusted */
+.macro ftrace_caller_setup skip=0
+	MCOUNT_SAVE_FRAME \skip
+
+	/* Load the ftrace_ops into the 3rd parameter */
+	leaq function_trace_op, %rdx
+
+	/* Load ip into the first parameter */
+	movq RIP(%rsp), %rdi
+	subq $MCOUNT_INSN_SIZE, %rdi
+	/* Load the parent_ip into the second parameter */
+	movq 8(%rbp), %rsi
+.endm
+
 ENTRY(ftrace_caller)
+	/* Check if tracing was disabled (quick check) */
 	cmpl $0, function_trace_stop
 	jne  ftrace_stub
 
-	MCOUNT_SAVE_FRAME
-
-	leaq function_trace_op, %rdx
-	movq 0x38(%rsp), %rdi
-	movq 8(%rbp), %rsi
-	subq $MCOUNT_INSN_SIZE, %rdi
+	ftrace_caller_setup
+	/* regs go into 4th parameter (but make it NULL) */
+	movq $0, %rcx
 
 GLOBAL(ftrace_call)
 	call ftrace_stub
 
 	MCOUNT_RESTORE_FRAME
+ftrace_return:
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 GLOBAL(ftrace_graph_call)
@@ -98,6 +111,71 @@ GLOBAL(ftrace_stub)
 	retq
 END(ftrace_caller)
 
+ENTRY(ftrace_regs_caller)
+	/* Save the current flags before compare (in SS location)*/
+	pushfq
+
+	/* Check if tracing was disabled (quick check) */
+	cmpl $0, function_trace_stop
+	jne  ftrace_restore_flags
+
+	/* skip=8 to skip flags saved in SS */
+	ftrace_caller_setup 8
+
+	/* Save the rest of pt_regs */
+	movq %r15, R15(%rsp)
+	movq %r14, R14(%rsp)
+	movq %r13, R13(%rsp)
+	movq %r12, R12(%rsp)
+	movq %r11, R11(%rsp)
+	movq %r10, R10(%rsp)
+	movq %rbp, RBP(%rsp)
+	movq %rbx, RBX(%rsp)
+	/* Copy saved flags */
+	movq SS(%rsp), %rcx
+	movq %rcx, EFLAGS(%rsp)
+	/* Kernel segments */
+	movq $__KERNEL_DS, %rcx
+	movq %rcx, SS(%rsp)
+	movq $__KERNEL_CS, %rcx
+	movq %rcx, CS(%rsp)
+	/* Stack - skipping return address */
+	leaq SS+16(%rsp), %rcx
+	movq %rcx, RSP(%rsp)
+
+	/* regs go into 4th parameter */
+	leaq (%rsp), %rcx
+
+GLOBAL(ftrace_regs_call)
+	call ftrace_stub
+
+	/* Copy flags back to SS, to restore them */
+	movq EFLAGS(%rsp), %rax
+	movq %rax, SS(%rsp)
+
+	/* restore the rest of pt_regs */
+	movq R15(%rsp), %r15
+	movq R14(%rsp), %r14
+	movq R13(%rsp), %r13
+	movq R12(%rsp), %r12
+	movq R10(%rsp), %r10
+	movq RBP(%rsp), %rbp
+	movq RBX(%rsp), %rbx
+
+	/* skip=8 to skip flags saved in SS */
+	MCOUNT_RESTORE_FRAME 8
+
+	/* Restore flags */
+	popfq
+
+	jmp ftrace_return
+ftrace_restore_flags:
+	popfq
+	jmp  ftrace_stub
+
+END(ftrace_regs_caller)
+
+
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 ENTRY(mcount)
 	cmpl $0, function_trace_stop
@@ -120,7 +198,7 @@ GLOBAL(ftrace_stub)
 trace:
 	MCOUNT_SAVE_FRAME
 
-	movq 0x38(%rsp), %rdi
+	movq RIP(%rsp), %rdi
 	movq 8(%rbp), %rsi
 	subq $MCOUNT_INSN_SIZE, %rdi
 
@@ -141,7 +219,7 @@ ENTRY(ftrace_graph_caller)
 	MCOUNT_SAVE_FRAME
 
 	leaq 8(%rbp), %rdi
-	movq 0x38(%rsp), %rsi
+	movq RIP(%rsp), %rsi
 	movq (%rbp), %rdx
 	subq $MCOUNT_INSN_SIZE, %rsi
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index c3a7cb4bf6e6..b90eb1a13071 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -206,6 +206,23 @@ static int
 ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
 		   unsigned const char *new_code);
 
+#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+/*
+ * Should never be called:
+ *  As it is only called by __ftrace_replace_code() which is called by
+ *  ftrace_replace_code() that x86 overrides, and by ftrace_update_code()
+ *  which is called to turn mcount into nops or nops into function calls
+ *  but not to convert a function from not using regs to one that uses
+ *  regs, which ftrace_modify_call() is for.
+ */
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+				 unsigned long addr)
+{
+	WARN_ON(1);
+	return -EINVAL;
+}
+#endif
+
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	unsigned long ip = (unsigned long)(&ftrace_call);
@@ -220,6 +237,16 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 
 	ret = ftrace_modify_code(ip, old, new);
 
+#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+	/* Also update the regs callback function */
+	if (!ret) {
+		ip = (unsigned long)(&ftrace_regs_call);
+		memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE);
+		new = ftrace_call_replace(ip, (unsigned long)func);
+		ret = ftrace_modify_code(ip, old, new);
+	}
+#endif
+
 	atomic_dec(&modifying_ftrace_code);
 
 	return ret;
@@ -299,6 +326,32 @@ static int add_brk_on_nop(struct dyn_ftrace *rec)
 	return add_break(rec->ip, old);
 }
 
+/*
+ * If the record has the FTRACE_FL_REGS set, that means that it
+ * wants to convert to a callback that saves all regs. If FTRACE_FL_REGS
+ * is not not set, then it wants to convert to the normal callback.
+ */
+static unsigned long get_ftrace_addr(struct dyn_ftrace *rec)
+{
+	if (rec->flags & FTRACE_FL_REGS)
+		return (unsigned long)FTRACE_REGS_ADDR;
+	else
+		return (unsigned long)FTRACE_ADDR;
+}
+
+/*
+ * The FTRACE_FL_REGS_EN is set when the record already points to
+ * a function that saves all the regs. Basically the '_EN' version
+ * represents the current state of the function.
+ */
+static unsigned long get_ftrace_old_addr(struct dyn_ftrace *rec)
+{
+	if (rec->flags & FTRACE_FL_REGS_EN)
+		return (unsigned long)FTRACE_REGS_ADDR;
+	else
+		return (unsigned long)FTRACE_ADDR;
+}
+
 static int add_breakpoints(struct dyn_ftrace *rec, int enable)
 {
 	unsigned long ftrace_addr;
@@ -306,7 +359,7 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
 
 	ret = ftrace_test_record(rec, enable);
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
+	ftrace_addr = get_ftrace_addr(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
@@ -316,6 +369,10 @@ static int add_breakpoints(struct dyn_ftrace *rec, int enable)
 		/* converting nop to call */
 		return add_brk_on_nop(rec);
 
+	case FTRACE_UPDATE_MODIFY_CALL_REGS:
+	case FTRACE_UPDATE_MODIFY_CALL:
+		ftrace_addr = get_ftrace_old_addr(rec);
+		/* fall through */
 	case FTRACE_UPDATE_MAKE_NOP:
 		/* converting a call to a nop */
 		return add_brk_on_call(rec, ftrace_addr);
@@ -360,13 +417,21 @@ static int remove_breakpoint(struct dyn_ftrace *rec)
 		 * If not, don't touch the breakpoint, we make just create
 		 * a disaster.
 		 */
-		ftrace_addr = (unsigned long)FTRACE_ADDR;
+		ftrace_addr = get_ftrace_addr(rec);
+		nop = ftrace_call_replace(ip, ftrace_addr);
+
+		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) == 0)
+			goto update;
+
+		/* Check both ftrace_addr and ftrace_old_addr */
+		ftrace_addr = get_ftrace_old_addr(rec);
 		nop = ftrace_call_replace(ip, ftrace_addr);
 
 		if (memcmp(&ins[1], &nop[1], MCOUNT_INSN_SIZE - 1) != 0)
 			return -EINVAL;
 	}
 
+ update:
 	return probe_kernel_write((void *)ip, &nop[0], 1);
 }
 
@@ -405,12 +470,14 @@ static int add_update(struct dyn_ftrace *rec, int enable)
 
 	ret = ftrace_test_record(rec, enable);
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
+	ftrace_addr  = get_ftrace_addr(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
 
+	case FTRACE_UPDATE_MODIFY_CALL_REGS:
+	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_CALL:
 		/* converting nop to call */
 		return add_update_call(rec, ftrace_addr);
@@ -455,12 +522,14 @@ static int finish_update(struct dyn_ftrace *rec, int enable)
 
 	ret = ftrace_update_record(rec, enable);
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
+	ftrace_addr = get_ftrace_addr(rec);
 
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
 
+	case FTRACE_UPDATE_MODIFY_CALL_REGS:
+	case FTRACE_UPDATE_MODIFY_CALL:
 	case FTRACE_UPDATE_MAKE_CALL:
 		/* converting nop to call */
 		return finish_update_call(rec, ftrace_addr);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e4202881fb00..ab39990cc43f 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -71,12 +71,28 @@ typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  *           could be controled by following calls:
  *             ftrace_function_local_enable
  *             ftrace_function_local_disable
+ * SAVE_REGS - The ftrace_ops wants regs saved at each function called
+ *            and passed to the callback. If this flag is set, but the
+ *            architecture does not support passing regs
+ *            (ARCH_SUPPORTS_FTRACE_SAVE_REGS is not defined), then the
+ *            ftrace_ops will fail to register, unless the next flag
+ *            is set.
+ * SAVE_REGS_IF_SUPPORTED - This is the same as SAVE_REGS, but if the
+ *            handler can handle an arch that does not save regs
+ *            (the handler tests if regs == NULL), then it can set
+ *            this flag instead. It will not fail registering the ftrace_ops
+ *            but, the regs field will be NULL if the arch does not support
+ *            passing regs to the handler.
+ *            Note, if this flag is set, the SAVE_REGS flag will automatically
+ *            get set upon registering the ftrace_ops, if the arch supports it.
  */
 enum {
-	FTRACE_OPS_FL_ENABLED		= 1 << 0,
-	FTRACE_OPS_FL_GLOBAL		= 1 << 1,
-	FTRACE_OPS_FL_DYNAMIC		= 1 << 2,
-	FTRACE_OPS_FL_CONTROL		= 1 << 3,
+	FTRACE_OPS_FL_ENABLED			= 1 << 0,
+	FTRACE_OPS_FL_GLOBAL			= 1 << 1,
+	FTRACE_OPS_FL_DYNAMIC			= 1 << 2,
+	FTRACE_OPS_FL_CONTROL			= 1 << 3,
+	FTRACE_OPS_FL_SAVE_REGS			= 1 << 4,
+	FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED	= 1 << 5,
 };
 
 struct ftrace_ops {
@@ -254,12 +270,31 @@ extern void unregister_ftrace_function_probe_all(char *glob);
 
 extern int ftrace_text_reserved(void *start, void *end);
 
+/*
+ * The dyn_ftrace record's flags field is split into two parts.
+ * the first part which is '0-FTRACE_REF_MAX' is a counter of
+ * the number of callbacks that have registered the function that
+ * the dyn_ftrace descriptor represents.
+ *
+ * The second part is a mask:
+ *  ENABLED - the function is being traced
+ *  REGS    - the record wants the function to save regs
+ *  REGS_EN - the function is set up to save regs.
+ *
+ * When a new ftrace_ops is registered and wants a function to save
+ * pt_regs, the rec->flag REGS is set. When the function has been
+ * set up to save regs, the REG_EN flag is set. Once a function
+ * starts saving regs it will do so until all ftrace_ops are removed
+ * from tracing that function.
+ */
 enum {
-	FTRACE_FL_ENABLED	= (1 << 30),
+	FTRACE_FL_ENABLED	= (1UL << 29),
+	FTRACE_FL_REGS		= (1UL << 30),
+	FTRACE_FL_REGS_EN	= (1UL << 31)
 };
 
-#define FTRACE_FL_MASK		(0x3UL << 30)
-#define FTRACE_REF_MAX		((1 << 30) - 1)
+#define FTRACE_FL_MASK		(0x7UL << 29)
+#define FTRACE_REF_MAX		((1UL << 29) - 1)
 
 struct dyn_ftrace {
 	union {
@@ -290,9 +325,23 @@ enum {
 	FTRACE_STOP_FUNC_RET		= (1 << 4),
 };
 
+/*
+ * The FTRACE_UPDATE_* enum is used to pass information back
+ * from the ftrace_update_record() and ftrace_test_record()
+ * functions. These are called by the code update routines
+ * to find out what is to be done for a given function.
+ *
+ *  IGNORE           - The function is already what we want it to be
+ *  MAKE_CALL        - Start tracing the function
+ *  MODIFY_CALL      - Stop saving regs for the function
+ *  MODIFY_CALL_REGS - Start saving regs for the function
+ *  MAKE_NOP         - Stop tracing the function
+ */
 enum {
 	FTRACE_UPDATE_IGNORE,
 	FTRACE_UPDATE_MAKE_CALL,
+	FTRACE_UPDATE_MODIFY_CALL,
+	FTRACE_UPDATE_MODIFY_CALL_REGS,
 	FTRACE_UPDATE_MAKE_NOP,
 };
 
@@ -344,7 +393,9 @@ extern int ftrace_dyn_arch_init(void *data);
 extern void ftrace_replace_code(int enable);
 extern int ftrace_update_ftrace_func(ftrace_func_t func);
 extern void ftrace_caller(void);
+extern void ftrace_regs_caller(void);
 extern void ftrace_call(void);
+extern void ftrace_regs_call(void);
 extern void mcount_call(void);
 
 void ftrace_modify_all_code(int command);
@@ -352,6 +403,15 @@ void ftrace_modify_all_code(int command);
 #ifndef FTRACE_ADDR
 #define FTRACE_ADDR ((unsigned long)ftrace_caller)
 #endif
+
+#ifndef FTRACE_REGS_ADDR
+#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+# define FTRACE_REGS_ADDR ((unsigned long)ftrace_regs_caller)
+#else
+# define FTRACE_REGS_ADDR FTRACE_ADDR
+#endif
+#endif
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 extern void ftrace_graph_caller(void);
 extern int ftrace_enable_ftrace_graph_caller(void);
@@ -407,6 +467,39 @@ extern int ftrace_make_nop(struct module *mod,
  */
 extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);
 
+#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+/**
+ * ftrace_modify_call - convert from one addr to another (no nop)
+ * @rec: the mcount call site record
+ * @old_addr: the address expected to be currently called to
+ * @addr: the address to change to
+ *
+ * This is a very sensitive operation and great care needs
+ * to be taken by the arch.  The operation should carefully
+ * read the location, check to see if what is read is indeed
+ * what we expect it to be, and then on success of the compare,
+ * it should write to the location.
+ *
+ * The code segment at @rec->ip should be a caller to @old_addr
+ *
+ * Return must be:
+ *  0 on success
+ *  -EFAULT on error reading the location
+ *  -EINVAL on a failed compare of the contents
+ *  -EPERM  on error writing to the location
+ * Any other value will be considered a failure.
+ */
+extern int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+			      unsigned long addr);
+#else
+/* Should never be called */
+static inline int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
+				     unsigned long addr)
+{
+	return -EINVAL;
+}
+#endif
+
 /* May be defined in arch */
 extern int ftrace_arch_read_dyn_info(char *buf, int size);
 
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6ff07ad0ede3..c55f7e274613 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -314,6 +314,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
 		return -EINVAL;
 
+#ifndef ARCH_SUPPORTS_FTRACE_SAVE_REGS
+	/*
+	 * If the ftrace_ops specifies SAVE_REGS, then it only can be used
+	 * if the arch supports it, or SAVE_REGS_IF_SUPPORTED is also set.
+	 * Setting SAVE_REGS_IF_SUPPORTED makes SAVE_REGS irrelevant.
+	 */
+	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS &&
+	    !(ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED))
+		return -EINVAL;
+
+	if (ops->flags & FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED)
+		ops->flags |= FTRACE_OPS_FL_SAVE_REGS;
+#endif
+
 	if (!core_kernel_data((unsigned long)ops))
 		ops->flags |= FTRACE_OPS_FL_DYNAMIC;
 
@@ -1515,6 +1529,12 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops,
 			rec->flags++;
 			if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == FTRACE_REF_MAX))
 				return;
+			/*
+			 * If any ops wants regs saved for this function
+			 * then all ops will get saved regs.
+			 */
+			if (ops->flags & FTRACE_OPS_FL_SAVE_REGS)
+				rec->flags |= FTRACE_FL_REGS;
 		} else {
 			if (FTRACE_WARN_ON((rec->flags & ~FTRACE_FL_MASK) == 0))
 				return;
@@ -1606,18 +1626,59 @@ static int ftrace_check_record(struct dyn_ftrace *rec, int enable, int update)
 	if (enable && (rec->flags & ~FTRACE_FL_MASK))
 		flag = FTRACE_FL_ENABLED;
 
+	/*
+	 * If enabling and the REGS flag does not match the REGS_EN, then
+	 * do not ignore this record. Set flags to fail the compare against
+	 * ENABLED.
+	 */
+	if (flag &&
+	    (!(rec->flags & FTRACE_FL_REGS) != !(rec->flags & FTRACE_FL_REGS_EN)))
+		flag |= FTRACE_FL_REGS;
+
 	/* If the state of this record hasn't changed, then do nothing */
 	if ((rec->flags & FTRACE_FL_ENABLED) == flag)
 		return FTRACE_UPDATE_IGNORE;
 
 	if (flag) {
-		if (update)
+		/* Save off if rec is being enabled (for return value) */
+		flag ^= rec->flags & FTRACE_FL_ENABLED;
+
+		if (update) {
 			rec->flags |= FTRACE_FL_ENABLED;
-		return FTRACE_UPDATE_MAKE_CALL;
+			if (flag & FTRACE_FL_REGS) {
+				if (rec->flags & FTRACE_FL_REGS)
+					rec->flags |= FTRACE_FL_REGS_EN;
+				else
+					rec->flags &= ~FTRACE_FL_REGS_EN;
+			}
+		}
+
+		/*
+		 * If this record is being updated from a nop, then
+		 *   return UPDATE_MAKE_CALL.
+		 * Otherwise, if the EN flag is set, then return
+		 *   UPDATE_MODIFY_CALL_REGS to tell the caller to convert
+		 *   from the non-save regs, to a save regs function.
+		 * Otherwise,
+		 *   return UPDATE_MODIFY_CALL to tell the caller to convert
+		 *   from the save regs, to a non-save regs function.
+		 */
+		if (flag & FTRACE_FL_ENABLED)
+			return FTRACE_UPDATE_MAKE_CALL;
+		else if (rec->flags & FTRACE_FL_REGS_EN)
+			return FTRACE_UPDATE_MODIFY_CALL_REGS;
+		else
+			return FTRACE_UPDATE_MODIFY_CALL;
 	}
 
-	if (update)
-		rec->flags &= ~FTRACE_FL_ENABLED;
+	if (update) {
+		/* If there's no more users, clear all flags */
+		if (!(rec->flags & ~FTRACE_FL_MASK))
+			rec->flags = 0;
+		else
+			/* Just disable the record (keep REGS state) */
+			rec->flags &= ~FTRACE_FL_ENABLED;
+	}
 
 	return FTRACE_UPDATE_MAKE_NOP;
 }
@@ -1652,13 +1713,17 @@ int ftrace_test_record(struct dyn_ftrace *rec, int enable)
 static int
 __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 {
+	unsigned long ftrace_old_addr;
 	unsigned long ftrace_addr;
 	int ret;
 
-	ftrace_addr = (unsigned long)FTRACE_ADDR;
-
 	ret = ftrace_update_record(rec, enable);
 
+	if (rec->flags & FTRACE_FL_REGS)
+		ftrace_addr = (unsigned long)FTRACE_REGS_ADDR;
+	else
+		ftrace_addr = (unsigned long)FTRACE_ADDR;
+
 	switch (ret) {
 	case FTRACE_UPDATE_IGNORE:
 		return 0;
@@ -1668,6 +1733,15 @@ __ftrace_replace_code(struct dyn_ftrace *rec, int enable)
 
 	case FTRACE_UPDATE_MAKE_NOP:
 		return ftrace_make_nop(NULL, rec, ftrace_addr);
+
+	case FTRACE_UPDATE_MODIFY_CALL_REGS:
+	case FTRACE_UPDATE_MODIFY_CALL:
+		if (rec->flags & FTRACE_FL_REGS)
+			ftrace_old_addr = (unsigned long)FTRACE_ADDR;
+		else
+			ftrace_old_addr = (unsigned long)FTRACE_REGS_ADDR;
+
+		return ftrace_modify_call(rec, ftrace_old_addr, ftrace_addr);
 	}
 
 	return -1; /* unknow ftrace bug */
@@ -2421,8 +2495,9 @@ static int t_show(struct seq_file *m, void *v)
 
 	seq_printf(m, "%ps", (void *)rec->ip);
 	if (iter->flags & FTRACE_ITER_ENABLED)
-		seq_printf(m, " (%ld)",
-			   rec->flags & ~FTRACE_FL_MASK);
+		seq_printf(m, " (%ld)%s",
+			   rec->flags & ~FTRACE_FL_MASK,
+			   rec->flags & FTRACE_FL_REGS ? " R" : "");
 	seq_printf(m, "\n");
 
 	return 0;
-- 
cgit v1.2.3


From 4de72395ff4cf48e23b61986dbc90b99a7c4ed97 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 5 Jun 2012 20:00:11 -0400
Subject: ftrace/x86: Add save_regs for i386 function calls

Add saving full regs for function tracing on i386.
The saving of regs was influenced by patches sent out by
Masami Hiramatsu.

Link: Link: http://lkml.kernel.org/r/20120711195745.379060003@goodmis.org

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/ftrace.h |  2 --
 arch/x86/kernel/entry_32.S    | 68 +++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/ftrace.c      |  4 ---
 3 files changed, 68 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a8475014c4ad..a6cae0c1720c 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -40,10 +40,8 @@
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 #define ARCH_SUPPORTS_FTRACE_OPS 1
-#ifdef CONFIG_X86_64
 #define ARCH_SUPPORTS_FTRACE_SAVE_REGS
 #endif
-#endif
 
 #ifndef __ASSEMBLY__
 extern void mcount(void);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 5da11d1b85ae..46caa5649a5e 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1123,6 +1123,7 @@ ftrace_call:
 	popl %edx
 	popl %ecx
 	popl %eax
+ftrace_ret:
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 .globl ftrace_graph_call
 ftrace_graph_call:
@@ -1134,6 +1135,73 @@ ftrace_stub:
 	ret
 END(ftrace_caller)
 
+ENTRY(ftrace_regs_caller)
+	pushf	/* push flags before compare (in cs location) */
+	cmpl $0, function_trace_stop
+	jne ftrace_restore_flags
+
+	/*
+	 * i386 does not save SS and ESP when coming from kernel.
+	 * Instead, to get sp, &regs->sp is used (see ptrace.h).
+	 * Unfortunately, that means eflags must be at the same location
+	 * as the current return ip is. We move the return ip into the
+	 * ip location, and move flags into the return ip location.
+	 */
+	pushl 4(%esp)	/* save return ip into ip slot */
+	subl $MCOUNT_INSN_SIZE, (%esp)	/* Adjust ip */
+
+	pushl $0	/* Load 0 into orig_ax */
+	pushl %gs
+	pushl %fs
+	pushl %es
+	pushl %ds
+	pushl %eax
+	pushl %ebp
+	pushl %edi
+	pushl %esi
+	pushl %edx
+	pushl %ecx
+	pushl %ebx
+
+	movl 13*4(%esp), %eax	/* Get the saved flags */
+	movl %eax, 14*4(%esp)	/* Move saved flags into regs->flags location */
+				/* clobbering return ip */
+	movl $__KERNEL_CS,13*4(%esp)
+
+	movl 12*4(%esp), %eax	/* Load ip (1st parameter) */
+	movl 0x4(%ebp), %edx	/* Load parent ip (2cd parameter) */
+	lea  (%esp), %ecx
+	pushl %ecx		/* Save pt_regs as 4th parameter */
+	leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
+
+GLOBAL(ftrace_regs_call)
+	call ftrace_stub
+
+	addl $4, %esp		/* Skip pt_regs */
+	movl 14*4(%esp), %eax	/* Move flags back into cs */
+	movl %eax, 13*4(%esp)	/* Needed to keep addl from modifying flags */
+	movl 12*4(%esp), %eax	/* Get return ip from regs->ip */
+	addl $MCOUNT_INSN_SIZE, %eax
+	movl %eax, 14*4(%esp)	/* Put return ip back for ret */
+
+	popl %ebx
+	popl %ecx
+	popl %edx
+	popl %esi
+	popl %edi
+	popl %ebp
+	popl %eax
+	popl %ds
+	popl %es
+	popl %fs
+	popl %gs
+	addl $8, %esp		/* Skip orig_ax and ip */
+	popf			/* Pop flags at end (no addl to corrupt flags) */
+	jmp ftrace_ret
+
+ftrace_restore_flags:
+	popf
+	jmp  ftrace_stub
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 
 ENTRY(mcount)
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index b90eb1a13071..1d414029f1d8 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -206,7 +206,6 @@ static int
 ftrace_modify_code(unsigned long ip, unsigned const char *old_code,
 		   unsigned const char *new_code);
 
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
 /*
  * Should never be called:
  *  As it is only called by __ftrace_replace_code() which is called by
@@ -221,7 +220,6 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
 	WARN_ON(1);
 	return -EINVAL;
 }
-#endif
 
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
@@ -237,7 +235,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 
 	ret = ftrace_modify_code(ip, old, new);
 
-#ifdef ARCH_SUPPORTS_FTRACE_SAVE_REGS
 	/* Also update the regs callback function */
 	if (!ret) {
 		ip = (unsigned long)(&ftrace_regs_call);
@@ -245,7 +242,6 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
 		new = ftrace_call_replace(ip, (unsigned long)func);
 		ret = ftrace_modify_code(ip, old, new);
 	}
-#endif
 
 	atomic_dec(&modifying_ftrace_code);
 
-- 
cgit v1.2.3


From 9d3c92af47d853d4e31ee971dba7bc086275b7b3 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 17 Jul 2012 21:50:48 +0800
Subject: KVM: x86: remove unnecessary mark_page_dirty

fix:
[  132.474633] 3.5.0-rc1+ #50 Not tainted
[  132.474634] -------------------------------
[  132.474635] include/linux/kvm_host.h:369 suspicious rcu_dereference_check() usage!
[  132.474636]
[  132.474636] other info that might help us debug this:
[  132.474636]
[  132.474638]
[  132.474638] rcu_scheduler_active = 1, debug_locks = 1
[  132.474640] 1 lock held by qemu-kvm/2832:
[  132.474657]  #0:  (&vcpu->mutex){+.+.+.}, at: [<ffffffffa01e1636>] vcpu_load+0x1e/0x91 [kvm]
[  132.474658]
[  132.474658] stack backtrace:
[  132.474660] Pid: 2832, comm: qemu-kvm Not tainted 3.5.0-rc1+ #50
[  132.474661] Call Trace:
[  132.474665]  [<ffffffff81092f40>] lockdep_rcu_suspicious+0xfc/0x105
[  132.474675]  [<ffffffffa01e0c85>] kvm_memslots+0x6d/0x75 [kvm]
[  132.474683]  [<ffffffffa01e0ca1>] gfn_to_memslot+0x14/0x4c [kvm]
[  132.474693]  [<ffffffffa01e3575>] mark_page_dirty+0x17/0x2a [kvm]
[  132.474706]  [<ffffffffa01f21ea>] kvm_arch_vcpu_ioctl+0xbcf/0xc07 [kvm]

Actually, we do not write vcpu->arch.time at this time, mark_page_dirty
should be removed.

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 829b4e972558..ecc71dde4bb3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2632,7 +2632,6 @@ static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
 	if (!vcpu->arch.time_page)
 		return -EINVAL;
 	src->flags |= PVCLOCK_GUEST_STOPPED;
-	mark_page_dirty(vcpu->kvm, vcpu->arch.time >> PAGE_SHIFT);
 	kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 	return 0;
 }
-- 
cgit v1.2.3


From 86fde74cf5b829627b37ca86322acfdd99b524b8 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 17 Jul 2012 21:52:52 +0800
Subject: KVM: MMU: track the refcount when unmap the page

It will trigger a WARN_ON if the page has been freed but it is still
used in mmu, it can help us to detect mm bug early

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a5d6ef785b7e..685a48557389 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -556,6 +556,14 @@ static int mmu_spte_clear_track_bits(u64 *sptep)
 		return 0;
 
 	pfn = spte_to_pfn(old_spte);
+
+	/*
+	 * KVM does not hold the refcount of the page used by
+	 * kvm mmu, before reclaiming the page, we should
+	 * unmap it from mmu first.
+	 */
+	WARN_ON(!kvm_is_mmio_pfn(pfn) && !page_count(pfn_to_page(pfn)));
+
 	if (!shadow_accessed_mask || old_spte & shadow_accessed_mask)
 		kvm_set_pfn_accessed(pfn);
 	if (!shadow_dirty_mask || (old_spte & shadow_dirty_mask))
-- 
cgit v1.2.3


From 903816fa4d016e20ec71a1a97700cfcdda115580 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 17 Jul 2012 21:54:11 +0800
Subject: KVM: using get_fault_pfn to get the fault pfn

Using get_fault_pfn to cleanup the code

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c       |  6 ++----
 include/linux/kvm_host.h |  5 +----
 virt/kvm/kvm_main.c      | 13 ++++---------
 3 files changed, 7 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 685a48557389..f85cc21ae95d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2513,10 +2513,8 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 	unsigned long hva;
 
 	slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
-	if (!slot) {
-		get_page(fault_page);
-		return page_to_pfn(fault_page);
-	}
+	if (!slot)
+		return get_fault_pfn();
 
 	hva = gfn_to_hva_memslot(slot, gfn);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6f6c18a03c50..1a7f838d30c6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -383,15 +383,11 @@ id_to_memslot(struct kvm_memslots *slots, int id)
 static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
 
 extern struct page *bad_page;
-extern struct page *fault_page;
-
 extern pfn_t bad_pfn;
-extern pfn_t fault_pfn;
 
 int is_error_page(struct page *page);
 int is_error_pfn(pfn_t pfn);
 int is_hwpoison_pfn(pfn_t pfn);
-int is_fault_pfn(pfn_t pfn);
 int is_noslot_pfn(pfn_t pfn);
 int is_invalid_pfn(pfn_t pfn);
 int kvm_is_error_hva(unsigned long addr);
@@ -441,6 +437,7 @@ void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
 void kvm_set_pfn_accessed(pfn_t pfn);
 void kvm_get_pfn(pfn_t pfn);
+pfn_t get_fault_pfn(void);
 
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 			int len);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e2b1a159e5df..0fbbf2d21603 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -103,8 +103,8 @@ static bool largepages_enabled = true;
 static struct page *hwpoison_page;
 static pfn_t hwpoison_pfn;
 
-struct page *fault_page;
-pfn_t fault_pfn;
+static struct page *fault_page;
+static pfn_t fault_pfn;
 
 inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
@@ -949,12 +949,6 @@ int is_hwpoison_pfn(pfn_t pfn)
 }
 EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
 
-int is_fault_pfn(pfn_t pfn)
-{
-	return pfn == fault_pfn;
-}
-EXPORT_SYMBOL_GPL(is_fault_pfn);
-
 int is_noslot_pfn(pfn_t pfn)
 {
 	return pfn == bad_pfn;
@@ -1038,11 +1032,12 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
-static pfn_t get_fault_pfn(void)
+pfn_t get_fault_pfn(void)
 {
 	get_page(fault_page);
 	return fault_pfn;
 }
+EXPORT_SYMBOL_GPL(get_fault_pfn);
 
 int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
 	unsigned long start, int write, struct page **page)
-- 
cgit v1.2.3


From d566104853361cc377c61f70e41c1ad3d44b86c6 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 17 Jul 2012 21:56:16 +0800
Subject: KVM: remove the unused parameter of gfn_to_pfn_memslot

The parameter, 'kvm', is not used in gfn_to_pfn_memslot, we can happily remove
it

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/powerpc/kvm/e500_tlb.c |  2 +-
 arch/x86/kvm/mmu.c          |  2 +-
 include/linux/kvm_host.h    |  5 ++---
 virt/kvm/iommu.c            | 10 +++++-----
 virt/kvm/kvm_main.c         | 15 +++++++--------
 5 files changed, 16 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index c510fc961302..c8f6c5826742 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -520,7 +520,7 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 
 	if (likely(!pfnmap)) {
 		unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT);
-		pfn = gfn_to_pfn_memslot(vcpu_e500->vcpu.kvm, slot, gfn);
+		pfn = gfn_to_pfn_memslot(slot, gfn);
 		if (is_error_pfn(pfn)) {
 			printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
 					(long)gfn);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f85cc21ae95d..4f77f7ac6d25 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2518,7 +2518,7 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 
 	hva = gfn_to_hva_memslot(slot, gfn);
 
-	return hva_to_pfn_atomic(vcpu->kvm, hva);
+	return hva_to_pfn_atomic(hva);
 }
 
 static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e8d13a072d24..db9aa917840a 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -418,15 +418,14 @@ void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
 
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr);
+pfn_t hva_to_pfn_atomic(unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
 		       bool write_fault, bool *writable);
 pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-			 struct kvm_memory_slot *slot, gfn_t gfn);
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_pfn_dirty(pfn_t);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index e9fff9830bf0..c03f1fb26701 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -42,13 +42,13 @@ static int kvm_iommu_unmap_memslots(struct kvm *kvm);
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages);
 
-static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
-			   gfn_t gfn, unsigned long size)
+static pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
+			   unsigned long size)
 {
 	gfn_t end_gfn;
 	pfn_t pfn;
 
-	pfn     = gfn_to_pfn_memslot(kvm, slot, gfn);
+	pfn     = gfn_to_pfn_memslot(slot, gfn);
 	end_gfn = gfn + (size >> PAGE_SHIFT);
 	gfn    += 1;
 
@@ -56,7 +56,7 @@ static pfn_t kvm_pin_pages(struct kvm *kvm, struct kvm_memory_slot *slot,
 		return pfn;
 
 	while (gfn < end_gfn)
-		gfn_to_pfn_memslot(kvm, slot, gfn++);
+		gfn_to_pfn_memslot(slot, gfn++);
 
 	return pfn;
 }
@@ -105,7 +105,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		 * Pin all pages we are about to map in memory. This is
 		 * important because we unmap and unpin in 4kb steps later.
 		 */
-		pfn = kvm_pin_pages(kvm, slot, gfn, page_size);
+		pfn = kvm_pin_pages(slot, gfn, page_size);
 		if (is_error_pfn(pfn)) {
 			gfn += 1;
 			continue;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f955eee92aa9..68dda513cd72 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1062,8 +1062,8 @@ static inline int check_user_page_hwpoison(unsigned long addr)
 	return rc == -EHWPOISON;
 }
 
-static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
-			bool *async, bool write_fault, bool *writable)
+static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
+			bool write_fault, bool *writable)
 {
 	struct page *page[1];
 	int npages = 0;
@@ -1143,9 +1143,9 @@ static pfn_t hva_to_pfn(struct kvm *kvm, unsigned long addr, bool atomic,
 	return pfn;
 }
 
-pfn_t hva_to_pfn_atomic(struct kvm *kvm, unsigned long addr)
+pfn_t hva_to_pfn_atomic(unsigned long addr)
 {
-	return hva_to_pfn(kvm, addr, true, NULL, true, NULL);
+	return hva_to_pfn(addr, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
 
@@ -1163,7 +1163,7 @@ static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
 		return page_to_pfn(bad_page);
 	}
 
-	return hva_to_pfn(kvm, addr, atomic, async, write_fault, writable);
+	return hva_to_pfn(addr, atomic, async, write_fault, writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@@ -1192,11 +1192,10 @@ pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
 
-pfn_t gfn_to_pfn_memslot(struct kvm *kvm,
-			 struct kvm_memory_slot *slot, gfn_t gfn)
+pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
 	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-	return hva_to_pfn(kvm, addr, false, NULL, true, NULL);
+	return hva_to_pfn(addr, false, NULL, true, NULL);
 }
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
-- 
cgit v1.2.3


From 0fa060714753ef65aef294b3462efb0212520933 Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:16:19 +0800
Subject: KVM: VMX: Fix typos

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c39b60707e02..2300e5319ed9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1343,7 +1343,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
 	guest_efer = vmx->vcpu.arch.efer;
 
 	/*
-	 * NX is emulated; LMA and LME handled by hardware; SCE meaninless
+	 * NX is emulated; LMA and LME handled by hardware; SCE meaningless
 	 * outside long mode
 	 */
 	ignore_bits = EFER_NX | EFER_SCE;
@@ -3261,7 +3261,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	 * qemu binaries.
 	 *   IA32 arch specifies that at the time of processor reset the
 	 * "Accessed" bit in the AR field of segment registers is 1. And qemu
-	 * is setting it to 0 in the usedland code. This causes invalid guest
+	 * is setting it to 0 in the userland code. This causes invalid guest
 	 * state vmexit when "unrestricted guest" mode is turned on.
 	 *    Fix for this setup issue in cpu_reset is being pushed in the qemu
 	 * tree. Newer qemu binaries with that qemu fix would not need this
@@ -4446,7 +4446,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
 	hypercall[2] = 0xc1;
 }
 
-/* called to set cr0 as approriate for a mov-to-cr0 exit. */
+/* called to set cr0 as appropriate for a mov-to-cr0 exit. */
 static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	if (to_vmx(vcpu)->nested.vmxon &&
-- 
cgit v1.2.3


From c5ec2e56d0a654797ee43a31001738ccd05eef0b Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:16:43 +0800
Subject: KVM: SVM: Fix typos

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/svm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index baead950d6c8..687d0c30e559 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2063,7 +2063,7 @@ static inline bool nested_svm_intr(struct vcpu_svm *svm)
 	if (svm->nested.intercept & 1ULL) {
 		/*
 		 * The #vmexit can't be emulated here directly because this
-		 * code path runs with irqs and preemtion disabled. A
+		 * code path runs with irqs and preemption disabled. A
 		 * #vmexit emulation might sleep. Only signal request for
 		 * the #vmexit here.
 		 */
@@ -2409,7 +2409,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
 	/*
 	 * This function merges the msr permission bitmaps of kvm and the
-	 * nested vmcb. It is omptimized in that it only merges the parts where
+	 * nested vmcb. It is optimized in that it only merges the parts where
 	 * the kvm msr permission bitmap may contain zero bits
 	 */
 	int i;
-- 
cgit v1.2.3


From 4a9699807c491740c4dfe7b6a06703e1d262e802 Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:17:27 +0800
Subject: KVM: x86: Fix typos in x86.c

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ecc71dde4bb3..3d9d08edbf29 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1093,7 +1093,7 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, u64 data)
 		 * For each generation, we track the original measured
 		 * nanosecond time, offset, and write, so if TSCs are in
 		 * sync, we can match exact offset, and if not, we can match
-		 * exact software computaion in compute_guest_tsc()
+		 * exact software computation in compute_guest_tsc()
 		 *
 		 * These values are tracked in kvm->arch.cur_xxx variables.
 		 */
@@ -1500,7 +1500,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
 	gpa_t gpa = data & ~0x3f;
 
-	/* Bits 2:5 are resrved, Should be zero */
+	/* Bits 2:5 are reserved, Should be zero */
 	if (data & 0x3c)
 		return 1;
 
@@ -1723,7 +1723,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		 * Ignore all writes to this no longer documented MSR.
 		 * Writes are only relevant for old K7 processors,
 		 * all pre-dating SVM, but a recommended workaround from
-		 * AMD for these chips. It is possible to speicify the
+		 * AMD for these chips. It is possible to specify the
 		 * affected processor models on the command line, hence
 		 * the need to ignore the workaround.
 		 */
@@ -4491,7 +4491,7 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 
 	/*
 	 * if emulation was due to access to shadowed page table
-	 * and it failed try to unshadow page and re-entetr the
+	 * and it failed try to unshadow page and re-enter the
 	 * guest to let CPU execute the instruction.
 	 */
 	if (kvm_mmu_unprotect_page_virt(vcpu, gva))
@@ -5587,7 +5587,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 		/*
 		 * We are here if userspace calls get_regs() in the middle of
 		 * instruction emulation. Registers state needs to be copied
-		 * back from emulation context to vcpu. Usrapace shouldn't do
+		 * back from emulation context to vcpu. Userspace shouldn't do
 		 * that usually, but some bad designed PV devices (vmware
 		 * backdoor interface) need this to work
 		 */
@@ -6116,7 +6116,7 @@ int kvm_arch_hardware_enable(void *garbage)
 	 * as we reset last_host_tsc on all VCPUs to stop this from being
 	 * called multiple times (one for each physical CPU bringup).
 	 *
-	 * Platforms with unnreliable TSCs don't have to deal with this, they
+	 * Platforms with unreliable TSCs don't have to deal with this, they
 	 * will be compensated by the logic in vcpu_load, which sets the TSC to
 	 * catchup mode.  This will catchup all VCPUs to real time, but cannot
 	 * guarantee that they stay in perfect synchronization.
@@ -6391,7 +6391,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		map_flags = MAP_SHARED | MAP_ANONYMOUS;
 
 	/*To keep backward compatibility with older userspace,
-	 *x86 needs to hanlde !user_alloc case.
+	 *x86 needs to handle !user_alloc case.
 	 */
 	if (!user_alloc) {
 		if (npages && !old.rmap) {
-- 
cgit v1.2.3


From fc0586807dc4e307da6d3ba4ed5c927b6d27276c Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:19:51 +0800
Subject: KVM: x86: Fix typos in emulate.c

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/emulate.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 97d9a9914ba8..85b611e13e84 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -642,7 +642,7 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 			if (addr.ea > lim || (u32)(addr.ea + size - 1) > lim)
 				goto bad;
 		} else {
-			/* exapand-down segment */
+			/* expand-down segment */
 			if (addr.ea <= lim || (u32)(addr.ea + size - 1) <= lim)
 				goto bad;
 			lim = desc.d ? 0xffffffff : 0xffff;
@@ -1383,7 +1383,7 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	err_code = selector & 0xfffc;
 	err_vec = GP_VECTOR;
 
-	/* can't load system descriptor into segment selecor */
+	/* can't load system descriptor into segment selector */
 	if (seg <= VCPU_SREG_GS && !seg_desc.s)
 		goto exception;
 
@@ -2398,7 +2398,7 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	set_segment_selector(ctxt, tss->ds, VCPU_SREG_DS);
 
 	/*
-	 * Now load segment descriptors. If fault happenes at this stage
+	 * Now load segment descriptors. If fault happens at this stage
 	 * it is handled in a context of new task
 	 */
 	ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
@@ -2640,7 +2640,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 	 *
 	 * 1. jmp/call/int to task gate: Check against DPL of the task gate
 	 * 2. Exception/IRQ/iret: No check is performed
-	 * 3. jmp/call to TSS: Check agains DPL of the TSS
+	 * 3. jmp/call to TSS: Check against DPL of the TSS
 	 */
 	if (reason == TASK_SWITCH_GATE) {
 		if (idt_index != -1) {
@@ -2681,7 +2681,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 		ctxt->eflags = ctxt->eflags & ~X86_EFLAGS_NT;
 
 	/* set back link to prev task only if NT bit is set in eflags
-	   note that old_tss_sel is not used afetr this point */
+	   note that old_tss_sel is not used after this point */
 	if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE)
 		old_tss_sel = 0xffff;
 
-- 
cgit v1.2.3


From bbbda79510b6e5d399fae76bdb9b999286eb1b59 Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:20:58 +0800
Subject: KVM: x86: Fix typos in cpuid.c

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/cpuid.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 0595f1397b7c..b496da684bd6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -316,7 +316,7 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 	}
 	case 7: {
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
-		/* Mask ebx against host capbability word 9 */
+		/* Mask ebx against host capability word 9 */
 		if (index == 0) {
 			entry->ebx &= kvm_supported_word9_x86_features;
 			cpuid_mask(&entry->ebx, 9);
-- 
cgit v1.2.3


From d5b0b5b196b474ef26f46f2036c8fbaa8cca2cf5 Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:22:57 +0800
Subject: KVM: x86: Fix typos in lapic.c

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/lapic.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ce878788a39f..fff7173f6a71 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -719,7 +719,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 {
 	unsigned char alignment = offset & 0xf;
 	u32 result;
-	/* this bitmask has a bit cleared for each reserver register */
+	/* this bitmask has a bit cleared for each reserved register */
 	static const u64 rmask = 0x43ff01ffffffe70cULL;
 
 	if ((alignment + len) > 4) {
@@ -792,7 +792,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 	atomic_set(&apic->lapic_timer.pending, 0);
 
 	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
-		/* lapic timer in oneshot or peroidic mode */
+		/* lapic timer in oneshot or periodic mode */
 		now = apic->lapic_timer.timer.base->get_time();
 		apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
 			    * APIC_BUS_CYCLE_NS * apic->divide_count;
-- 
cgit v1.2.3


From c7a7062fa00db7dc66280a72cd9dad0f3595bc66 Mon Sep 17 00:00:00 2001
From: Guo Chao <yan@linux.vnet.ibm.com>
Date: Thu, 28 Jun 2012 15:23:08 +0800
Subject: KVM: x86: Fix typos in pmu.c

Signed-off-by: Guo Chao <yan@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/pmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 2e88438ffd83..db206a46e0dc 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -1,5 +1,5 @@
 /*
- * Kernel-based Virtual Machine -- Performane Monitoring Unit support
+ * Kernel-based Virtual Machine -- Performance Monitoring Unit support
  *
  * Copyright 2011 Red Hat, Inc. and/or its affiliates.
  *
-- 
cgit v1.2.3


From 93b6547e2219784b2df790353e083e0bdbebd2c2 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Thu, 19 Jul 2012 13:55:53 +0300
Subject: KVM: switch to symbolic name for irq_states size

Use PIC_NUM_PINS instead of hard-coded 16 for pic pins.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/irq.h  | 2 +-
 virt/kvm/irq_comm.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 2086f2bfba33..2d03568e9498 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -70,7 +70,7 @@ struct kvm_pic {
 	struct kvm_io_device dev_slave;
 	struct kvm_io_device dev_eclr;
 	void (*ack_notifier)(void *opaque, int irq);
-	unsigned long irq_states[16];
+	unsigned long irq_states[PIC_NUM_PINS];
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index a6a0365475ed..22aae8fd146b 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -343,11 +343,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt,
 		switch (ue->u.irqchip.irqchip) {
 		case KVM_IRQCHIP_PIC_MASTER:
 			e->set = kvm_set_pic_irq;
-			max_pin = 16;
+			max_pin = PIC_NUM_PINS;
 			break;
 		case KVM_IRQCHIP_PIC_SLAVE:
 			e->set = kvm_set_pic_irq;
-			max_pin = 16;
+			max_pin = PIC_NUM_PINS;
 			delta = 8;
 			break;
 		case KVM_IRQCHIP_IOAPIC:
-- 
cgit v1.2.3


From f2a743473194a1ad44a85f8b63aeef9d63e5bf47 Mon Sep 17 00:00:00 2001
From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Date: Wed, 18 Jul 2012 19:07:32 +0530
Subject: KVM: Add config to support ple or cpu relax optimzation

Suggested-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Reviewed-by: Rik van Riel <riel@redhat.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com> # on s390x
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/s390/kvm/Kconfig | 1 +
 arch/x86/kvm/Kconfig  | 1 +
 virt/kvm/Kconfig      | 3 +++
 3 files changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
index 78eb9847008f..a6e2677724e1 100644
--- a/arch/s390/kvm/Kconfig
+++ b/arch/s390/kvm/Kconfig
@@ -21,6 +21,7 @@ config KVM
 	depends on HAVE_KVM && EXPERIMENTAL
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	---help---
 	  Support hosting paravirtualized guest machines using the SIE
 	  virtualization capability on the mainframe. This should work
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index a28f338843ea..45c044f0fff7 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -37,6 +37,7 @@ config KVM
 	select TASK_DELAY_ACCT
 	select PERF_EVENTS
 	select HAVE_KVM_MSI
+	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	---help---
 	  Support hosting fully virtualized guest machines using hardware
 	  virtualization extensions.  You will need a fairly recent
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 28694f4a9139..d01b24b72c61 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -21,3 +21,6 @@ config KVM_ASYNC_PF
 
 config HAVE_KVM_MSI
        bool
+
+config HAVE_KVM_CPU_RELAX_INTERCEPT
+       bool
-- 
cgit v1.2.3


From 3b2bd2f800ba9488d9ad493216a0c07d71055b56 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 26 Jul 2012 11:57:43 +0800
Subject: KVM: MMU: use kvm_release_pfn_clean to release pfn

The current code depends on the fact that fault_page is the normal page,
however, we will use the error code instead of these dummy pages in the
later patch, so we use kvm_release_pfn_clean to release pfn which will
release the error code properly

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 241993443599..a9a20528e700 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3275,7 +3275,7 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 	if (!async)
 		return false; /* *pfn has correct page already */
 
-	put_page(pfn_to_page(*pfn));
+	kvm_release_pfn_clean(*pfn);
 
 	if (!prefault && can_do_async_pf(vcpu)) {
 		trace_kvm_try_async_get_page(gva, gfn);
-- 
cgit v1.2.3


From f23b070e662ca55a8fdaaa28537af06cab664499 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Thu, 26 Jul 2012 13:12:22 +0800
Subject: KVM: x86 emulator: simplify read_emulated

No need split mmio read region into 8-bits pieces since we do it in
emulator_read_write_onepage

Changelog:
  Add a WARN_ON to check read-cache overflow

Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 85b611e13e84..2c5d1e65d9d1 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1166,24 +1166,21 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt,
 	int rc;
 	struct read_cache *mc = &ctxt->mem_read;
 
-	while (size) {
-		int n = min(size, 8u);
-		size -= n;
-		if (mc->pos < mc->end)
-			goto read_cached;
-
-		rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, n,
-					      &ctxt->exception);
-		if (rc != X86EMUL_CONTINUE)
-			return rc;
-		mc->end += n;
+	if (mc->pos < mc->end)
+		goto read_cached;
 
-	read_cached:
-		memcpy(dest, mc->data + mc->pos, n);
-		mc->pos += n;
-		dest += n;
-		addr += n;
-	}
+	WARN_ON((mc->end + size) >= sizeof(mc->data));
+
+	rc = ctxt->ops->read_emulated(ctxt, addr, mc->data + mc->end, size,
+				      &ctxt->exception);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+
+	mc->end += size;
+
+read_cached:
+	memcpy(dest, mc->data + mc->pos, size);
+	mc->pos += size;
 	return X86EMUL_CONTINUE;
 }
 
-- 
cgit v1.2.3


From 99245b507dc3b1b2815d6a6cb4e94a6b7018a24b Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 25 Jul 2012 15:49:42 +0300
Subject: KVM: x86 emulator: drop unneeded call to get_segment()

setup_syscalls_segments() calls get_segment() and than overwrites all
but one of the structure fields and this one should also be overwritten
anyway, so we can drop call to get_segment() and avoid a couple of vmreads
on vmx. Also drop zeroing ss/cs structures since most of the fields are
set anyway. Just set those that were not set explicitly.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 2c5d1e65d9d1..10f0136f50c1 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2035,12 +2035,6 @@ static void
 setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
 			struct desc_struct *cs, struct desc_struct *ss)
 {
-	u16 selector;
-
-	memset(cs, 0, sizeof(struct desc_struct));
-	ctxt->ops->get_segment(ctxt, &selector, cs, NULL, VCPU_SREG_CS);
-	memset(ss, 0, sizeof(struct desc_struct));
-
 	cs->l = 0;		/* will be adjusted later */
 	set_desc_base(cs, 0);	/* flat segment */
 	cs->g = 1;		/* 4kb granularity */
@@ -2050,6 +2044,7 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
 	cs->dpl = 0;		/* will be adjusted later */
 	cs->p = 1;
 	cs->d = 1;
+	cs->avl = 0;
 
 	set_desc_base(ss, 0);	/* flat segment */
 	set_desc_limit(ss, 0xfffff);	/* 4GB limit */
@@ -2059,6 +2054,8 @@ setup_syscalls_segments(struct x86_emulate_ctxt *ctxt,
 	ss->d = 1;		/* 32bit stack segment */
 	ss->dpl = 0;
 	ss->p = 1;
+	ss->l = 0;
+	ss->avl = 0;
 }
 
 static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
-- 
cgit v1.2.3


From 23d43cf998275bc97437931c0cdee1df2c1aa3ca Mon Sep 17 00:00:00 2001
From: Christoffer Dall <c.dall@virtualopensystems.com>
Date: Tue, 24 Jul 2012 08:51:20 -0400
Subject: KVM: Move KVM_IRQ_LINE to arch-generic code

Handle KVM_IRQ_LINE and KVM_IRQ_LINE_STATUS in the generic
kvm_vm_ioctl() function and call into kvm_vm_ioctl_irq_line().

This is even more relevant when KVM/ARM also uses this ioctl.

Signed-off-by: Christoffer Dall <c.dall@virtualopensystems.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c | 33 ++++++++++-----------------------
 arch/x86/kvm/x86.c       | 33 ++++++++++-----------------------
 include/linux/kvm_host.h |  1 +
 virt/kvm/kvm_main.c      | 23 +++++++++++++++++++++++
 4 files changed, 44 insertions(+), 46 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index bd77cb507c1c..eac65380bd20 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -924,6 +924,16 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return 0;
 }
 
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+	if (!irqchip_in_kernel(kvm))
+		return -ENXIO;
+
+	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event->irq, irq_event->level);
+	return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		unsigned int ioctl, unsigned long arg)
 {
@@ -963,29 +973,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			goto out;
 		}
 		break;
-	case KVM_IRQ_LINE_STATUS:
-	case KVM_IRQ_LINE: {
-		struct kvm_irq_level irq_event;
-
-		r = -EFAULT;
-		if (copy_from_user(&irq_event, argp, sizeof irq_event))
-			goto out;
-		r = -ENXIO;
-		if (irqchip_in_kernel(kvm)) {
-			__s32 status;
-			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-				    irq_event.irq, irq_event.level);
-			if (ioctl == KVM_IRQ_LINE_STATUS) {
-				r = -EFAULT;
-				irq_event.status = status;
-				if (copy_to_user(argp, &irq_event,
-							sizeof irq_event))
-					goto out;
-			}
-			r = 0;
-		}
-		break;
-		}
 	case KVM_GET_IRQCHIP: {
 		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
 		struct kvm_irqchip chip;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3d9d08edbf29..b6379e55ee27 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3165,6 +3165,16 @@ out:
 	return r;
 }
 
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event)
+{
+	if (!irqchip_in_kernel(kvm))
+		return -ENXIO;
+
+	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
+					irq_event->irq, irq_event->level);
+	return 0;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -3271,29 +3281,6 @@ long kvm_arch_vm_ioctl(struct file *filp,
 	create_pit_unlock:
 		mutex_unlock(&kvm->slots_lock);
 		break;
-	case KVM_IRQ_LINE_STATUS:
-	case KVM_IRQ_LINE: {
-		struct kvm_irq_level irq_event;
-
-		r = -EFAULT;
-		if (copy_from_user(&irq_event, argp, sizeof irq_event))
-			goto out;
-		r = -ENXIO;
-		if (irqchip_in_kernel(kvm)) {
-			__s32 status;
-			status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-					irq_event.irq, irq_event.level);
-			if (ioctl == KVM_IRQ_LINE_STATUS) {
-				r = -EFAULT;
-				irq_event.status = status;
-				if (copy_to_user(argp, &irq_event,
-							sizeof irq_event))
-					goto out;
-			}
-			r = 0;
-		}
-		break;
-	}
 	case KVM_GET_IRQCHIP: {
 		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
 		struct kvm_irqchip *chip;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4e60d3695e4e..dbc65f9d6a2b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -498,6 +498,7 @@ int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 				   struct
 				   kvm_userspace_memory_region *mem,
 				   int user_alloc);
+int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level);
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index de89497fe4c7..bcf973ec98ff 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2148,6 +2148,29 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = kvm_send_userspace_msi(kvm, &msi);
 		break;
 	}
+#endif
+#ifdef __KVM_HAVE_IRQ_LINE
+	case KVM_IRQ_LINE_STATUS:
+	case KVM_IRQ_LINE: {
+		struct kvm_irq_level irq_event;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_event, argp, sizeof irq_event))
+			goto out;
+
+		r = kvm_vm_ioctl_irq_line(kvm, &irq_event);
+		if (r)
+			goto out;
+
+		r = -EFAULT;
+		if (ioctl == KVM_IRQ_LINE_STATUS) {
+			if (copy_to_user(argp, &irq_event, sizeof irq_event))
+				goto out;
+		}
+
+		r = 0;
+		break;
+	}
 #endif
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
-- 
cgit v1.2.3


From 7d0642b93780a7309d2954de6f6126d6ceb526f0 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 11 Jul 2012 15:03:18 -0400
Subject: xen/perf: Define .glob for the different hypercalls.

This allows us in perf to have this:

 99.67%  [kernel]             [k] xen_hypercall_sched_op
  0.11%  [kernel]             [k] xen_hypercall_xen_version

instead of the borring ever-encompassing:

 99.13%  [kernel]              [k] hypercall_page

[v2: Use a macro to define the name and skip]
[v3: Use balign per Jan's suggestion]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/xen-head.S | 56 +++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S
index aaa7291c9259..7faed5869e5b 100644
--- a/arch/x86/xen/xen-head.S
+++ b/arch/x86/xen/xen-head.S
@@ -28,9 +28,61 @@ ENTRY(startup_xen)
 	__FINIT
 
 .pushsection .text
-	.align PAGE_SIZE
+	.balign PAGE_SIZE
 ENTRY(hypercall_page)
-	.skip PAGE_SIZE
+#define NEXT_HYPERCALL(x) \
+	ENTRY(xen_hypercall_##x) \
+	.skip 32
+
+NEXT_HYPERCALL(set_trap_table)
+NEXT_HYPERCALL(mmu_update)
+NEXT_HYPERCALL(set_gdt)
+NEXT_HYPERCALL(stack_switch)
+NEXT_HYPERCALL(set_callbacks)
+NEXT_HYPERCALL(fpu_taskswitch)
+NEXT_HYPERCALL(sched_op_compat)
+NEXT_HYPERCALL(platform_op)
+NEXT_HYPERCALL(set_debugreg)
+NEXT_HYPERCALL(get_debugreg)
+NEXT_HYPERCALL(update_descriptor)
+NEXT_HYPERCALL(ni)
+NEXT_HYPERCALL(memory_op)
+NEXT_HYPERCALL(multicall)
+NEXT_HYPERCALL(update_va_mapping)
+NEXT_HYPERCALL(set_timer_op)
+NEXT_HYPERCALL(event_channel_op_compat)
+NEXT_HYPERCALL(xen_version)
+NEXT_HYPERCALL(console_io)
+NEXT_HYPERCALL(physdev_op_compat)
+NEXT_HYPERCALL(grant_table_op)
+NEXT_HYPERCALL(vm_assist)
+NEXT_HYPERCALL(update_va_mapping_otherdomain)
+NEXT_HYPERCALL(iret)
+NEXT_HYPERCALL(vcpu_op)
+NEXT_HYPERCALL(set_segment_base)
+NEXT_HYPERCALL(mmuext_op)
+NEXT_HYPERCALL(xsm_op)
+NEXT_HYPERCALL(nmi_op)
+NEXT_HYPERCALL(sched_op)
+NEXT_HYPERCALL(callback_op)
+NEXT_HYPERCALL(xenoprof_op)
+NEXT_HYPERCALL(event_channel_op)
+NEXT_HYPERCALL(physdev_op)
+NEXT_HYPERCALL(hvm_op)
+NEXT_HYPERCALL(sysctl)
+NEXT_HYPERCALL(domctl)
+NEXT_HYPERCALL(kexec_op)
+NEXT_HYPERCALL(tmem_op) /* 38 */
+ENTRY(xen_hypercall_rsvr)
+	.skip 320
+NEXT_HYPERCALL(mca) /* 48 */
+NEXT_HYPERCALL(arch_1)
+NEXT_HYPERCALL(arch_2)
+NEXT_HYPERCALL(arch_3)
+NEXT_HYPERCALL(arch_4)
+NEXT_HYPERCALL(arch_5)
+NEXT_HYPERCALL(arch_6)
+	.balign PAGE_SIZE
 .popsection
 
 	ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS,       .asciz "linux")
-- 
cgit v1.2.3


From 4a4541a40e1fe145c72c4b959fac524a5600d9fb Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 22 Jul 2012 17:41:00 +0300
Subject: KVM: Don't update PPR on any APIC read

The current code will update the PPR on almost any APIC read; however
that's only required if we read the PPR.

kvm_update_ppr() shows up in some profiles, albeit with a low usage (~1%).
This should reduce it further (it will still be called during interrupt
processing).

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/lapic.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index fff7173f6a71..ad7fff7ad13c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -696,12 +696,14 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 
 		val = apic_get_tmcct(apic);
 		break;
-
+	case APIC_PROCPRI:
+		apic_update_ppr(apic);
+		val = apic_get_reg(apic, offset);
+		break;
 	case APIC_TASKPRI:
 		report_tpr_access(apic, false);
 		/* fall thru */
 	default:
-		apic_update_ppr(apic);
 		val = apic_get_reg(apic, offset);
 		break;
 	}
-- 
cgit v1.2.3


From e4ea3f6b1bf3d489674a3660db652636e50186f9 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Thu, 19 Jul 2012 13:04:47 -0400
Subject: ftrace/x86_32: Simplify parameter setup for ftrace_regs_caller

The final position of the stack after saving regs and setting up
the parameters for ftrace_regs_call, is the position of the pt_regs
needed for the 4th parameter. Instead of saving it into a temporary
reg and pushing the reg, simply push the stack pointer.

Link: http://lkml.kernel.org/r/1342702344.12353.16.camel@gandalf.stny.rr.com

Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/entry_32.S | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 46caa5649a5e..4dc301709e78 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1169,10 +1169,9 @@ ENTRY(ftrace_regs_caller)
 	movl $__KERNEL_CS,13*4(%esp)
 
 	movl 12*4(%esp), %eax	/* Load ip (1st parameter) */
-	movl 0x4(%ebp), %edx	/* Load parent ip (2cd parameter) */
-	lea  (%esp), %ecx
-	pushl %ecx		/* Save pt_regs as 4th parameter */
+	movl 0x4(%ebp), %edx	/* Load parent ip (2nd parameter) */
 	leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
+	pushl %esp		/* Save pt_regs as 4th parameter */
 
 GLOBAL(ftrace_regs_call)
 	call ftrace_stub
-- 
cgit v1.2.3


From 5767cfeaa9ec7b67c802143394f3ad9f8b174eb8 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 3 Jul 2012 16:16:09 -0400
Subject: ftrace/x86: Remove function_trace_stop check from graph caller

The graph caller is called by the mcount callers, which already does
the check against the function_trace_stop variable. No reason to
check it again.

Link: http://lkml.kernel.org/r/20120711195745.588538769@goodmis.org

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/entry_32.S | 3 ---
 arch/x86/kernel/entry_64.S | 3 ---
 2 files changed, 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 4dc301709e78..061ac17ee974 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1241,9 +1241,6 @@ END(mcount)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 ENTRY(ftrace_graph_caller)
-	cmpl $0, function_trace_stop
-	jne ftrace_stub
-
 	pushl %eax
 	pushl %ecx
 	pushl %edx
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 52bda2e8f7aa..38308fa7d7e5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -213,9 +213,6 @@ END(mcount)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 ENTRY(ftrace_graph_caller)
-	cmpl $0, function_trace_stop
-	jne ftrace_stub
-
 	MCOUNT_SAVE_FRAME
 
 	leaq 8(%rbp), %rdi
-- 
cgit v1.2.3


From e52538965119319447c0800c534da73142c27be2 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Tue, 5 Jun 2012 19:28:38 +0900
Subject: kprobes/x86: ftrace based optimization for x86

Add function tracer based kprobe optimization support
handlers on x86. This allows kprobes to use function
tracer for probing on mcount call.

Link: http://lkml.kernel.org/r/20120605102838.27845.26317.stgit@localhost.localdomain

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>

[ Updated to new port of ftrace save regs functions ]

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/include/asm/kprobes.h |  1 +
 arch/x86/kernel/kprobes.c      | 48 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/kprobes.h        |  2 +-
 kernel/kprobes.c               |  2 +-
 4 files changed, 51 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h
index 547882539157..d3ddd17405d0 100644
--- a/arch/x86/include/asm/kprobes.h
+++ b/arch/x86/include/asm/kprobes.h
@@ -27,6 +27,7 @@
 #include <asm/insn.h>
 
 #define  __ARCH_WANT_KPROBES_INSN_SLOT
+#define  ARCH_SUPPORTS_KPROBES_ON_FTRACE
 
 struct pt_regs;
 struct kprobe;
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index e2f751efb7b1..47ae1023a93c 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1052,6 +1052,54 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return 0;
 }
 
+#ifdef KPROBES_CAN_USE_FTRACE
+/* Ftrace callback handler for kprobes */
+void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+				     struct ftrace_ops *ops, struct pt_regs *regs)
+{
+	struct kprobe *p;
+	struct kprobe_ctlblk *kcb;
+	unsigned long flags;
+
+	/* Disable irq for emulating a breakpoint and avoiding preempt */
+	local_irq_save(flags);
+
+	p = get_kprobe((kprobe_opcode_t *)ip);
+	if (unlikely(!p) || kprobe_disabled(p))
+		goto end;
+
+	kcb = get_kprobe_ctlblk();
+	if (kprobe_running()) {
+		kprobes_inc_nmissed_count(p);
+	} else {
+		regs->ip += sizeof(kprobe_opcode_t);
+
+		__this_cpu_write(current_kprobe, p);
+		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+		if (p->pre_handler)
+			p->pre_handler(p, regs);
+
+		if (unlikely(p->post_handler)) {
+			/* Emulate singlestep as if there is a 5byte nop */
+			regs->ip = ip + MCOUNT_INSN_SIZE;
+			kcb->kprobe_status = KPROBE_HIT_SSDONE;
+			p->post_handler(p, regs, 0);
+		}
+		__this_cpu_write(current_kprobe, NULL);
+		regs->ip = ip;	/* Recover for next callback */
+	}
+end:
+	local_irq_restore(flags);
+}
+
+int __kprobes arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+	p->ainsn.insn = NULL;
+	p->ainsn.boostable = -1;
+	return 0;
+}
+#endif
+
 int __init arch_init_kprobes(void)
 {
 	return arch_init_optprobes();
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index aa0d05e852e3..23755ba42abc 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -318,7 +318,7 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table,
 #endif /* CONFIG_OPTPROBES */
 #ifdef KPROBES_CAN_USE_FTRACE
 extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
-				  struct pt_regs *regs);
+				  struct ftrace_ops *ops, struct pt_regs *regs);
 extern int arch_prepare_kprobe_ftrace(struct kprobe *p);
 #endif
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 69c16efc315b..35b4315d84f5 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -921,7 +921,7 @@ static __kprobes struct kprobe *alloc_aggr_kprobe(struct kprobe *p)
 
 #ifdef KPROBES_CAN_USE_FTRACE
 static struct ftrace_ops kprobe_ftrace_ops __read_mostly = {
-	.regs_func = kprobe_ftrace_handler,
+	.func = kprobe_ftrace_handler,
 	.flags = FTRACE_OPS_FL_SAVE_REGS,
 };
 static int kprobe_ftrace_enabled;
-- 
cgit v1.2.3


From e9d90d472da97e1b1560bffb89578ba082c88a69 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 26 Jul 2012 18:01:50 +0300
Subject: KVM: Remove internal timer abstraction

kvm_timer_fn(), the sole inhabitant of timer.c, is only used by lapic.c. Move
it there to make it easier to hack on it.

struct kvm_timer is a thin wrapper around hrtimer, and only adds obfuscation.
Move near its two users (with different names) to prepare for simplification.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/Makefile    |  2 +-
 arch/x86/kvm/i8254.c     |  8 ++++----
 arch/x86/kvm/i8254.h     | 18 +++++++++++++++++-
 arch/x86/kvm/kvm_timer.h | 18 ------------------
 arch/x86/kvm/lapic.c     | 30 +++++++++++++++++++++++++++++-
 arch/x86/kvm/lapic.h     | 17 ++++++++++++++++-
 arch/x86/kvm/timer.c     | 47 -----------------------------------------------
 7 files changed, 67 insertions(+), 73 deletions(-)
 delete mode 100644 arch/x86/kvm/kvm_timer.h
 delete mode 100644 arch/x86/kvm/timer.c

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 4f579e8dcacf..04d30401c5cb 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,7 +12,7 @@ kvm-$(CONFIG_IOMMU_API)	+= $(addprefix ../../../virt/kvm/, iommu.o)
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(addprefix ../../../virt/kvm/, async_pf.o)
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o timer.o cpuid.o pmu.o
+			   i8254.o cpuid.o pmu.o
 kvm-intel-y		+= vmx.o
 kvm-amd-y		+= svm.o
 
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index adba28f88d1a..1d8e75702d95 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -272,14 +272,14 @@ static void destroy_pit_timer(struct kvm_pit *pit)
 	flush_kthread_work(&pit->expired);
 }
 
-static bool kpit_is_periodic(struct kvm_timer *ktimer)
+static bool kpit_is_periodic(struct kvm_pit_timer *ktimer)
 {
 	struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
 						 pit_timer);
 	return ps->is_periodic;
 }
 
-static struct kvm_timer_ops kpit_ops = {
+static struct kvm_pit_timer_ops kpit_ops = {
 	.is_periodic = kpit_is_periodic,
 };
 
@@ -322,7 +322,7 @@ static void pit_do_work(struct kthread_work *work)
 
 static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 {
-	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+	struct kvm_pit_timer *ktimer = container_of(data, struct kvm_pit_timer, timer);
 	struct kvm_pit *pt = ktimer->kvm->arch.vpit;
 
 	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
@@ -340,7 +340,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 {
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
-	struct kvm_timer *pt = &ps->pit_timer;
+	struct kvm_pit_timer *pt = &ps->pit_timer;
 	s64 interval;
 
 	if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index fdf40425ea1d..3351816e8b32 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -21,10 +21,26 @@ struct kvm_kpit_channel_state {
 	ktime_t count_load_time;
 };
 
+struct kvm_pit_timer {
+	struct hrtimer timer;
+	s64 period; 				/* unit: ns */
+	u32 timer_mode_mask;
+	u64 tscdeadline;
+	atomic_t pending;			/* accumulated triggered timers */
+	bool reinject;
+	struct kvm_pit_timer_ops *t_ops;
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+};
+
+struct kvm_pit_timer_ops {
+	bool (*is_periodic)(struct kvm_pit_timer *);
+};
+
 struct kvm_kpit_state {
 	struct kvm_kpit_channel_state channels[3];
 	u32 flags;
-	struct kvm_timer pit_timer;
+	struct kvm_pit_timer pit_timer;
 	bool is_periodic;
 	u32    speaker_data_on;
 	struct mutex lock;
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h
deleted file mode 100644
index 497dbaa366d4..000000000000
--- a/arch/x86/kvm/kvm_timer.h
+++ /dev/null
@@ -1,18 +0,0 @@
-
-struct kvm_timer {
-	struct hrtimer timer;
-	s64 period; 				/* unit: ns */
-	u32 timer_mode_mask;
-	u64 tscdeadline;
-	atomic_t pending;			/* accumulated triggered timers */
-	bool reinject;
-	struct kvm_timer_ops *t_ops;
-	struct kvm *kvm;
-	struct kvm_vcpu *vcpu;
-};
-
-struct kvm_timer_ops {
-	bool (*is_periodic)(struct kvm_timer *);
-};
-
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index ad7fff7ad13c..61ed32cd17c1 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1262,6 +1262,34 @@ static const struct kvm_io_device_ops apic_mmio_ops = {
 	.write    = apic_mmio_write,
 };
 
+static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
+{
+	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
+	struct kvm_vcpu *vcpu = ktimer->vcpu;
+	wait_queue_head_t *q = &vcpu->wq;
+
+	/*
+	 * There is a race window between reading and incrementing, but we do
+	 * not care about potentially losing timer events in the !reinject
+	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
+	 * in vcpu_enter_guest.
+	 */
+	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
+		atomic_inc(&ktimer->pending);
+		/* FIXME: this code should not know anything about vcpus */
+		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+	}
+
+	if (waitqueue_active(q))
+		wake_up_interruptible(q);
+
+	if (ktimer->t_ops->is_periodic(ktimer)) {
+		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+		return HRTIMER_RESTART;
+	} else
+		return HRTIMER_NORESTART;
+}
+
 int kvm_create_lapic(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic;
@@ -1285,7 +1313,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_ABS);
-	apic->lapic_timer.timer.function = kvm_timer_fn;
+	apic->lapic_timer.timer.function = apic_timer_fn;
 	apic->lapic_timer.t_ops = &lapic_timer_ops;
 	apic->lapic_timer.kvm = vcpu->kvm;
 	apic->lapic_timer.vcpu = vcpu;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 4af5405ae1e2..d7251c92ed42 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -2,10 +2,25 @@
 #define __KVM_X86_LAPIC_H
 
 #include "iodev.h"
-#include "kvm_timer.h"
 
 #include <linux/kvm_host.h>
 
+struct kvm_timer {
+	struct hrtimer timer;
+	s64 period; 				/* unit: ns */
+	u32 timer_mode_mask;
+	u64 tscdeadline;
+	atomic_t pending;			/* accumulated triggered timers */
+	bool reinject;
+	struct kvm_timer_ops *t_ops;
+	struct kvm *kvm;
+	struct kvm_vcpu *vcpu;
+};
+
+struct kvm_timer_ops {
+	bool (*is_periodic)(struct kvm_timer *);
+};
+
 struct kvm_lapic {
 	unsigned long base_address;
 	struct kvm_io_device dev;
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c
deleted file mode 100644
index 6b85cc647f34..000000000000
--- a/arch/x86/kvm/timer.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Kernel-based Virtual Machine driver for Linux
- *
- * This module enables machines with Intel VT-x extensions to run virtual
- * machines without emulation or binary translation.
- *
- * timer support
- *
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.  See
- * the COPYING file in the top-level directory.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/hrtimer.h>
-#include <linux/atomic.h>
-#include "kvm_timer.h"
-
-enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
-{
-	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
-	struct kvm_vcpu *vcpu = ktimer->vcpu;
-	wait_queue_head_t *q = &vcpu->wq;
-
-	/*
-	 * There is a race window between reading and incrementing, but we do
-	 * not care about potentially losing timer events in the !reinject
-	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
-	 * in vcpu_enter_guest.
-	 */
-	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
-		atomic_inc(&ktimer->pending);
-		/* FIXME: this code should not know anything about vcpus */
-		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
-	}
-
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
-
-	if (ktimer->t_ops->is_periodic(ktimer)) {
-		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
-		return HRTIMER_RESTART;
-	} else
-		return HRTIMER_NORESTART;
-}
-- 
cgit v1.2.3


From 2a6eac9638a92b61de04bac4233d8ca665ae96af Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 26 Jul 2012 18:01:51 +0300
Subject: KVM: Simplify kvm_timer

'reinject' is never initialized
't_ops' only serves as indirection to lapic_is_periodic; call that directly
   instead
'kvm' is never used
'vcpu' can be derived via container_of

Remove these fields.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/lapic.c | 18 +++++-------------
 arch/x86/kvm/lapic.h |  8 --------
 2 files changed, 5 insertions(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 61ed32cd17c1..0cd431c85d38 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1214,10 +1214,8 @@ int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
  *----------------------------------------------------------------------
  */
 
-static bool lapic_is_periodic(struct kvm_timer *ktimer)
+static bool lapic_is_periodic(struct kvm_lapic *apic)
 {
-	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic,
-					      lapic_timer);
 	return apic_lvtt_period(apic);
 }
 
@@ -1253,10 +1251,6 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
 		kvm_apic_local_deliver(apic, APIC_LVT0);
 }
 
-static struct kvm_timer_ops lapic_timer_ops = {
-	.is_periodic = lapic_is_periodic,
-};
-
 static const struct kvm_io_device_ops apic_mmio_ops = {
 	.read     = apic_mmio_read,
 	.write    = apic_mmio_write,
@@ -1265,7 +1259,8 @@ static const struct kvm_io_device_ops apic_mmio_ops = {
 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 {
 	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
-	struct kvm_vcpu *vcpu = ktimer->vcpu;
+	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
+	struct kvm_vcpu *vcpu = apic->vcpu;
 	wait_queue_head_t *q = &vcpu->wq;
 
 	/*
@@ -1274,7 +1269,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
 	 * in vcpu_enter_guest.
 	 */
-	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
+	if (!atomic_read(&ktimer->pending)) {
 		atomic_inc(&ktimer->pending);
 		/* FIXME: this code should not know anything about vcpus */
 		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
@@ -1283,7 +1278,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
 	if (waitqueue_active(q))
 		wake_up_interruptible(q);
 
-	if (ktimer->t_ops->is_periodic(ktimer)) {
+	if (lapic_is_periodic(apic)) {
 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
 		return HRTIMER_RESTART;
 	} else
@@ -1314,9 +1309,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
 		     HRTIMER_MODE_ABS);
 	apic->lapic_timer.timer.function = apic_timer_fn;
-	apic->lapic_timer.t_ops = &lapic_timer_ops;
-	apic->lapic_timer.kvm = vcpu->kvm;
-	apic->lapic_timer.vcpu = vcpu;
 
 	apic->base_address = APIC_DEFAULT_PHYS_BASE;
 	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index d7251c92ed42..166766fffd9f 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -11,14 +11,6 @@ struct kvm_timer {
 	u32 timer_mode_mask;
 	u64 tscdeadline;
 	atomic_t pending;			/* accumulated triggered timers */
-	bool reinject;
-	struct kvm_timer_ops *t_ops;
-	struct kvm *kvm;
-	struct kvm_vcpu *vcpu;
-};
-
-struct kvm_timer_ops {
-	bool (*is_periodic)(struct kvm_timer *);
 };
 
 struct kvm_lapic {
-- 
cgit v1.2.3


From 9d9d2239bdecd525ce3eb6cbfe4abb925c98208c Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 26 Jul 2012 18:01:52 +0300
Subject: KVM: Simplify kvm_pit_timer

'timer_mode_mask' is unused
'tscdeadline' is unused
't_ops' only adds needless indirection
'vcpu' is unused

Remove.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/i8254.c | 14 +-------------
 arch/x86/kvm/i8254.h |  8 --------
 2 files changed, 1 insertion(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 1d8e75702d95..a9e187a5b199 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -272,17 +272,6 @@ static void destroy_pit_timer(struct kvm_pit *pit)
 	flush_kthread_work(&pit->expired);
 }
 
-static bool kpit_is_periodic(struct kvm_pit_timer *ktimer)
-{
-	struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state,
-						 pit_timer);
-	return ps->is_periodic;
-}
-
-static struct kvm_pit_timer_ops kpit_ops = {
-	.is_periodic = kpit_is_periodic,
-};
-
 static void pit_do_work(struct kthread_work *work)
 {
 	struct kvm_pit *pit = container_of(work, struct kvm_pit, expired);
@@ -330,7 +319,7 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 		queue_kthread_work(&pt->worker, &pt->expired);
 	}
 
-	if (ktimer->t_ops->is_periodic(ktimer)) {
+	if (pt->pit_state.is_periodic) {
 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
 		return HRTIMER_RESTART;
 	} else
@@ -357,7 +346,6 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 	ps->is_periodic = is_period;
 
 	pt->timer.function = pit_timer_fn;
-	pt->t_ops = &kpit_ops;
 	pt->kvm = ps->pit->kvm;
 
 	atomic_set(&pt->pending, 0);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index 3351816e8b32..c9bbcb889c40 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -24,17 +24,9 @@ struct kvm_kpit_channel_state {
 struct kvm_pit_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
-	u32 timer_mode_mask;
-	u64 tscdeadline;
 	atomic_t pending;			/* accumulated triggered timers */
 	bool reinject;
-	struct kvm_pit_timer_ops *t_ops;
 	struct kvm *kvm;
-	struct kvm_vcpu *vcpu;
-};
-
-struct kvm_pit_timer_ops {
-	bool (*is_periodic)(struct kvm_pit_timer *);
 };
 
 struct kvm_kpit_state {
-- 
cgit v1.2.3


From 26ef19242f6e4d747a61b5fd8da72343838864e4 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Thu, 26 Jul 2012 18:01:53 +0300
Subject: KVM: fold kvm_pit_timer into kvm_kpit_state

One structure nests inside the other, providing no value at all.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/i8254.c | 52 +++++++++++++++++++++++++---------------------------
 arch/x86/kvm/i8254.h | 14 +++++---------
 arch/x86/kvm/x86.c   |  2 +-
 3 files changed, 31 insertions(+), 37 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index a9e187a5b199..11300d2fa714 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -108,7 +108,7 @@ static s64 __kpit_elapsed(struct kvm *kvm)
 	ktime_t remaining;
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
 
-	if (!ps->pit_timer.period)
+	if (!ps->period)
 		return 0;
 
 	/*
@@ -120,9 +120,9 @@ static s64 __kpit_elapsed(struct kvm *kvm)
 	 * itself with the initial count and continues counting
 	 * from there.
 	 */
-	remaining = hrtimer_get_remaining(&ps->pit_timer.timer);
-	elapsed = ps->pit_timer.period - ktime_to_ns(remaining);
-	elapsed = mod_64(elapsed, ps->pit_timer.period);
+	remaining = hrtimer_get_remaining(&ps->timer);
+	elapsed = ps->period - ktime_to_ns(remaining);
+	elapsed = mod_64(elapsed, ps->period);
 
 	return elapsed;
 }
@@ -238,12 +238,12 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian)
 	int value;
 
 	spin_lock(&ps->inject_lock);
-	value = atomic_dec_return(&ps->pit_timer.pending);
+	value = atomic_dec_return(&ps->pending);
 	if (value < 0)
 		/* spurious acks can be generated if, for example, the
 		 * PIC is being reset.  Handle it gracefully here
 		 */
-		atomic_inc(&ps->pit_timer.pending);
+		atomic_inc(&ps->pending);
 	else if (value > 0)
 		/* in this case, we had multiple outstanding pit interrupts
 		 * that we needed to inject.  Reinject
@@ -261,14 +261,14 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu)
 	if (!kvm_vcpu_is_bsp(vcpu) || !pit)
 		return;
 
-	timer = &pit->pit_state.pit_timer.timer;
+	timer = &pit->pit_state.timer;
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
 static void destroy_pit_timer(struct kvm_pit *pit)
 {
-	hrtimer_cancel(&pit->pit_state.pit_timer.timer);
+	hrtimer_cancel(&pit->pit_state.timer);
 	flush_kthread_work(&pit->expired);
 }
 
@@ -311,16 +311,16 @@ static void pit_do_work(struct kthread_work *work)
 
 static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 {
-	struct kvm_pit_timer *ktimer = container_of(data, struct kvm_pit_timer, timer);
-	struct kvm_pit *pt = ktimer->kvm->arch.vpit;
+	struct kvm_kpit_state *ps = container_of(data, struct kvm_kpit_state, timer);
+	struct kvm_pit *pt = ps->kvm->arch.vpit;
 
-	if (ktimer->reinject || !atomic_read(&ktimer->pending)) {
-		atomic_inc(&ktimer->pending);
+	if (ps->reinject || !atomic_read(&ps->pending)) {
+		atomic_inc(&ps->pending);
 		queue_kthread_work(&pt->worker, &pt->expired);
 	}
 
-	if (pt->pit_state.is_periodic) {
-		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
+	if (ps->is_periodic) {
+		hrtimer_add_expires_ns(&ps->timer, ps->period);
 		return HRTIMER_RESTART;
 	} else
 		return HRTIMER_NORESTART;
@@ -329,7 +329,6 @@ static enum hrtimer_restart pit_timer_fn(struct hrtimer *data)
 static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 {
 	struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state;
-	struct kvm_pit_timer *pt = &ps->pit_timer;
 	s64 interval;
 
 	if (!irqchip_in_kernel(kvm) || ps->flags & KVM_PIT_FLAGS_HPET_LEGACY)
@@ -340,18 +339,18 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 	pr_debug("create pit timer, interval is %llu nsec\n", interval);
 
 	/* TODO The new value only affected after the retriggered */
-	hrtimer_cancel(&pt->timer);
+	hrtimer_cancel(&ps->timer);
 	flush_kthread_work(&ps->pit->expired);
-	pt->period = interval;
+	ps->period = interval;
 	ps->is_periodic = is_period;
 
-	pt->timer.function = pit_timer_fn;
-	pt->kvm = ps->pit->kvm;
+	ps->timer.function = pit_timer_fn;
+	ps->kvm = ps->pit->kvm;
 
-	atomic_set(&pt->pending, 0);
+	atomic_set(&ps->pending, 0);
 	ps->irq_ack = 1;
 
-	hrtimer_start(&pt->timer, ktime_add_ns(ktime_get(), interval),
+	hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
 		      HRTIMER_MODE_ABS);
 }
 
@@ -627,7 +626,7 @@ void kvm_pit_reset(struct kvm_pit *pit)
 	}
 	mutex_unlock(&pit->pit_state.lock);
 
-	atomic_set(&pit->pit_state.pit_timer.pending, 0);
+	atomic_set(&pit->pit_state.pending, 0);
 	pit->pit_state.irq_ack = 1;
 }
 
@@ -636,7 +635,7 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask)
 	struct kvm_pit *pit = container_of(kimn, struct kvm_pit, mask_notifier);
 
 	if (!mask) {
-		atomic_set(&pit->pit_state.pit_timer.pending, 0);
+		atomic_set(&pit->pit_state.pending, 0);
 		pit->pit_state.irq_ack = 1;
 	}
 }
@@ -694,12 +693,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 
 	pit_state = &pit->pit_state;
 	pit_state->pit = pit;
-	hrtimer_init(&pit_state->pit_timer.timer,
-		     CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&pit_state->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
 	pit_state->irq_ack_notifier.gsi = 0;
 	pit_state->irq_ack_notifier.irq_acked = kvm_pit_ack_irq;
 	kvm_register_irq_ack_notifier(kvm, &pit_state->irq_ack_notifier);
-	pit_state->pit_timer.reinject = true;
+	pit_state->reinject = true;
 	mutex_unlock(&pit->pit_state.lock);
 
 	kvm_pit_reset(pit);
@@ -749,7 +747,7 @@ void kvm_free_pit(struct kvm *kvm)
 		kvm_unregister_irq_ack_notifier(kvm,
 				&kvm->arch.vpit->pit_state.irq_ack_notifier);
 		mutex_lock(&kvm->arch.vpit->pit_state.lock);
-		timer = &kvm->arch.vpit->pit_state.pit_timer.timer;
+		timer = &kvm->arch.vpit->pit_state.timer;
 		hrtimer_cancel(timer);
 		flush_kthread_work(&kvm->arch.vpit->expired);
 		kthread_stop(kvm->arch.vpit->worker_task);
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h
index c9bbcb889c40..dd1b16b611b0 100644
--- a/arch/x86/kvm/i8254.h
+++ b/arch/x86/kvm/i8254.h
@@ -21,19 +21,15 @@ struct kvm_kpit_channel_state {
 	ktime_t count_load_time;
 };
 
-struct kvm_pit_timer {
-	struct hrtimer timer;
-	s64 period; 				/* unit: ns */
-	atomic_t pending;			/* accumulated triggered timers */
-	bool reinject;
-	struct kvm *kvm;
-};
-
 struct kvm_kpit_state {
 	struct kvm_kpit_channel_state channels[3];
 	u32 flags;
-	struct kvm_pit_timer pit_timer;
 	bool is_periodic;
+	s64 period; 				/* unit: ns */
+	struct hrtimer timer;
+	atomic_t pending;			/* accumulated triggered timers */
+	bool reinject;
+	struct kvm *kvm;
 	u32    speaker_data_on;
 	struct mutex lock;
 	struct kvm_pit *pit;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b6379e55ee27..3a53bcc24f20 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3082,7 +3082,7 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 	if (!kvm->arch.vpit)
 		return -ENXIO;
 	mutex_lock(&kvm->arch.vpit->pit_state.lock);
-	kvm->arch.vpit->pit_state.pit_timer.reinject = control->pit_reinject;
+	kvm->arch.vpit->pit_state.reinject = control->pit_reinject;
 	mutex_unlock(&kvm->arch.vpit->pit_state.lock);
 	return 0;
 }
-- 
cgit v1.2.3


From 7af6c2456851eb08d51d95de38ae8302994031e9 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Wed, 11 Jul 2012 14:20:51 +0300
Subject: crypto: arch/x86 - cleanup - remove unneeded crypto_alg.cra_list
 initializations

Initialization of cra_list is currently mixed, most ciphers initialize this
field and most shashes do not. Initialization however is not needed at all
since cra_list is initialized/overwritten in __crypto_register_alg() with
list_add(). Therefore perform cleanup to remove all unneeded initializations
of this field in 'arch/x86/crypto/'.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aes_glue.c                 |  1 -
 arch/x86/crypto/aesni-intel_glue.c         |  5 +----
 arch/x86/crypto/blowfish_glue.c            |  4 ----
 arch/x86/crypto/camellia_glue.c            |  6 ------
 arch/x86/crypto/ghash-clmulni-intel_glue.c |  2 --
 arch/x86/crypto/salsa20_glue.c             |  1 -
 arch/x86/crypto/serpent_avx_glue.c         | 10 ----------
 arch/x86/crypto/serpent_sse2_glue.c        | 10 ----------
 arch/x86/crypto/twofish_avx_glue.c         | 10 ----------
 arch/x86/crypto/twofish_glue.c             |  1 -
 arch/x86/crypto/twofish_glue_3way.c        |  5 -----
 11 files changed, 1 insertion(+), 54 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 59b37deb8c8d..aafe8ce0d65d 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -40,7 +40,6 @@ static struct crypto_alg aes_alg = {
 	.cra_blocksize		= AES_BLOCK_SIZE,
 	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(aes_alg.cra_list),
 	.cra_u	= {
 		.cipher	= {
 			.cia_min_keysize	= AES_MIN_KEY_SIZE,
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 34fdcff4d2c8..648347a05773 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1118,7 +1118,7 @@ MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id);
 
 static int __init aesni_init(void)
 {
-	int err, i;
+	int err;
 
 	if (!x86_match_cpu(aesni_cpu_id))
 		return -ENODEV;
@@ -1127,9 +1127,6 @@ static int __init aesni_init(void)
 	if (err)
 		return err;
 
-	for (i = 0; i < ARRAY_SIZE(aesni_algs); i++)
-		INIT_LIST_HEAD(&aesni_algs[i].cra_list);
-
 	return crypto_register_algs(aesni_algs, ARRAY_SIZE(aesni_algs));
 }
 
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c
index 7967474de8f7..50ec333b70e6 100644
--- a/arch/x86/crypto/blowfish_glue.c
+++ b/arch/x86/crypto/blowfish_glue.c
@@ -367,7 +367,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_ctxsize		= sizeof(struct bf_ctx),
 	.cra_alignmask		= 0,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[0].cra_list),
 	.cra_u = {
 		.cipher = {
 			.cia_min_keysize	= BF_MIN_KEY_SIZE,
@@ -387,7 +386,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= BF_MIN_KEY_SIZE,
@@ -407,7 +405,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= BF_MIN_KEY_SIZE,
@@ -428,7 +425,6 @@ static struct crypto_alg bf_algs[4] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(bf_algs[3].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= BF_MIN_KEY_SIZE,
diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index eeb2b3b743e9..7a74d7bb326d 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -1601,7 +1601,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_ctxsize		= sizeof(struct camellia_ctx),
 	.cra_alignmask		= 0,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[0].cra_list),
 	.cra_u			= {
 		.cipher = {
 			.cia_min_keysize = CAMELLIA_MIN_KEY_SIZE,
@@ -1621,7 +1620,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
@@ -1641,7 +1639,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
@@ -1662,7 +1659,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[3].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= CAMELLIA_MIN_KEY_SIZE,
@@ -1683,7 +1679,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[4].cra_list),
 	.cra_exit		= lrw_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -1707,7 +1702,6 @@ static struct crypto_alg camellia_algs[6] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(camellia_algs[5].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= CAMELLIA_MIN_KEY_SIZE * 2,
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index b4bf0a63b520..6759dd1135be 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -150,7 +150,6 @@ static struct shash_alg ghash_alg = {
 		.cra_blocksize		= GHASH_BLOCK_SIZE,
 		.cra_ctxsize		= sizeof(struct ghash_ctx),
 		.cra_module		= THIS_MODULE,
-		.cra_list		= LIST_HEAD_INIT(ghash_alg.base.cra_list),
 	},
 };
 
@@ -288,7 +287,6 @@ static struct ahash_alg ghash_async_alg = {
 			.cra_blocksize		= GHASH_BLOCK_SIZE,
 			.cra_type		= &crypto_ahash_type,
 			.cra_module		= THIS_MODULE,
-			.cra_list		= LIST_HEAD_INIT(ghash_async_alg.halg.base.cra_list),
 			.cra_init		= ghash_async_init_tfm,
 			.cra_exit		= ghash_async_exit_tfm,
 		},
diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c
index bccb76d80987..a3a3c0205c16 100644
--- a/arch/x86/crypto/salsa20_glue.c
+++ b/arch/x86/crypto/salsa20_glue.c
@@ -97,7 +97,6 @@ static struct crypto_alg alg = {
 	.cra_ctxsize        =   sizeof(struct salsa20_ctx),
 	.cra_alignmask      =	3,
 	.cra_module         =   THIS_MODULE,
-	.cra_list           =   LIST_HEAD_INIT(alg.cra_list),
 	.cra_u              =   {
 		.blkcipher = {
 			.setkey         =   setkey,
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index b36bdac237eb..3f543a04cf1e 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -390,7 +390,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -410,7 +409,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -430,7 +428,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -451,7 +448,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[3].cra_list),
 	.cra_exit		= lrw_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -475,7 +471,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
@@ -496,7 +491,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[5].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -518,7 +512,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[6].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -541,7 +534,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[7].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -565,7 +557,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[8].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -590,7 +581,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[9].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
diff --git a/arch/x86/crypto/serpent_sse2_glue.c b/arch/x86/crypto/serpent_sse2_glue.c
index d679c8675f4a..9107a9908c41 100644
--- a/arch/x86/crypto/serpent_sse2_glue.c
+++ b/arch/x86/crypto/serpent_sse2_glue.c
@@ -393,7 +393,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -413,7 +412,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -433,7 +431,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE,
@@ -454,7 +451,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[3].cra_list),
 	.cra_exit		= lrw_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -478,7 +474,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
@@ -499,7 +494,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[5].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -521,7 +515,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[6].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -544,7 +537,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[7].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -568,7 +560,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[8].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -593,7 +584,6 @@ static struct crypto_alg serpent_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(serpent_algs[9].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 782b67ddaf6a..e7708b5442e0 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -378,7 +378,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -398,7 +397,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -418,7 +416,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -439,7 +436,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[3].cra_list),
 	.cra_exit		= lrw_twofish_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -463,7 +459,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE * 2,
@@ -484,7 +479,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[5].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -506,7 +500,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[6].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -529,7 +522,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[7].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -553,7 +545,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[8].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
@@ -578,7 +569,6 @@ static struct crypto_alg twofish_algs[10] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(twofish_algs[9].cra_list),
 	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index 359ae084275c..0a5202303501 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -70,7 +70,6 @@ static struct crypto_alg alg = {
 	.cra_ctxsize		=	sizeof(struct twofish_ctx),
 	.cra_alignmask		=	0,
 	.cra_module		=	THIS_MODULE,
-	.cra_list		=	LIST_HEAD_INIT(alg.cra_list),
 	.cra_u			=	{
 		.cipher = {
 			.cia_min_keysize	=	TF_MIN_KEY_SIZE,
diff --git a/arch/x86/crypto/twofish_glue_3way.c b/arch/x86/crypto/twofish_glue_3way.c
index 15f9347316c8..aa3eb358b7e8 100644
--- a/arch/x86/crypto/twofish_glue_3way.c
+++ b/arch/x86/crypto/twofish_glue_3way.c
@@ -342,7 +342,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[0].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -362,7 +361,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[1].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -383,7 +381,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[2].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE,
@@ -404,7 +401,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[3].cra_list),
 	.cra_exit		= lrw_twofish_exit_tfm,
 	.cra_u = {
 		.blkcipher = {
@@ -426,7 +422,6 @@ static struct crypto_alg tf_algs[5] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_blkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_list		= LIST_HEAD_INIT(tf_algs[4].cra_list),
 	.cra_u = {
 		.blkcipher = {
 			.min_keysize	= TF_MIN_KEY_SIZE * 2,
-- 
cgit v1.2.3


From 4d6d6a2c850f89bc9283d02519cb536baba72032 Mon Sep 17 00:00:00 2001
From: Johannes Goetzfried
 <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Date: Wed, 11 Jul 2012 19:37:37 +0200
Subject: crypto: cast5 - add x86_64/avx assembler implementation

This patch adds a x86_64/avx assembler implementation of the Cast5 block
cipher. The implementation processes sixteen blocks in parallel (four 4 block
chunk AVX operations). The table-lookups are done in general-purpose registers.
For small blocksizes the functions from the generic module are called. A good
performance increase is provided for blocksizes greater or equal to 128B.

Patch has been tested with tcrypt and automated filesystem tests.

Tcrypt benchmark results:

Intel Core i5-2500 CPU (fam:6, model:42, step:7)

cast5-avx-x86_64 vs. cast5-generic
64bit key:
size    ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec
16B     0.99x   0.99x   1.00x   1.00x   1.02x   1.01x
64B     1.00x   1.00x   0.98x   1.00x   1.01x   1.02x
256B    2.03x   2.01x   0.95x   2.11x   2.12x   2.13x
1024B   2.30x   2.24x   0.95x   2.29x   2.35x   2.35x
8192B   2.31x   2.27x   0.95x   2.31x   2.39x   2.39x

128bit key:
size    ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec
16B     0.99x   0.99x   1.00x   1.00x   1.01x   1.01x
64B     1.00x   1.00x   0.98x   1.01x   1.02x   1.01x
256B    2.17x   2.13x   0.96x   2.19x   2.19x   2.19x
1024B   2.29x   2.32x   0.95x   2.34x   2.37x   2.38x
8192B   2.35x   2.32x   0.95x   2.35x   2.39x   2.39x

Signed-off-by: Johannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/Makefile                  |   2 +
 arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 322 ++++++++++++++++++
 arch/x86/crypto/cast5_avx_glue.c          | 530 ++++++++++++++++++++++++++++++
 crypto/Kconfig                            |  14 +
 crypto/testmgr.c                          |  60 ++++
 5 files changed, 928 insertions(+)
 create mode 100644 arch/x86/crypto/cast5-avx-x86_64-asm_64.S
 create mode 100644 arch/x86/crypto/cast5_avx_glue.c

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index e908e5de82d3..565e82b00142 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
+obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
@@ -32,6 +33,7 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
+cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
new file mode 100644
index 000000000000..94693c877e3b
--- /dev/null
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -0,0 +1,322 @@
+/*
+ * Cast5 Cipher 16-way parallel algorithm (AVX/x86_64)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "cast5-avx-x86_64-asm_64.S"
+.text
+
+.extern cast5_s1
+.extern cast5_s2
+.extern cast5_s3
+.extern cast5_s4
+
+/* structure of crypto context */
+#define km	0
+#define kr	(16*4)
+#define rr	((16*4)+16)
+
+/* s-boxes */
+#define s1	cast5_s1
+#define s2	cast5_s2
+#define s3	cast5_s3
+#define s4	cast5_s4
+
+/**********************************************************************
+  16-way AVX cast5
+ **********************************************************************/
+#define CTX %rdi
+
+#define RL1 %xmm0
+#define RR1 %xmm1
+#define RL2 %xmm2
+#define RR2 %xmm3
+#define RL3 %xmm4
+#define RR3 %xmm5
+#define RL4 %xmm6
+#define RR4 %xmm7
+
+#define RX %xmm8
+
+#define RKM  %xmm9
+#define RKRF %xmm10
+#define RKRR %xmm11
+
+#define RTMP  %xmm12
+#define RMASK %xmm13
+#define R32   %xmm14
+
+#define RID1  %rax
+#define RID1b %al
+#define RID2  %rbx
+#define RID2b %bl
+
+#define RGI1   %rdx
+#define RGI1bl %dl
+#define RGI1bh %dh
+#define RGI2   %rcx
+#define RGI2bl %cl
+#define RGI2bh %ch
+
+#define RFS1  %r8
+#define RFS1d %r8d
+#define RFS2  %r9
+#define RFS2d %r9d
+#define RFS3  %r10
+#define RFS3d %r10d
+
+
+#define lookup_32bit(src, dst, op1, op2, op3) \
+	movb		src ## bl,     RID1b;    \
+	movb		src ## bh,     RID2b;    \
+	movl		s1(, RID1, 4), dst ## d; \
+	op1		s2(, RID2, 4), dst ## d; \
+	shrq $16,	src;                     \
+	movb		src ## bl,     RID1b;    \
+	movb		src ## bh,     RID2b;    \
+	op2		s3(, RID1, 4), dst ## d; \
+	op3		s4(, RID2, 4), dst ## d;
+
+#define F(a, x, op0, op1, op2, op3) \
+	op0	a,	RKM,  x;                 \
+	vpslld  RKRF,	x,    RTMP;              \
+	vpsrld  RKRR,	x,    x;                 \
+	vpor	RTMP,	x,    x;                 \
+	\
+	vpshufb	RMASK,	x,    x;                 \
+	vmovq		x,    RGI1;              \
+	vpsrldq $8,	x,    x;                 \
+	vmovq		x,    RGI2;              \
+	\
+	lookup_32bit(RGI1, RFS1, op1, op2, op3); \
+	shrq $16,	RGI1;                    \
+	lookup_32bit(RGI1, RFS2, op1, op2, op3); \
+	shlq $32,	RFS2;                    \
+	orq		RFS1, RFS2;              \
+	\
+	lookup_32bit(RGI2, RFS1, op1, op2, op3); \
+	shrq $16,	RGI2;                    \
+	lookup_32bit(RGI2, RFS3, op1, op2, op3); \
+	shlq $32,	RFS3;                    \
+	orq		RFS1, RFS3;              \
+	\
+	vmovq		RFS2, x;                 \
+	vpinsrq $1,	RFS3, x, x;
+
+#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl)
+#define F2(b, x) F(b, x, vpxor,  subl, addl, xorl)
+#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl)
+
+#define subround(a, b, x, n, f) \
+	F ## f(b, x);  \
+	vpxor a, x, a;
+
+#define round(l, r, n, f) \
+	vbroadcastss 	(km+(4*n))(CTX), RKM;        \
+	vpinsrb $0,	(kr+n)(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,            R32,  RKRR; \
+	subround(l ## 1, r ## 1, RX, n, f);          \
+	subround(l ## 2, r ## 2, RX, n, f);          \
+	subround(l ## 3, r ## 3, RX, n, f);          \
+	subround(l ## 4, r ## 4, RX, n, f);
+
+
+#define transpose_2x4(x0, x1, t0, t1) \
+	vpunpckldq		x1, x0, t0; \
+	vpunpckhdq		x1, x0, t1; \
+	\
+	vpunpcklqdq		t1, t0, x0; \
+	vpunpckhqdq		t1, t0, x1;
+
+#define inpack_blocks(in, x0, x1, t0, t1) \
+	vmovdqu (0*4*4)(in),	x0; \
+	vmovdqu (1*4*4)(in),	x1; \
+	vpshufb RMASK, x0,	x0; \
+	vpshufb RMASK, x1,	x1; \
+	\
+	transpose_2x4(x0, x1, t0, t1)
+
+#define outunpack_blocks(out, x0, x1, t0, t1) \
+	transpose_2x4(x0, x1, t0, t1) \
+	\
+	vpshufb RMASK,	x0, x0;           \
+	vpshufb RMASK,	x1, x1;           \
+	vmovdqu 	x0, (0*4*4)(out); \
+	vmovdqu		x1, (1*4*4)(out);
+
+#define outunpack_xor_blocks(out, x0, x1, t0, t1) \
+	transpose_2x4(x0, x1, t0, t1) \
+	\
+	vpshufb RMASK,	x0, x0;               \
+	vpshufb RMASK,	x1, x1;               \
+	vpxor		(0*4*4)(out), x0, x0; \
+	vmovdqu 	x0, (0*4*4)(out);     \
+	vpxor		(1*4*4)(out), x1, x1; \
+	vmovdqu	        x1, (1*4*4)(out);
+
+.align 16
+.Lbswap_mask:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.L32_mask:
+	.byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0
+
+.align 16
+.global __cast5_enc_blk_16way
+.type   __cast5_enc_blk_16way,@function;
+
+__cast5_enc_blk_16way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool, if true: xor output
+	 */
+
+	pushq %rbx;
+	pushq %rcx;
+
+	vmovdqu .Lbswap_mask, RMASK;
+	vmovdqu .L32_mask, R32;
+	vpxor RKRF, RKRF, RKRF;
+
+	inpack_blocks(%rdx, RL1, RR1, RTMP, RX);
+	leaq (2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL2, RR2, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	inpack_blocks(%rax, RL3, RR3, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	inpack_blocks(%rax, RL4, RR4, RTMP, RX);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	round(RL, RR, 0, 1);
+	round(RR, RL, 1, 2);
+	round(RL, RR, 2, 3);
+	round(RR, RL, 3, 1);
+	round(RL, RR, 4, 2);
+	round(RR, RL, 5, 3);
+	round(RL, RR, 6, 1);
+	round(RR, RL, 7, 2);
+	round(RL, RR, 8, 3);
+	round(RR, RL, 9, 1);
+	round(RL, RR, 10, 2);
+	round(RR, RL, 11, 3);
+
+	movb rr(CTX), %al;
+	testb %al, %al;
+	jnz __skip_enc;
+
+	round(RL, RR, 12, 1);
+	round(RR, RL, 13, 2);
+	round(RL, RR, 14, 3);
+	round(RR, RL, 15, 1);
+
+__skip_enc:
+	popq %rcx;
+	popq %rbx;
+
+	testb %cl, %cl;
+	jnz __enc_xor16;
+
+	outunpack_blocks(%rsi, RR1, RL1, RTMP, RX);
+	leaq (2*4*4)(%rsi), %rax;
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX);
+
+	ret;
+
+__enc_xor16:
+	outunpack_xor_blocks(%rsi, RR1, RL1, RTMP, RX);
+	leaq (2*4*4)(%rsi), %rax;
+	outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX);
+
+	ret;
+
+.align 16
+.global cast5_dec_blk_16way
+.type   cast5_dec_blk_16way,@function;
+
+cast5_dec_blk_16way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+
+	pushq %rbx;
+
+	vmovdqu .Lbswap_mask, RMASK;
+	vmovdqu .L32_mask, R32;
+	vpxor RKRF, RKRF, RKRF;
+
+	inpack_blocks(%rdx, RL1, RR1, RTMP, RX);
+	leaq (2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL2, RR2, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	inpack_blocks(%rax, RL3, RR3, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	inpack_blocks(%rax, RL4, RR4, RTMP, RX);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	movb rr(CTX), %al;
+	testb %al, %al;
+	jnz __skip_dec;
+
+	round(RL, RR, 15, 1);
+	round(RR, RL, 14, 3);
+	round(RL, RR, 13, 2);
+	round(RR, RL, 12, 1);
+
+__skip_dec:
+	round(RL, RR, 11, 3);
+	round(RR, RL, 10, 2);
+	round(RL, RR, 9, 1);
+	round(RR, RL, 8, 3);
+	round(RL, RR, 7, 2);
+	round(RR, RL, 6, 1);
+	round(RL, RR, 5, 3);
+	round(RR, RL, 4, 2);
+	round(RL, RR, 3, 1);
+	round(RR, RL, 2, 3);
+	round(RL, RR, 1, 2);
+	round(RR, RL, 0, 1);
+
+	popq %rbx;
+
+	outunpack_blocks(%rsi, RR1, RL1, RTMP, RX);
+	leaq (2*4*4)(%rsi), %rax;
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX);
+	leaq (2*4*4)(%rax), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX);
+
+	ret;
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
new file mode 100644
index 000000000000..445aab06387b
--- /dev/null
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -0,0 +1,530 @@
+/*
+ * Glue Code for the AVX assembler implemention of the Cast5 Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/cast5.h>
+#include <crypto/cryptd.h>
+#include <crypto/ctr.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+
+#define CAST5_PARALLEL_BLOCKS 16
+
+asmlinkage void __cast5_enc_blk_16way(struct cast5_ctx *ctx, u8 *dst,
+				      const u8 *src, bool xor);
+asmlinkage void cast5_dec_blk_16way(struct cast5_ctx *ctx, u8 *dst,
+				    const u8 *src);
+
+static inline void cast5_enc_blk_xway(struct cast5_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	__cast5_enc_blk_16way(ctx, dst, src, false);
+}
+
+static inline void cast5_enc_blk_xway_xor(struct cast5_ctx *ctx, u8 *dst,
+					  const u8 *src)
+{
+	__cast5_enc_blk_16way(ctx, dst, src, true);
+}
+
+static inline void cast5_dec_blk_xway(struct cast5_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	cast5_dec_blk_16way(ctx, dst, src);
+}
+
+
+static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
+			      NULL, fpu_enabled, nbytes);
+}
+
+static inline void cast5_fpu_end(bool fpu_enabled)
+{
+	return glue_fpu_end(fpu_enabled);
+}
+
+static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
+		     bool enc)
+{
+	bool fpu_enabled = false;
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes;
+	int err;
+
+	err = blkcipher_walk_virt(desc, walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	while ((nbytes = walk->nbytes)) {
+		u8 *wsrc = walk->src.virt.addr;
+		u8 *wdst = walk->dst.virt.addr;
+
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+
+		/* Process multi-block batch */
+		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+			do {
+				if (enc)
+					cast5_enc_blk_xway(ctx, wdst, wsrc);
+				else
+					cast5_dec_blk_xway(ctx, wdst, wsrc);
+
+				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
+				wdst += bsize * CAST5_PARALLEL_BLOCKS;
+				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
+			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
+
+			if (nbytes < bsize)
+				goto done;
+		}
+
+		/* Handle leftovers */
+		do {
+			if (enc)
+				__cast5_encrypt(ctx, wdst, wsrc);
+			else
+				__cast5_decrypt(ctx, wdst, wsrc);
+
+			wsrc += bsize;
+			wdst += bsize;
+			nbytes -= bsize;
+		} while (nbytes >= bsize);
+
+done:
+		err = blkcipher_walk_done(desc, walk, nbytes);
+	}
+
+	cast5_fpu_end(fpu_enabled);
+	return err;
+}
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, true);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	return ecb_crypt(desc, &walk, false);
+}
+
+static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 *iv = (u64 *)walk->iv;
+
+	do {
+		*dst = *src ^ *iv;
+		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
+		iv = dst;
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+	*(u64 *)walk->iv ^= *iv;
+	return nbytes;
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+
+	while ((nbytes = walk.nbytes)) {
+		nbytes = __cbc_encrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	return err;
+}
+
+static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
+				  struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 ivs[CAST5_PARALLEL_BLOCKS - 1];
+	u64 last_iv;
+	int i;
+
+	/* Start of the last block. */
+	src += nbytes / bsize - 1;
+	dst += nbytes / bsize - 1;
+
+	last_iv = *src;
+
+	/* Process multi-block batch */
+	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+		do {
+			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
+			src -= CAST5_PARALLEL_BLOCKS - 1;
+			dst -= CAST5_PARALLEL_BLOCKS - 1;
+
+			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
+				ivs[i] = src[i];
+
+			cast5_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+			for (i = 0; i < CAST5_PARALLEL_BLOCKS - 1; i++)
+				*(dst + (i + 1)) ^= *(ivs + i);
+
+			nbytes -= bsize;
+			if (nbytes < bsize)
+				goto done;
+
+			*dst ^= *(src - 1);
+			src -= 1;
+			dst -= 1;
+		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Handle leftovers */
+	for (;;) {
+		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
+
+		nbytes -= bsize;
+		if (nbytes < bsize)
+			break;
+
+		*dst ^= *(src - 1);
+		src -= 1;
+		dst -= 1;
+	}
+
+done:
+	*dst ^= *(u64 *)walk->iv;
+	*(u64 *)walk->iv = last_iv;
+
+	return nbytes;
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt(desc, &walk);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	while ((nbytes = walk.nbytes)) {
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		nbytes = __cbc_decrypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	cast5_fpu_end(fpu_enabled);
+	return err;
+}
+
+static void ctr_crypt_final(struct blkcipher_desc *desc,
+			    struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 *ctrblk = walk->iv;
+	u8 keystream[CAST5_BLOCK_SIZE];
+	u8 *src = walk->src.virt.addr;
+	u8 *dst = walk->dst.virt.addr;
+	unsigned int nbytes = walk->nbytes;
+
+	__cast5_encrypt(ctx, keystream, ctrblk);
+	crypto_xor(keystream, src, nbytes);
+	memcpy(dst, keystream, nbytes);
+
+	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
+}
+
+static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
+				struct blkcipher_walk *walk)
+{
+	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	const unsigned int bsize = CAST5_BLOCK_SIZE;
+	unsigned int nbytes = walk->nbytes;
+	u64 *src = (u64 *)walk->src.virt.addr;
+	u64 *dst = (u64 *)walk->dst.virt.addr;
+	u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
+	__be64 ctrblocks[CAST5_PARALLEL_BLOCKS];
+	int i;
+
+	/* Process multi-block batch */
+	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
+		do {
+			/* create ctrblks for parallel encrypt */
+			for (i = 0; i < CAST5_PARALLEL_BLOCKS; i++) {
+				if (dst != src)
+					dst[i] = src[i];
+
+				ctrblocks[i] = cpu_to_be64(ctrblk++);
+			}
+
+			cast5_enc_blk_xway_xor(ctx, (u8 *)dst,
+					       (u8 *)ctrblocks);
+
+			src += CAST5_PARALLEL_BLOCKS;
+			dst += CAST5_PARALLEL_BLOCKS;
+			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
+		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
+
+		if (nbytes < bsize)
+			goto done;
+	}
+
+	/* Handle leftovers */
+	do {
+		if (dst != src)
+			*dst = *src;
+
+		ctrblocks[0] = cpu_to_be64(ctrblk++);
+
+		__cast5_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
+		*dst ^= ctrblocks[0];
+
+		src += 1;
+		dst += 1;
+		nbytes -= bsize;
+	} while (nbytes >= bsize);
+
+done:
+	*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
+	return nbytes;
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	bool fpu_enabled = false;
+	struct blkcipher_walk walk;
+	int err;
+
+	blkcipher_walk_init(&walk, dst, src, nbytes);
+	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
+		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
+		nbytes = __ctr_crypt(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, nbytes);
+	}
+
+	cast5_fpu_end(fpu_enabled);
+
+	if (walk.nbytes) {
+		ctr_crypt_final(desc, &walk);
+		err = blkcipher_walk_done(desc, &walk, 0);
+	}
+
+	return err;
+}
+
+
+static struct crypto_alg cast5_algs[6] = { {
+	.cra_name		= "__ecb-cast5-avx",
+	.cra_driver_name	= "__driver-ecb-cast5-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast5_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.setkey		= cast5_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__cbc-cast5-avx",
+	.cra_driver_name	= "__driver-cbc-cast5-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast5_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.setkey		= cast5_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__ctr-cast5-avx",
+	.cra_driver_name	= "__driver-ctr-cast5-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct cast5_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.ivsize		= CAST5_BLOCK_SIZE,
+			.setkey		= cast5_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	.cra_name		= "ecb(cast5)",
+	.cra_driver_name	= "ecb-cast5-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(cast5)",
+	.cra_driver_name	= "cbc-cast5-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST5_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.ivsize		= CAST5_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= __ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ctr(cast5)",
+	.cra_driver_name	= "ctr-cast5-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST5_MIN_KEY_SIZE,
+			.max_keysize	= CAST5_MAX_KEY_SIZE,
+			.ivsize		= CAST5_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_encrypt,
+			.geniv		= "chainiv",
+		},
+	},
+} };
+
+static int __init cast5_init(void)
+{
+	u64 xcr0;
+
+	if (!cpu_has_avx || !cpu_has_osxsave) {
+		pr_info("AVX instructions are not detected.\n");
+		return -ENODEV;
+	}
+
+	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+		pr_info("AVX detected but unusable.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+}
+
+static void __exit cast5_exit(void)
+{
+	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
+}
+
+module_init(cast5_init);
+module_exit(cast5_exit);
+
+MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("cast5");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index a3238051b03e..cda97fcaa822 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -692,6 +692,20 @@ config CRYPTO_CAST5
 	  The CAST5 encryption algorithm (synonymous with CAST-128) is
 	  described in RFC2144.
 
+config CRYPTO_CAST5_AVX_X86_64
+	tristate "CAST5 (CAST-128) cipher algorithm (x86_64/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
+	select CRYPTO_CAST5
+	help
+	  The CAST5 encryption algorithm (synonymous with CAST-128) is
+	  described in RFC2144.
+
+	  This module provides the Cast5 cipher algorithm that processes
+	  sixteen blocks parallel using the AVX instruction set.
+
 config CRYPTO_CAST6
 	tristate "CAST6 (CAST-256) cipher algorithm"
 	select CRYPTO_ALGAPI
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 7a91e540563f..def0f430b667 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1534,6 +1534,21 @@ static int alg_test_null(const struct alg_test_desc *desc,
 /* Please keep this list sorted by algorithm name. */
 static const struct alg_test_desc alg_test_descs[] = {
 	{
+		.alg = "__cbc-cast5-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
+	}, {
 		.alg = "__cbc-serpent-avx",
 		.test = alg_test_null,
 		.suite = {
@@ -1594,6 +1609,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-cbc-cast5-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-cbc-serpent-avx",
 		.test = alg_test_null,
@@ -1655,6 +1685,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-ecb-cast5-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-ecb-serpent-avx",
 		.test = alg_test_null,
@@ -1951,6 +1996,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "cryptd(__driver-ecb-cast5-avx)",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "cryptd(__driver-ecb-serpent-avx)",
 		.test = alg_test_null,
-- 
cgit v1.2.3


From 4ea1277d301eb776e321684cd4ea95116b4e8847 Mon Sep 17 00:00:00 2001
From: Johannes Goetzfried
 <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Date: Wed, 11 Jul 2012 19:38:57 +0200
Subject: crypto: cast6 - add x86_64/avx assembler implementation

This patch adds a x86_64/avx assembler implementation of the Cast6 block
cipher. The implementation processes eight blocks in parallel (two 4 block
chunk AVX operations). The table-lookups are done in general-purpose registers.
For small blocksizes the functions from the generic module are called. A good
performance increase is provided for blocksizes greater or equal to 128B.

Patch has been tested with tcrypt and automated filesystem tests.

Tcrypt benchmark results:

Intel Core i5-2500 CPU (fam:6, model:42, step:7)

cast6-avx-x86_64 vs. cast6-generic
128bit key:                                             (lrw:256bit)    (xts:256bit)
size    ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec
16B     0.97x   1.00x   1.01x   1.01x   0.99x   0.97x   0.98x   1.01x   0.96x   0.98x
64B     0.98x   0.99x   1.02x   1.01x   0.99x   1.00x   1.01x   0.99x   1.00x   0.99x
256B    1.77x   1.84x   0.99x   1.85x   1.77x   1.77x   1.70x   1.74x   1.69x   1.72x
1024B   1.93x   1.95x   0.99x   1.96x   1.93x   1.93x   1.84x   1.85x   1.89x   1.87x
8192B   1.91x   1.95x   0.99x   1.97x   1.95x   1.91x   1.86x   1.87x   1.93x   1.90x

256bit key:                                             (lrw:384bit)    (xts:512bit)
size    ecb-enc ecb-dec cbc-enc cbc-dec ctr-enc ctr-dec lrw-enc lrw-dec xts-enc xts-dec
16B     0.97x   0.99x   1.02x   1.01x   0.98x   0.99x   1.00x   1.00x   0.98x   0.98x
64B     0.98x   0.99x   1.01x   1.00x   1.00x   1.00x   1.01x   1.01x   0.97x   1.00x
256B    1.77x   1.83x   1.00x   1.86x   1.79x   1.78x   1.70x   1.76x   1.71x   1.69x
1024B   1.92x   1.95x   0.99x   1.96x   1.93x   1.93x   1.83x   1.86x   1.89x   1.87x
8192B   1.94x   1.95x   0.99x   1.97x   1.95x   1.95x   1.87x   1.87x   1.93x   1.91x

Signed-off-by: Johannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/Makefile                  |   2 +
 arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 335 +++++++++++++++
 arch/x86/crypto/cast6_avx_glue.c          | 648 ++++++++++++++++++++++++++++++
 crypto/Kconfig                            |  17 +
 crypto/testmgr.c                          |  60 +++
 5 files changed, 1062 insertions(+)
 create mode 100644 arch/x86/crypto/cast6-avx-x86_64-asm_64.S
 create mode 100644 arch/x86/crypto/cast6_avx_glue.c

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 565e82b00142..5bacb4a226ac 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_CAST5_AVX_X86_64) += cast5-avx-x86_64.o
+obj-$(CONFIG_CRYPTO_CAST6_AVX_X86_64) += cast6-avx-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o
 obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o
@@ -34,6 +35,7 @@ serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 cast5-avx-x86_64-y := cast5-avx-x86_64-asm_64.o cast5_avx_glue.o
+cast6-avx-x86_64-y := cast6-avx-x86_64-asm_64.o cast6_avx_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
 twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o
 twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o
diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
new file mode 100644
index 000000000000..d258ce0d2e06
--- /dev/null
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -0,0 +1,335 @@
+/*
+ * Cast6 Cipher 8-way parallel algorithm (AVX/x86_64)
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+.file "cast6-avx-x86_64-asm_64.S"
+.text
+
+.extern cast6_s1
+.extern cast6_s2
+.extern cast6_s3
+.extern cast6_s4
+
+/* structure of crypto context */
+#define km	0
+#define kr	(12*4*4)
+
+/* s-boxes */
+#define s1	cast6_s1
+#define s2	cast6_s2
+#define s3	cast6_s3
+#define s4	cast6_s4
+
+/**********************************************************************
+  8-way AVX cast6
+ **********************************************************************/
+#define CTX %rdi
+
+#define RA1 %xmm0
+#define RB1 %xmm1
+#define RC1 %xmm2
+#define RD1 %xmm3
+
+#define RA2 %xmm4
+#define RB2 %xmm5
+#define RC2 %xmm6
+#define RD2 %xmm7
+
+#define RX %xmm8
+
+#define RKM  %xmm9
+#define RKRF %xmm10
+#define RKRR %xmm11
+
+#define RTMP  %xmm12
+#define RMASK %xmm13
+#define R32   %xmm14
+
+#define RID1  %rax
+#define RID1b %al
+#define RID2  %rbx
+#define RID2b %bl
+
+#define RGI1   %rdx
+#define RGI1bl %dl
+#define RGI1bh %dh
+#define RGI2   %rcx
+#define RGI2bl %cl
+#define RGI2bh %ch
+
+#define RFS1  %r8
+#define RFS1d %r8d
+#define RFS2  %r9
+#define RFS2d %r9d
+#define RFS3  %r10
+#define RFS3d %r10d
+
+
+#define lookup_32bit(src, dst, op1, op2, op3) \
+	movb		src ## bl,     RID1b;    \
+	movb		src ## bh,     RID2b;    \
+	movl		s1(, RID1, 4), dst ## d; \
+	op1		s2(, RID2, 4), dst ## d; \
+	shrq $16,	src;                     \
+	movb		src ## bl,     RID1b;    \
+	movb		src ## bh,     RID2b;    \
+	op2		s3(, RID1, 4), dst ## d; \
+	op3		s4(, RID2, 4), dst ## d;
+
+#define F(a, x, op0, op1, op2, op3) \
+	op0	a,	RKM,  x;                 \
+	vpslld  RKRF,	x,    RTMP;              \
+	vpsrld  RKRR,	x,    x;                 \
+	vpor	RTMP,	x,    x;                 \
+	\
+	vpshufb	RMASK,	x,    x;                 \
+	vmovq		x,    RGI1;              \
+	vpsrldq $8,	x,    x;                 \
+	vmovq		x,    RGI2;              \
+	\
+	lookup_32bit(RGI1, RFS1, op1, op2, op3); \
+	shrq $16,	RGI1;                    \
+	lookup_32bit(RGI1, RFS2, op1, op2, op3); \
+	shlq $32,	RFS2;                    \
+	orq		RFS1, RFS2;              \
+	\
+	lookup_32bit(RGI2, RFS1, op1, op2, op3); \
+	shrq $16,	RGI2;                    \
+	lookup_32bit(RGI2, RFS3, op1, op2, op3); \
+	shlq $32,	RFS3;                    \
+	orq		RFS1, RFS3;              \
+	\
+	vmovq		RFS2, x;                 \
+	vpinsrq $1,	RFS3, x, x;
+
+#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl)
+#define F2(b, x) F(b, x, vpxor,  subl, addl, xorl)
+#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl)
+
+#define qop(in, out, x, f) \
+	F ## f(in ## 1, x);          \
+	vpxor out ## 1, x, out ## 1; \
+	F ## f(in ## 2, x);          \
+	vpxor out ## 2, x, out ## 2; \
+
+#define Q(n) \
+	vbroadcastss	(km+(4*(4*n+0)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+0))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RD, RC, RX, 1);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+1)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+1))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RC, RB, RX, 2);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+2)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+2))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RB, RA, RX, 3);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+3)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+3))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RA, RD, RX, 1);
+
+#define QBAR(n) \
+	vbroadcastss	(km+(4*(4*n+3)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+3))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RA, RD, RX, 1);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+2)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+2))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RB, RA, RX, 3);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+1)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+1))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RC, RB, RX, 2);                                \
+	\
+	vbroadcastss	(km+(4*(4*n+0)))(CTX), RKM;        \
+	vpinsrb $0,	(kr+(4*n+0))(CTX),     RKRF, RKRF; \
+	vpsubq		RKRF,                  R32,  RKRR; \
+	qop(RD, RC, RX, 1);
+
+
+#define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	vpunpckldq		x1, x0, t0; \
+	vpunpckhdq		x1, x0, t2; \
+	vpunpckldq		x3, x2, t1; \
+	vpunpckhdq		x3, x2, x3; \
+	\
+	vpunpcklqdq		t1, t0, x0; \
+	vpunpckhqdq		t1, t0, x1; \
+	vpunpcklqdq		x3, t2, x2; \
+	vpunpckhqdq		x3, t2, x3;
+
+#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
+	vmovdqu (0*4*4)(in),	x0; \
+	vmovdqu (1*4*4)(in),	x1; \
+	vmovdqu (2*4*4)(in),	x2; \
+	vmovdqu (3*4*4)(in),	x3; \
+	vpshufb RMASK, x0,	x0; \
+	vpshufb RMASK, x1,	x1; \
+	vpshufb RMASK, x2,	x2; \
+	vpshufb RMASK, x3,	x3; \
+	\
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
+
+#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vpshufb RMASK,		x0, x0;       \
+	vpshufb RMASK,		x1, x1;       \
+	vpshufb RMASK,		x2, x2;       \
+	vpshufb RMASK,		x3, x3;       \
+	vmovdqu x0,		(0*4*4)(out); \
+	vmovdqu	x1,		(1*4*4)(out); \
+	vmovdqu	x2,		(2*4*4)(out); \
+	vmovdqu	x3,		(3*4*4)(out);
+
+#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
+	\
+	vpshufb RMASK,		x0, x0;       \
+	vpshufb RMASK,		x1, x1;       \
+	vpshufb RMASK,		x2, x2;       \
+	vpshufb RMASK,		x3, x3;       \
+	vpxor (0*4*4)(out),	x0, x0;       \
+	vmovdqu	x0,		(0*4*4)(out); \
+	vpxor (1*4*4)(out),	x1, x1;       \
+	vmovdqu	x1,		(1*4*4)(out); \
+	vpxor (2*4*4)(out),	x2, x2;       \
+	vmovdqu x2,		(2*4*4)(out); \
+	vpxor (3*4*4)(out),	x3, x3;       \
+	vmovdqu x3,		(3*4*4)(out);
+
+.align 16
+.Lbswap_mask:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.L32_mask:
+	.byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0
+
+.align 16
+.global __cast6_enc_blk_8way
+.type   __cast6_enc_blk_8way,@function;
+
+__cast6_enc_blk_8way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 *	%rcx: bool, if true: xor output
+	 */
+
+	pushq %rbx;
+	pushq %rcx;
+
+	vmovdqu .Lbswap_mask, RMASK;
+	vmovdqu .L32_mask, R32;
+	vpxor RKRF, RKRF, RKRF;
+
+	leaq (4*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	Q(0);
+	Q(1);
+	Q(2);
+	Q(3);
+	Q(4);
+	Q(5);
+	QBAR(6);
+	QBAR(7);
+	QBAR(8);
+	QBAR(9);
+	QBAR(10);
+	QBAR(11);
+
+	popq %rcx;
+	popq %rbx;
+
+	leaq (4*4*4)(%rsi), %rax;
+
+	testb %cl, %cl;
+	jnz __enc_xor8;
+
+	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	ret;
+
+__enc_xor8:
+	outunpack_xor_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	ret;
+
+.align 16
+.global cast6_dec_blk_8way
+.type   cast6_dec_blk_8way,@function;
+
+cast6_dec_blk_8way:
+	/* input:
+	 *	%rdi: ctx, CTX
+	 *	%rsi: dst
+	 *	%rdx: src
+	 */
+
+	pushq %rbx;
+
+	vmovdqu .Lbswap_mask, RMASK;
+	vmovdqu .L32_mask, R32;
+	vpxor RKRF, RKRF, RKRF;
+
+	leaq (4*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	xorq RID1, RID1;
+	xorq RID2, RID2;
+
+	Q(11);
+	Q(10);
+	Q(9);
+	Q(8);
+	Q(7);
+	Q(6);
+	QBAR(5);
+	QBAR(4);
+	QBAR(3);
+	QBAR(2);
+	QBAR(1);
+	QBAR(0);
+
+	popq %rbx;
+
+	leaq (4*4*4)(%rsi), %rax;
+	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+
+	ret;
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
new file mode 100644
index 000000000000..15e5f85a5011
--- /dev/null
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -0,0 +1,648 @@
+/*
+ * Glue Code for the AVX assembler implemention of the Cast6 Cipher
+ *
+ * Copyright (C) 2012 Johannes Goetzfried
+ *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
+ * USA
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/hardirq.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/err.h>
+#include <crypto/algapi.h>
+#include <crypto/cast6.h>
+#include <crypto/cryptd.h>
+#include <crypto/b128ops.h>
+#include <crypto/ctr.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
+#include <asm/xcr.h>
+#include <asm/xsave.h>
+#include <asm/crypto/ablk_helper.h>
+#include <asm/crypto/glue_helper.h>
+
+#define CAST6_PARALLEL_BLOCKS 8
+
+asmlinkage void __cast6_enc_blk_8way(struct cast6_ctx *ctx, u8 *dst,
+				     const u8 *src, bool xor);
+asmlinkage void cast6_dec_blk_8way(struct cast6_ctx *ctx, u8 *dst,
+				   const u8 *src);
+
+static inline void cast6_enc_blk_xway(struct cast6_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	__cast6_enc_blk_8way(ctx, dst, src, false);
+}
+
+static inline void cast6_enc_blk_xway_xor(struct cast6_ctx *ctx, u8 *dst,
+					  const u8 *src)
+{
+	__cast6_enc_blk_8way(ctx, dst, src, true);
+}
+
+static inline void cast6_dec_blk_xway(struct cast6_ctx *ctx, u8 *dst,
+				      const u8 *src)
+{
+	cast6_dec_blk_8way(ctx, dst, src);
+}
+
+
+static void cast6_decrypt_cbc_xway(void *ctx, u128 *dst, const u128 *src)
+{
+	u128 ivs[CAST6_PARALLEL_BLOCKS - 1];
+	unsigned int j;
+
+	for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
+		ivs[j] = src[j];
+
+	cast6_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);
+
+	for (j = 0; j < CAST6_PARALLEL_BLOCKS - 1; j++)
+		u128_xor(dst + (j + 1), dst + (j + 1), ivs + j);
+}
+
+static void cast6_crypt_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv)
+{
+	be128 ctrblk;
+
+	u128_to_be128(&ctrblk, iv);
+	u128_inc(iv);
+
+	__cast6_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
+	u128_xor(dst, src, (u128 *)&ctrblk);
+}
+
+static void cast6_crypt_ctr_xway(void *ctx, u128 *dst, const u128 *src,
+				   u128 *iv)
+{
+	be128 ctrblks[CAST6_PARALLEL_BLOCKS];
+	unsigned int i;
+
+	for (i = 0; i < CAST6_PARALLEL_BLOCKS; i++) {
+		if (dst != src)
+			dst[i] = src[i];
+
+		u128_to_be128(&ctrblks[i], iv);
+		u128_inc(iv);
+	}
+
+	cast6_enc_blk_xway_xor(ctx, (u8 *)dst, (u8 *)ctrblks);
+}
+
+static const struct common_glue_ctx cast6_enc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_enc_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_encrypt) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_ctr = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(cast6_crypt_ctr) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_dec = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(cast6_dec_blk_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .ecb = GLUE_FUNC_CAST(__cast6_decrypt) }
+	} }
+};
+
+static const struct common_glue_ctx cast6_dec_cbc = {
+	.num_funcs = 2,
+	.fpu_blocks_limit = CAST6_PARALLEL_BLOCKS,
+
+	.funcs = { {
+		.num_blocks = CAST6_PARALLEL_BLOCKS,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(cast6_decrypt_cbc_xway) }
+	}, {
+		.num_blocks = 1,
+		.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(__cast6_decrypt) }
+	} }
+};
+
+static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&cast6_enc, desc, dst, src, nbytes);
+}
+
+static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ecb_crypt_128bit(&cast6_dec, desc, dst, src, nbytes);
+}
+
+static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(__cast6_encrypt), desc,
+				       dst, src, nbytes);
+}
+
+static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_cbc_decrypt_128bit(&cast6_dec_cbc, desc, dst, src,
+				       nbytes);
+}
+
+static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		     struct scatterlist *src, unsigned int nbytes)
+{
+	return glue_ctr_crypt_128bit(&cast6_ctr, desc, dst, src, nbytes);
+}
+
+static inline bool cast6_fpu_begin(bool fpu_enabled, unsigned int nbytes)
+{
+	return glue_fpu_begin(CAST6_BLOCK_SIZE, CAST6_PARALLEL_BLOCKS,
+			      NULL, fpu_enabled, nbytes);
+}
+
+static inline void cast6_fpu_end(bool fpu_enabled)
+{
+	glue_fpu_end(fpu_enabled);
+}
+
+struct crypt_priv {
+	struct cast6_ctx *ctx;
+	bool fpu_enabled;
+};
+
+static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = CAST6_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+		cast6_enc_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		__cast6_encrypt(ctx->ctx, srcdst, srcdst);
+}
+
+static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
+{
+	const unsigned int bsize = CAST6_BLOCK_SIZE;
+	struct crypt_priv *ctx = priv;
+	int i;
+
+	ctx->fpu_enabled = cast6_fpu_begin(ctx->fpu_enabled, nbytes);
+
+	if (nbytes == bsize * CAST6_PARALLEL_BLOCKS) {
+		cast6_dec_blk_xway(ctx->ctx, srcdst, srcdst);
+		return;
+	}
+
+	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
+		__cast6_decrypt(ctx->ctx, srcdst, srcdst);
+}
+
+struct cast6_lrw_ctx {
+	struct lrw_table_ctx lrw_table;
+	struct cast6_ctx cast6_ctx;
+};
+
+static int lrw_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = __cast6_setkey(&ctx->cast6_ctx, key, keylen - CAST6_BLOCK_SIZE,
+			     &tfm->crt_flags);
+	if (err)
+		return err;
+
+	return lrw_init_table(&ctx->lrw_table, key + keylen - CAST6_BLOCK_SIZE);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->cast6_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->cast6_ctx,
+		.fpu_enabled = false,
+	};
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static void lrw_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct cast6_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	lrw_free_table(&ctx->lrw_table);
+}
+
+struct cast6_xts_ctx {
+	struct cast6_ctx tweak_ctx;
+	struct cast6_ctx crypt_ctx;
+};
+
+static int xts_cast6_setkey(struct crypto_tfm *tfm, const u8 *key,
+			      unsigned int keylen)
+{
+	struct cast6_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int err;
+
+	/* key consists of keys of equal size concatenated, therefore
+	 * the length must be even
+	 */
+	if (keylen % 2) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	/* first half of xts-key is for crypt */
+	err = __cast6_setkey(&ctx->crypt_ctx, key, keylen / 2, flags);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return __cast6_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2,
+			      flags);
+}
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct cast6_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[CAST6_PARALLEL_BLOCKS];
+	struct crypt_priv crypt_ctx = {
+		.ctx = &ctx->crypt_ctx,
+		.fpu_enabled = false,
+	};
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = &ctx->tweak_ctx,
+		.tweak_fn = XTS_TWEAK_CAST(__cast6_encrypt),
+		.crypt_ctx = &crypt_ctx,
+		.crypt_fn = decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	cast6_fpu_end(crypt_ctx.fpu_enabled);
+
+	return ret;
+}
+
+static struct crypto_alg cast6_algs[10] = { {
+	.cra_name		= "__ecb-cast6-avx",
+	.cra_driver_name	= "__driver-ecb-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.setkey		= cast6_setkey,
+			.encrypt	= ecb_encrypt,
+			.decrypt	= ecb_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__cbc-cast6-avx",
+	.cra_driver_name	= "__driver-cbc-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.setkey		= cast6_setkey,
+			.encrypt	= cbc_encrypt,
+			.decrypt	= cbc_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__ctr-cast6-avx",
+	.cra_driver_name	= "__driver-ctr-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct cast6_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= cast6_setkey,
+			.encrypt	= ctr_crypt,
+			.decrypt	= ctr_crypt,
+		},
+	},
+}, {
+	.cra_name		= "__lrw-cast6-avx",
+	.cra_driver_name	= "__driver-lrw-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_exit		= lrw_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= lrw_cast6_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__xts-cast6-avx",
+	.cra_driver_name	= "__driver-xts-cast6-avx",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct cast6_xts_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
+			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= xts_cast6_setkey,
+			.encrypt	= xts_encrypt,
+			.decrypt	= xts_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ecb(cast6)",
+	.cra_driver_name	= "ecb-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "cbc(cast6)",
+	.cra_driver_name	= "cbc-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= __ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "ctr(cast6)",
+	.cra_driver_name	= "ctr-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_encrypt,
+			.geniv		= "chainiv",
+		},
+	},
+}, {
+	.cra_name		= "lrw(cast6)",
+	.cra_driver_name	= "lrw-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.max_keysize	= CAST6_MAX_KEY_SIZE +
+					  CAST6_BLOCK_SIZE,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "xts(cast6)",
+	.cra_driver_name	= "xts-cast6-avx",
+	.cra_priority		= 200,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
+	.cra_blocksize		= CAST6_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct async_helper_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_init		= ablk_init,
+	.cra_exit		= ablk_exit,
+	.cra_u = {
+		.ablkcipher = {
+			.min_keysize	= CAST6_MIN_KEY_SIZE * 2,
+			.max_keysize	= CAST6_MAX_KEY_SIZE * 2,
+			.ivsize		= CAST6_BLOCK_SIZE,
+			.setkey		= ablk_set_key,
+			.encrypt	= ablk_encrypt,
+			.decrypt	= ablk_decrypt,
+		},
+	},
+} };
+
+static int __init cast6_init(void)
+{
+	u64 xcr0;
+
+	if (!cpu_has_avx || !cpu_has_osxsave) {
+		pr_info("AVX instructions are not detected.\n");
+		return -ENODEV;
+	}
+
+	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
+	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
+		pr_info("AVX detected but unusable.\n");
+		return -ENODEV;
+	}
+
+	return crypto_register_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+}
+
+static void __exit cast6_exit(void)
+{
+	crypto_unregister_algs(cast6_algs, ARRAY_SIZE(cast6_algs));
+}
+
+module_init(cast6_init);
+module_exit(cast6_exit);
+
+MODULE_DESCRIPTION("Cast6 Cipher Algorithm, AVX optimized");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("cast6");
diff --git a/crypto/Kconfig b/crypto/Kconfig
index cda97fcaa822..fe8ed62efe2f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -713,6 +713,23 @@ config CRYPTO_CAST6
 	  The CAST6 encryption algorithm (synonymous with CAST-256) is
 	  described in RFC2612.
 
+config CRYPTO_CAST6_AVX_X86_64
+	tristate "CAST6 (CAST-256) cipher algorithm (x86_64/AVX)"
+	depends on X86 && 64BIT
+	select CRYPTO_ALGAPI
+	select CRYPTO_CRYPTD
+	select CRYPTO_ABLK_HELPER_X86
+	select CRYPTO_GLUE_HELPER_X86
+	select CRYPTO_CAST6
+	select CRYPTO_LRW
+	select CRYPTO_XTS
+	help
+	  The CAST6 encryption algorithm (synonymous with CAST-256) is
+	  described in RFC2612.
+
+	  This module provides the Cast6 cipher algorithm that processes
+	  eight blocks parallel using the AVX instruction set.
+
 config CRYPTO_DES
 	tristate "DES and Triple DES EDE cipher algorithms"
 	select CRYPTO_ALGAPI
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index cff3c1c3f83c..575b57c3244b 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1548,6 +1548,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__cbc-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__cbc-serpent-avx",
 		.test = alg_test_null,
@@ -1624,6 +1639,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-cbc-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-cbc-serpent-avx",
 		.test = alg_test_null,
@@ -1700,6 +1730,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "__driver-ecb-cast6-avx",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "__driver-ecb-serpent-avx",
 		.test = alg_test_null,
@@ -2026,6 +2071,21 @@ static const struct alg_test_desc alg_test_descs[] = {
 				}
 			}
 		}
+	}, {
+		.alg = "cryptd(__driver-ecb-cast6-avx)",
+		.test = alg_test_null,
+		.suite = {
+			.cipher = {
+				.enc = {
+					.vecs = NULL,
+					.count = 0
+				},
+				.dec = {
+					.vecs = NULL,
+					.count = 0
+				}
+			}
+		}
 	}, {
 		.alg = "cryptd(__driver-ecb-serpent-avx)",
 		.test = alg_test_null,
-- 
cgit v1.2.3


From e115676e042f4d9268c6b6d8cb7dc962aa6cfd7d Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 1 Aug 2012 17:01:42 +0300
Subject: KVM: x86: update KVM_SAVE_MSRS_BEGIN to correct value

When MSR_KVM_PV_EOI_EN was added to msrs_to_save array
KVM_SAVE_MSRS_BEGIN was not updated accordingly.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3a53bcc24f20..a87c82aa3196 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -806,7 +806,7 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	9
+#define KVM_SAVE_MSRS_BEGIN	10
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
-- 
cgit v1.2.3


From ea22571c8fd912f28e2525f7112bbb84b474ff3a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 19 Jul 2012 13:59:37 -0400
Subject: x86: mce: Disable preemption when calling raise_local()

raise_mce() has a code path which does not disable preemption when the
raise_local() is called. The per cpu variable access in raise_local()
depends on preemption being disabled to be functional. So that code
path was either never tested or never tested with CONFIG_DEBUG_PREEMPT
enabled.

Add the missing preempt_disable/enable() pair around the call.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce-inject.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index fc4beb393577..753746f6dbd8 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -194,7 +194,11 @@ static void raise_mce(struct mce *m)
 		put_online_cpus();
 	} else
 #endif
+	{
+		preempt_disable();
 		raise_local();
+		preempt_enable();
+	}
 }
 
 /* Error injection interface */
-- 
cgit v1.2.3


From b5975917a3e5f93b5d1c95561aab0aa44327baea Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 19 Jul 2012 13:59:38 -0400
Subject: x86: mce: Serialize mce injection

raise_mce() fiddles with global state, but lacks any kind of
serialization.

Add a mutex around the raise_mce() call, so concurrent writers do not
stomp on each other toes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce-inject.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 753746f6dbd8..ddc72f839332 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -78,6 +78,7 @@ static void raise_exception(struct mce *m, struct pt_regs *pregs)
 }
 
 static cpumask_var_t mce_inject_cpumask;
+static DEFINE_MUTEX(mce_inject_mutex);
 
 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
 {
@@ -229,7 +230,10 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
 	 * so do it a jiffie or two later everywhere.
 	 */
 	schedule_timeout(2);
+
+	mutex_lock(&mce_inject_mutex);
 	raise_mce(&m);
+	mutex_unlock(&mce_inject_mutex);
 	return usize;
 }
 
-- 
cgit v1.2.3


From 26c3c283c5b08dd250279c06ba3ab5b094bbacc3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 19 Jul 2012 13:59:39 -0400
Subject: x86: mce: Split timer init

Split timer init function into the init and the start part, so the
start part can replace the open coded version in CPU_DOWN_FAILED.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Acked-by: Borislav Petkov <borislav.petkov@amd.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 5e095f873e3e..bd3a5e850f4f 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1557,23 +1557,28 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	}
 }
 
-static void __mcheck_cpu_init_timer(void)
+static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
-	struct timer_list *t = &__get_cpu_var(mce_timer);
 	unsigned long iv = check_interval * HZ;
 
-	setup_timer(t, mce_timer_fn, smp_processor_id());
+	__this_cpu_write(mce_next_interval, iv);
 
-	if (mce_ignore_ce)
+	if (mce_ignore_ce || !iv)
 		return;
 
-	__this_cpu_write(mce_next_interval, iv);
-	if (!iv)
-		return;
 	t->expires = round_jiffies(jiffies + iv);
 	add_timer_on(t, smp_processor_id());
 }
 
+static void __mcheck_cpu_init_timer(void)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned int cpu = smp_processor_id();
+
+	setup_timer(t, mce_timer_fn, cpu);
+	mce_start_timer(cpu, t);
+}
+
 /* Handle unconfigured int18 (should never happen) */
 static void unexpected_machine_check(struct pt_regs *regs, long error_code)
 {
@@ -2277,12 +2282,8 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
-		if (!mce_ignore_ce && check_interval) {
-			t->expires = round_jiffies(jiffies +
-					per_cpu(mce_next_interval, cpu));
-			add_timer_on(t, cpu);
-		}
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
+		mce_start_timer(cpu, t);
 		break;
 	case CPU_POST_DEAD:
 		/* intentionally ignoring frozen here */
-- 
cgit v1.2.3


From 1a65f970d10ace7a1e399f9061a65679c0ae57d0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 19 Jul 2012 13:59:40 -0400
Subject: x86: mce: Remove the frozen cases in the hotplug code

No point in having double cases if we can simply mask the FROZEN bit
out.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index bd3a5e850f4f..b4dde1527edd 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2262,34 +2262,32 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	unsigned int cpu = (unsigned long)hcpu;
 	struct timer_list *t = &per_cpu(mce_timer, cpu);
 
-	switch (action) {
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
 		mce_device_create(cpu);
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		break;
 	case CPU_DEAD:
-	case CPU_DEAD_FROZEN:
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		mce_device_remove(cpu);
 		break;
 	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
 		del_timer_sync(t);
 		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
 		break;
 	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
 		mce_start_timer(cpu, t);
 		break;
-	case CPU_POST_DEAD:
+	}
+
+	if (action == CPU_POST_DEAD) {
 		/* intentionally ignoring frozen here */
 		cmci_rediscover(cpu);
-		break;
 	}
+
 	return NOTIFY_OK;
 }
 
-- 
cgit v1.2.3


From aab2eb7a38e0e510874acca01838f5badbca6c7e Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Wed, 1 Aug 2012 18:01:10 +0900
Subject: KVM: Stop checking rmap to see if slot is being created

Instead, check npages consistently.  This helps to make rmap
architecture specific in a later patch.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3ca90d74711d..abc039d78428 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6385,7 +6385,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 	 *x86 needs to handle !user_alloc case.
 	 */
 	if (!user_alloc) {
-		if (npages && !old.rmap) {
+		if (npages && !old.npages) {
 			unsigned long userspace_addr;
 
 			userspace_addr = vm_mmap(NULL, 0,
@@ -6413,7 +6413,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 
 	int nr_mmu_pages = 0, npages = mem->memory_size >> PAGE_SHIFT;
 
-	if (!user_alloc && !old.user_alloc && old.rmap && !npages) {
+	if (!user_alloc && !old.user_alloc && old.npages && !npages) {
 		int ret;
 
 		ret = vm_munmap(old.userspace_addr,
-- 
cgit v1.2.3


From 65fbe37c42ed75604c9a770639209dcee162ebe7 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Wed, 1 Aug 2012 18:02:01 +0900
Subject: KVM: MMU: Use gfn_to_rmap() instead of directly reading rmap array

This helps to make rmap architecture specific in a later patch.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c       | 3 ++-
 arch/x86/kvm/mmu_audit.c | 4 +---
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index a9a20528e700..ee768bb2367f 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1181,7 +1181,8 @@ void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
 	unsigned long *rmapp;
 
 	while (mask) {
-		rmapp = &slot->rmap[gfn_offset + __ffs(mask)];
+		rmapp = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
+				      PT_PAGE_TABLE_LEVEL, slot);
 		__rmap_write_protect(kvm, rmapp, PT_PAGE_TABLE_LEVEL, false);
 
 		/* clear the first set bit */
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index 7d7d0b9e23eb..ca403f9bb0f2 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -190,7 +190,6 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	struct kvm_memory_slot *slot;
 	unsigned long *rmapp;
 	u64 *sptep;
 	struct rmap_iterator iter;
@@ -198,8 +197,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp)
 	if (sp->role.direct || sp->unsync || sp->role.invalid)
 		return;
 
-	slot = gfn_to_memslot(kvm, sp->gfn);
-	rmapp = &slot->rmap[sp->gfn - slot->base_gfn];
+	rmapp = gfn_to_rmap(kvm, sp->gfn, PT_PAGE_TABLE_LEVEL);
 
 	for (sptep = rmap_get_first(*rmapp, &iter); sptep;
 	     sptep = rmap_get_next(&iter)) {
-- 
cgit v1.2.3


From d89cc617b954aff4030fce178f7d86f59aaf713d Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Wed, 1 Aug 2012 18:03:28 +0900
Subject: KVM: Push rmap into kvm_arch_memory_slot

Two reasons:
 - x86 can integrate rmap and rmap_pde and remove heuristics in
   __gfn_to_rmap().
 - Some architectures do not need rmap.

Since rmap is one of the most memory consuming stuff in KVM, ppc'd
better restrict the allocation to Book3S HV.

Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Acked-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/include/asm/kvm_host.h |  1 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c |  6 ++--
 arch/powerpc/kvm/book3s_hv_rm_mmu.c |  4 +--
 arch/powerpc/kvm/powerpc.c          |  8 ++++++
 arch/x86/include/asm/kvm_host.h     |  2 +-
 arch/x86/kvm/mmu.c                  |  5 +---
 arch/x86/kvm/x86.c                  | 55 +++++++++++++++++++++----------------
 include/linux/kvm_host.h            |  1 -
 virt/kvm/kvm_main.c                 | 11 +-------
 9 files changed, 49 insertions(+), 44 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 572ad0141268..a29e0918172a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -221,6 +221,7 @@ struct revmap_entry {
 #define KVMPPC_GOT_PAGE		0x80
 
 struct kvm_arch_memory_slot {
+	unsigned long *rmap;
 };
 
 struct kvm_arch {
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 3c635c0616b0..d95d11322a15 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -705,7 +705,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		goto out_unlock;
 	hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
 
-	rmap = &memslot->rmap[gfn - memslot->base_gfn];
+	rmap = &memslot->arch.rmap[gfn - memslot->base_gfn];
 	lock_rmap(rmap);
 
 	/* Check if we might have been invalidated; let the guest retry if so */
@@ -788,7 +788,7 @@ static int kvm_handle_hva_range(struct kvm *kvm,
 		for (; gfn < gfn_end; ++gfn) {
 			gfn_t gfn_offset = gfn - memslot->base_gfn;
 
-			ret = handler(kvm, &memslot->rmap[gfn_offset], gfn);
+			ret = handler(kvm, &memslot->arch.rmap[gfn_offset], gfn);
 			retval |= ret;
 		}
 	}
@@ -1036,7 +1036,7 @@ long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 	unsigned long *rmapp, *map;
 
 	preempt_disable();
-	rmapp = memslot->rmap;
+	rmapp = memslot->arch.rmap;
 	map = memslot->dirty_bitmap;
 	for (i = 0; i < memslot->npages; ++i) {
 		if (kvm_test_clear_dirty(kvm, rmapp))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 5c70d19494f9..56ac1a5d9912 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -84,7 +84,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index,
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		return;
 
-	rmap = real_vmalloc_addr(&memslot->rmap[gfn - memslot->base_gfn]);
+	rmap = real_vmalloc_addr(&memslot->arch.rmap[gfn - memslot->base_gfn]);
 	lock_rmap(rmap);
 
 	head = *rmap & KVMPPC_RMAP_INDEX;
@@ -180,7 +180,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
 	if (!slot_is_aligned(memslot, psize))
 		return H_PARAMETER;
 	slot_fn = gfn - memslot->base_gfn;
-	rmap = &memslot->rmap[slot_fn];
+	rmap = &memslot->arch.rmap[slot_fn];
 
 	if (!kvm->arch.using_mmu_notifiers) {
 		physp = kvm->arch.slot_phys[memslot->id];
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 87f4dc886076..879b14a61403 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -302,10 +302,18 @@ long kvm_arch_dev_ioctl(struct file *filp,
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 			   struct kvm_memory_slot *dont)
 {
+	if (!dont || free->arch.rmap != dont->arch.rmap) {
+		vfree(free->arch.rmap);
+		free->arch.rmap = NULL;
+	}
 }
 
 int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 {
+	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
+	if (!slot->arch.rmap)
+		return -ENOMEM;
+
 	return 0;
 }
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 48e713188469..1309e69b57fa 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -504,7 +504,7 @@ struct kvm_lpage_info {
 };
 
 struct kvm_arch_memory_slot {
-	unsigned long *rmap_pde[KVM_NR_PAGE_SIZES - 1];
+	unsigned long *rmap[KVM_NR_PAGE_SIZES];
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ee768bb2367f..aa9a987ddefb 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -970,11 +970,8 @@ static unsigned long *__gfn_to_rmap(gfn_t gfn, int level,
 {
 	unsigned long idx;
 
-	if (likely(level == PT_PAGE_TABLE_LEVEL))
-		return &slot->rmap[gfn - slot->base_gfn];
-
 	idx = gfn_to_index(gfn, slot->base_gfn, level);
-	return &slot->arch.rmap_pde[level - PT_DIRECTORY_LEVEL][idx];
+	return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx];
 }
 
 /*
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index abc039d78428..ebf2109318e0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6303,14 +6303,18 @@ void kvm_arch_free_memslot(struct kvm_memory_slot *free,
 {
 	int i;
 
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-		if (!dont || free->arch.rmap_pde[i] != dont->arch.rmap_pde[i]) {
-			kvm_kvfree(free->arch.rmap_pde[i]);
-			free->arch.rmap_pde[i] = NULL;
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+		if (!dont || free->arch.rmap[i] != dont->arch.rmap[i]) {
+			kvm_kvfree(free->arch.rmap[i]);
+			free->arch.rmap[i] = NULL;
 		}
-		if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) {
-			kvm_kvfree(free->arch.lpage_info[i]);
-			free->arch.lpage_info[i] = NULL;
+		if (i == 0)
+			continue;
+
+		if (!dont || free->arch.lpage_info[i - 1] !=
+			     dont->arch.lpage_info[i - 1]) {
+			kvm_kvfree(free->arch.lpage_info[i - 1]);
+			free->arch.lpage_info[i - 1] = NULL;
 		}
 	}
 }
@@ -6319,28 +6323,30 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 {
 	int i;
 
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
 		unsigned long ugfn;
 		int lpages;
-		int level = i + 2;
+		int level = i + 1;
 
 		lpages = gfn_to_index(slot->base_gfn + npages - 1,
 				      slot->base_gfn, level) + 1;
 
-		slot->arch.rmap_pde[i] =
-			kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap_pde[i]));
-		if (!slot->arch.rmap_pde[i])
+		slot->arch.rmap[i] =
+			kvm_kvzalloc(lpages * sizeof(*slot->arch.rmap[i]));
+		if (!slot->arch.rmap[i])
 			goto out_free;
+		if (i == 0)
+			continue;
 
-		slot->arch.lpage_info[i] =
-			kvm_kvzalloc(lpages * sizeof(*slot->arch.lpage_info[i]));
-		if (!slot->arch.lpage_info[i])
+		slot->arch.lpage_info[i - 1] = kvm_kvzalloc(lpages *
+					sizeof(*slot->arch.lpage_info[i - 1]));
+		if (!slot->arch.lpage_info[i - 1])
 			goto out_free;
 
 		if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1))
-			slot->arch.lpage_info[i][0].write_count = 1;
+			slot->arch.lpage_info[i - 1][0].write_count = 1;
 		if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1))
-			slot->arch.lpage_info[i][lpages - 1].write_count = 1;
+			slot->arch.lpage_info[i - 1][lpages - 1].write_count = 1;
 		ugfn = slot->userspace_addr >> PAGE_SHIFT;
 		/*
 		 * If the gfn and userspace address are not aligned wrt each
@@ -6352,18 +6358,21 @@ int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages)
 			unsigned long j;
 
 			for (j = 0; j < lpages; ++j)
-				slot->arch.lpage_info[i][j].write_count = 1;
+				slot->arch.lpage_info[i - 1][j].write_count = 1;
 		}
 	}
 
 	return 0;
 
 out_free:
-	for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) {
-		kvm_kvfree(slot->arch.rmap_pde[i]);
-		kvm_kvfree(slot->arch.lpage_info[i]);
-		slot->arch.rmap_pde[i] = NULL;
-		slot->arch.lpage_info[i] = NULL;
+	for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
+		kvm_kvfree(slot->arch.rmap[i]);
+		slot->arch.rmap[i] = NULL;
+		if (i == 0)
+			continue;
+
+		kvm_kvfree(slot->arch.lpage_info[i - 1]);
+		slot->arch.lpage_info[i - 1] = NULL;
 	}
 	return -ENOMEM;
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dbc65f9d6a2b..3c16f0f1fe35 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -213,7 +213,6 @@ struct kvm_memory_slot {
 	gfn_t base_gfn;
 	unsigned long npages;
 	unsigned long flags;
-	unsigned long *rmap;
 	unsigned long *dirty_bitmap;
 	struct kvm_arch_memory_slot arch;
 	unsigned long userspace_addr;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index bcf973ec98ff..14ec567816ab 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -550,16 +550,12 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
 				  struct kvm_memory_slot *dont)
 {
-	if (!dont || free->rmap != dont->rmap)
-		vfree(free->rmap);
-
 	if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
 		kvm_destroy_dirty_bitmap(free);
 
 	kvm_arch_free_memslot(free, dont);
 
 	free->npages = 0;
-	free->rmap = NULL;
 }
 
 void kvm_free_physmem(struct kvm *kvm)
@@ -768,11 +764,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if (npages && !old.npages) {
 		new.user_alloc = user_alloc;
 		new.userspace_addr = mem->userspace_addr;
-#ifndef CONFIG_S390
-		new.rmap = vzalloc(npages * sizeof(*new.rmap));
-		if (!new.rmap)
-			goto out_free;
-#endif /* not defined CONFIG_S390 */
+
 		if (kvm_arch_create_memslot(&new, npages))
 			goto out_free;
 	}
@@ -831,7 +823,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	/* actual memory is freed via old in kvm_free_physmem_slot below */
 	if (!npages) {
-		new.rmap = NULL;
 		new.dirty_bitmap = NULL;
 		memset(&new.arch, 0, sizeof(new.arch));
 	}
-- 
cgit v1.2.3


From 6c8ee57be9350c5c2cafdd6a99d0462d528676e2 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 3 Aug 2012 15:37:54 +0800
Subject: KVM: introduce KVM_PFN_ERR_FAULT

After that, the exported and un-inline function, get_fault_pfn,
can be removed

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c       |  2 +-
 include/linux/kvm_host.h |  3 ++-
 virt/kvm/kvm_main.c      | 12 +++---------
 3 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aa9a987ddefb..9cf90c8d5843 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2512,7 +2512,7 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 
 	slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
 	if (!slot)
-		return get_fault_pfn();
+		return KVM_PFN_ERR_FAULT;
 
 	hva = gfn_to_hva_memslot(slot, gfn);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3c16f0f1fe35..ef5554f47486 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -48,6 +48,8 @@
 #define KVM_MAX_MMIO_FRAGMENTS \
 	(KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
 
+#define KVM_PFN_ERR_FAULT	(-EFAULT)
+
 /*
  * vcpu->requests bit members
  */
@@ -443,7 +445,6 @@ void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
 void kvm_set_pfn_accessed(pfn_t pfn);
 void kvm_get_pfn(pfn_t pfn);
-pfn_t get_fault_pfn(void);
 
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
 			int len);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 14ec567816ab..ef0491645a10 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -939,12 +939,6 @@ static pfn_t get_bad_pfn(void)
 	return -ENOENT;
 }
 
-pfn_t get_fault_pfn(void)
-{
-	return -EFAULT;
-}
-EXPORT_SYMBOL_GPL(get_fault_pfn);
-
 static pfn_t get_hwpoison_pfn(void)
 {
 	return -EHWPOISON;
@@ -1115,7 +1109,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 		struct vm_area_struct *vma;
 
 		if (atomic)
-			return get_fault_pfn();
+			return KVM_PFN_ERR_FAULT;
 
 		down_read(&current->mm->mmap_sem);
 		if (npages == -EHWPOISON ||
@@ -1127,7 +1121,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 		vma = find_vma_intersection(current->mm, addr, addr+1);
 
 		if (vma == NULL)
-			pfn = get_fault_pfn();
+			pfn = KVM_PFN_ERR_FAULT;
 		else if ((vma->vm_flags & VM_PFNMAP)) {
 			pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
 				vma->vm_pgoff;
@@ -1135,7 +1129,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 		} else {
 			if (async && (vma->vm_flags & VM_WRITE))
 				*async = true;
-			pfn = get_fault_pfn();
+			pfn = KVM_PFN_ERR_FAULT;
 		}
 		up_read(&current->mm->mmap_sem);
 	} else
-- 
cgit v1.2.3


From e6c1502b3f933ace20c711ce34ab696f5a67086d Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 3 Aug 2012 15:38:36 +0800
Subject: KVM: introduce KVM_PFN_ERR_HWPOISON

Then, get_hwpoison_pfn and is_hwpoison_pfn can be removed

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c       |  2 +-
 include/linux/kvm_host.h |  2 +-
 virt/kvm/kvm_main.c      | 13 +------------
 3 files changed, 3 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9cf90c8d5843..d3cdf69da513 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2649,7 +2649,7 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *
 static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn)
 {
 	kvm_release_pfn_clean(pfn);
-	if (is_hwpoison_pfn(pfn)) {
+	if (pfn == KVM_PFN_ERR_HWPOISON) {
 		kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current);
 		return 0;
 	}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ef5554f47486..840f44a096c9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -49,6 +49,7 @@
 	(KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
 
 #define KVM_PFN_ERR_FAULT	(-EFAULT)
+#define KVM_PFN_ERR_HWPOISON	(-EHWPOISON)
 
 /*
  * vcpu->requests bit members
@@ -395,7 +396,6 @@ extern struct page *bad_page;
 
 int is_error_page(struct page *page);
 int is_error_pfn(pfn_t pfn);
-int is_hwpoison_pfn(pfn_t pfn);
 int is_noslot_pfn(pfn_t pfn);
 int is_invalid_pfn(pfn_t pfn);
 int kvm_is_error_hva(unsigned long addr);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index ef0491645a10..7fce2d5787ae 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -939,17 +939,6 @@ static pfn_t get_bad_pfn(void)
 	return -ENOENT;
 }
 
-static pfn_t get_hwpoison_pfn(void)
-{
-	return -EHWPOISON;
-}
-
-int is_hwpoison_pfn(pfn_t pfn)
-{
-	return pfn == -EHWPOISON;
-}
-EXPORT_SYMBOL_GPL(is_hwpoison_pfn);
-
 int is_noslot_pfn(pfn_t pfn)
 {
 	return pfn == -ENOENT;
@@ -1115,7 +1104,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 		if (npages == -EHWPOISON ||
 			(!async && check_user_page_hwpoison(addr))) {
 			up_read(&current->mm->mmap_sem);
-			return get_hwpoison_pfn();
+			return KVM_PFN_ERR_HWPOISON;
 		}
 
 		vma = find_vma_intersection(current->mm, addr, addr+1);
-- 
cgit v1.2.3


From cb9aaa30b133574b646d9d4766ef08a843211393 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 3 Aug 2012 15:42:10 +0800
Subject: KVM: do not release the error pfn

After commit a2766325cf9f9, the error pfn is replaced by the
error code, it need not be released anymore

[ The patch has been compiling tested for powerpc ]

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/e500_tlb.c |  1 -
 arch/x86/kvm/mmu.c          |  7 +++----
 arch/x86/kvm/mmu_audit.c    |  4 +---
 arch/x86/kvm/paging_tmpl.h  |  8 ++------
 virt/kvm/iommu.c            |  1 -
 virt/kvm/kvm_main.c         | 14 ++++++++------
 6 files changed, 14 insertions(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_tlb.c
index c8f6c5826742..09ce5ac128f8 100644
--- a/arch/powerpc/kvm/e500_tlb.c
+++ b/arch/powerpc/kvm/e500_tlb.c
@@ -524,7 +524,6 @@ static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
 		if (is_error_pfn(pfn)) {
 			printk(KERN_ERR "Couldn't get real page for gfn %lx!\n",
 					(long)gfn);
-			kvm_release_pfn_clean(pfn);
 			return;
 		}
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d3cdf69da513..9651c2cd0005 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2496,7 +2496,9 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 				rmap_recycle(vcpu, sptep, gfn);
 		}
 	}
-	kvm_release_pfn_clean(pfn);
+
+	if (!is_error_pfn(pfn))
+		kvm_release_pfn_clean(pfn);
 }
 
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -2648,7 +2650,6 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *
 
 static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn)
 {
-	kvm_release_pfn_clean(pfn);
 	if (pfn == KVM_PFN_ERR_HWPOISON) {
 		kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current);
 		return 0;
@@ -3273,8 +3274,6 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
 	if (!async)
 		return false; /* *pfn has correct page already */
 
-	kvm_release_pfn_clean(*pfn);
-
 	if (!prefault && can_do_async_pf(vcpu)) {
 		trace_kvm_try_async_get_page(gva, gfn);
 		if (kvm_find_async_pf_gfn(vcpu, gfn)) {
diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c
index ca403f9bb0f2..daff69e21150 100644
--- a/arch/x86/kvm/mmu_audit.c
+++ b/arch/x86/kvm/mmu_audit.c
@@ -116,10 +116,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level)
 	gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gfn);
 
-	if (is_error_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
+	if (is_error_pfn(pfn))
 		return;
-	}
 
 	hpa =  pfn << PAGE_SHIFT;
 	if ((*sptep & PT64_BASE_ADDR_MASK) != hpa)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bb7cf01cae76..bf8c42bf50fe 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -370,10 +370,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
 	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
-	if (mmu_invalid_pfn(pfn)) {
-		kvm_release_pfn_clean(pfn);
+	if (mmu_invalid_pfn(pfn))
 		return;
-	}
 
 	/*
 	 * we call mmu_set_spte() with host_writable = true because that
@@ -448,10 +446,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 		gfn = gpte_to_gfn(gpte);
 		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 				      pte_access & ACC_WRITE_MASK);
-		if (mmu_invalid_pfn(pfn)) {
-			kvm_release_pfn_clean(pfn);
+		if (mmu_invalid_pfn(pfn))
 			break;
-		}
 
 		mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0,
 			     NULL, PT_PAGE_TABLE_LEVEL, gfn,
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 6a67bea4019c..037cb6730e68 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -107,7 +107,6 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 		 */
 		pfn = kvm_pin_pages(slot, gfn, page_size);
 		if (is_error_pfn(pfn)) {
-			kvm_release_pfn_clean(pfn);
 			gfn += 1;
 			continue;
 		}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 93d3c6e063c8..eafba99d1070 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,9 +102,6 @@ static bool largepages_enabled = true;
 
 bool kvm_is_mmio_pfn(pfn_t pfn)
 {
-	if (is_error_pfn(pfn))
-		return false;
-
 	if (pfn_valid(pfn)) {
 		int reserved;
 		struct page *tail = pfn_to_page(pfn);
@@ -1165,10 +1162,13 @@ EXPORT_SYMBOL_GPL(gfn_to_page_many_atomic);
 
 static struct page *kvm_pfn_to_page(pfn_t pfn)
 {
-	WARN_ON(kvm_is_mmio_pfn(pfn));
+	if (is_error_pfn(pfn))
+		return KVM_ERR_PTR_BAD_PAGE;
 
-	if (is_error_pfn(pfn) || kvm_is_mmio_pfn(pfn))
+	if (kvm_is_mmio_pfn(pfn)) {
+		WARN_ON(1);
 		return KVM_ERR_PTR_BAD_PAGE;
+	}
 
 	return pfn_to_page(pfn);
 }
@@ -1193,7 +1193,9 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(pfn_t pfn)
 {
-	if (!is_error_pfn(pfn) && !kvm_is_mmio_pfn(pfn))
+	WARN_ON(is_error_pfn(pfn));
+
+	if (!kvm_is_mmio_pfn(pfn))
 		put_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
-- 
cgit v1.2.3


From 32cad84f44d186654492f1a50a1424c8906ccbd9 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 3 Aug 2012 15:42:52 +0800
Subject: KVM: do not release the error page

After commit a2766325cf9f9, the error page is replaced by the
error code, it need not be released anymore

[ The patch has been compiling tested for powerpc ]

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/powerpc/kvm/44x_tlb.c   | 1 -
 arch/powerpc/kvm/book3s_pr.c | 4 +---
 arch/x86/kvm/svm.c           | 1 -
 arch/x86/kvm/vmx.c           | 5 ++---
 arch/x86/kvm/x86.c           | 9 +++------
 include/linux/kvm_host.h     | 2 +-
 virt/kvm/async_pf.c          | 4 ++--
 virt/kvm/kvm_main.c          | 5 +++--
 8 files changed, 12 insertions(+), 19 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 33aa715dab28..5dd3ab469976 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -319,7 +319,6 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr,
 	if (is_error_page(new_page)) {
 		printk(KERN_ERR "Couldn't get guest page for gfn %llx!\n",
 			(unsigned long long)gfn);
-		kvm_release_page_clean(new_page);
 		return;
 	}
 	hpaddr = page_to_phys(new_page);
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index a1baec340f7e..05c28f59f77f 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -242,10 +242,8 @@ static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
 	int i;
 
 	hpage = gfn_to_page(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
-	if (is_error_page(hpage)) {
-		kvm_release_page_clean(hpage);
+	if (is_error_page(hpage))
 		return;
-	}
 
 	hpage_offset = pte->raddr & ~PAGE_MASK;
 	hpage_offset &= ~0xFFFULL;
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 687d0c30e559..31be4a557447 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -2105,7 +2105,6 @@ static void *nested_svm_map(struct vcpu_svm *svm, u64 gpa, struct page **_page)
 	return kmap(page);
 
 error:
-	kvm_release_page_clean(page);
 	kvm_inject_gp(&svm->vcpu, 0);
 
 	return NULL;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d6e4cbc42b8e..cc8ad9836927 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -596,10 +596,9 @@ static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
 static struct page *nested_get_page(struct kvm_vcpu *vcpu, gpa_t addr)
 {
 	struct page *page = gfn_to_page(vcpu->kvm, addr >> PAGE_SHIFT);
-	if (is_error_page(page)) {
-		kvm_release_page_clean(page);
+	if (is_error_page(page))
 		return NULL;
-	}
+
 	return page;
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ebf2109318e0..7953a9e7cb17 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1639,10 +1639,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		vcpu->arch.time_page =
 				gfn_to_page(vcpu->kvm, data >> PAGE_SHIFT);
 
-		if (is_error_page(vcpu->arch.time_page)) {
-			kvm_release_page_clean(vcpu->arch.time_page);
+		if (is_error_page(vcpu->arch.time_page))
 			vcpu->arch.time_page = NULL;
-		}
+
 		break;
 	}
 	case MSR_KVM_ASYNC_PF_EN:
@@ -3945,10 +3944,8 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt,
 		goto emul_write;
 
 	page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-	if (is_error_page(page)) {
-		kvm_release_page_clean(page);
+	if (is_error_page(page))
 		goto emul_write;
-	}
 
 	kaddr = kmap_atomic(page);
 	kaddr += offset_in_page(gpa);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ce7c32950f4e..07226f820e6c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -457,7 +457,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
 pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
-void kvm_release_pfn_dirty(pfn_t);
+void kvm_release_pfn_dirty(pfn_t pfn);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
 void kvm_set_pfn_accessed(pfn_t pfn);
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 56f553391896..ea475cd03511 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -111,7 +111,7 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
 			list_entry(vcpu->async_pf.done.next,
 				   typeof(*work), link);
 		list_del(&work->link);
-		if (work->page)
+		if (!is_error_page(work->page))
 			kvm_release_page_clean(work->page);
 		kmem_cache_free(async_pf_cache, work);
 	}
@@ -138,7 +138,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
 
 		list_del(&work->queue);
 		vcpu->async_pf.queued--;
-		if (work->page)
+		if (!is_error_page(work->page))
 			kvm_release_page_clean(work->page);
 		kmem_cache_free(async_pf_cache, work);
 	}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index eafba99d1070..a2e85af847c1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1186,8 +1186,9 @@ EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_page_clean(struct page *page)
 {
-	if (!is_error_page(page))
-		kvm_release_pfn_clean(page_to_pfn(page));
+	WARN_ON(is_error_page(page));
+
+	kvm_release_pfn_clean(page_to_pfn(page));
 }
 EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
-- 
cgit v1.2.3


From 8a5a87d9b7aef24bcdba763d8ee14982477b0a2e Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:26 +0300
Subject: KVM: clean up kvm_(set|get)_apic_base

kvm_get_apic_base() needlessly checks irqchip_in_kernel although it does
the same no matter what result of the check is. kvm_set_apic_base() also
checks for irqchip_in_kernel, but kvm_lapic_set_base() can handle this
case.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7953a9e7cb17..3cafbb12ae05 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -246,20 +246,14 @@ static void drop_user_return_notifiers(void *ignore)
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
 {
-	if (irqchip_in_kernel(vcpu->kvm))
-		return vcpu->arch.apic_base;
-	else
-		return vcpu->arch.apic_base;
+	return vcpu->arch.apic_base;
 }
 EXPORT_SYMBOL_GPL(kvm_get_apic_base);
 
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
 {
 	/* TODO: reserve bits check */
-	if (irqchip_in_kernel(vcpu->kvm))
-		kvm_lapic_set_base(vcpu, data);
-	else
-		vcpu->arch.apic_base = data;
+	kvm_lapic_set_base(vcpu, data);
 }
 EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
-- 
cgit v1.2.3


From 5dbc8f3fed0b4cb04dfb276150294f21c5f0fc66 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:27 +0300
Subject: KVM: use kvm_lapic_set_base() to change apic_base

Do not change apic_base directly. Use kvm_lapic_set_base() instead.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 0cd431c85d38..49f4ac047f60 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1185,7 +1185,8 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	update_divide_count(apic);
 	atomic_set(&apic->lapic_timer.pending, 0);
 	if (kvm_vcpu_is_bsp(vcpu))
-		vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
+		kvm_lapic_set_base(vcpu,
+				vcpu->arch.apic_base | MSR_IA32_APICBASE_BSP);
 	vcpu->arch.pv_eoi.msr_val = 0;
 	apic_update_ppr(apic);
 
@@ -1310,8 +1311,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 		     HRTIMER_MODE_ABS);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 
-	apic->base_address = APIC_DEFAULT_PHYS_BASE;
-	vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE;
+	kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE);
 
 	kvm_lapic_reset(vcpu);
 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
@@ -1380,8 +1380,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	apic->base_address = vcpu->arch.apic_base &
-			     MSR_IA32_APICBASE_BASE;
+	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
 	kvm_apic_set_version(vcpu);
 
 	apic_update_ppr(apic);
-- 
cgit v1.2.3


From 6aed64a8a440360be875f31eabeacaddb83ef25f Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:28 +0300
Subject: KVM: mark apic enabled on start up

According to SDM apic is enabled on start up.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 49f4ac047f60..c3f14fe51e9b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1311,7 +1311,8 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 		     HRTIMER_MODE_ABS);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 
-	kvm_lapic_set_base(vcpu, APIC_DEFAULT_PHYS_BASE);
+	kvm_lapic_set_base(vcpu,
+			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
 
 	kvm_lapic_reset(vcpu);
 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
-- 
cgit v1.2.3


From c5cc421ba3219b90f11d151bc55f1608c12830fa Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:30 +0300
Subject: KVM: use jump label to optimize checking for HW enabled APIC in
 APIC_BASE MSR

Usually all APICs are HW enabled so the check can be optimized out.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 29 ++++++++++++++++++++++++++++-
 arch/x86/kvm/lapic.h |  1 +
 arch/x86/kvm/x86.c   |  1 +
 3 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index c3f14fe51e9b..5b46cab044a5 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -34,6 +34,7 @@
 #include <asm/current.h>
 #include <asm/apicdef.h>
 #include <linux/atomic.h>
+#include <linux/jump_label.h>
 #include "kvm_cache_regs.h"
 #include "irq.h"
 #include "trace.h"
@@ -117,9 +118,13 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
 	return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
 }
 
+struct static_key_deferred apic_hw_disabled __read_mostly;
+
 static inline int apic_hw_enabled(struct kvm_lapic *apic)
 {
-	return (apic)->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
+	if (static_key_false(&apic_hw_disabled.key))
+		return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
+	return MSR_IA32_APICBASE_ENABLE;
 }
 
 static inline int  apic_sw_enabled(struct kvm_lapic *apic)
@@ -1055,6 +1060,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 
 	hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
 
+	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
+		static_key_slow_dec_deferred(&apic_hw_disabled);
+
 	if (vcpu->arch.apic->regs)
 		free_page((unsigned long)vcpu->arch.apic->regs);
 
@@ -1125,6 +1133,14 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 		return;
 	}
 
+	/* update jump label if enable bit changes */
+	if ((vcpu->arch.apic_base ^ value) & MSR_IA32_APICBASE_ENABLE) {
+		if (value & MSR_IA32_APICBASE_ENABLE)
+			static_key_slow_dec_deferred(&apic_hw_disabled);
+		else
+			static_key_slow_inc(&apic_hw_disabled.key);
+	}
+
 	if (!kvm_vcpu_is_bsp(apic->vcpu))
 		value &= ~MSR_IA32_APICBASE_BSP;
 
@@ -1311,6 +1327,11 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 		     HRTIMER_MODE_ABS);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 
+	/*
+	 * APIC is created enabled. This will prevent kvm_lapic_set_base from
+	 * thinking that APIC satet has changed.
+	 */
+	vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
 	kvm_lapic_set_base(vcpu,
 			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
 
@@ -1598,3 +1619,9 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data)
 	return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data,
 					 addr);
 }
+
+void kvm_lapic_init(void)
+{
+	/* do not patch jump label more than once per second */
+	jump_label_rate_limit(&apic_hw_disabled, HZ);
+}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 166766fffd9f..73fa299b68e8 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -78,4 +78,5 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 }
 
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
+void kvm_lapic_init(void);
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3cafbb12ae05..29fa18d27e6e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4903,6 +4903,7 @@ int kvm_arch_init(void *opaque)
 	if (cpu_has_xsave)
 		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
+	kvm_lapic_init();
 	return 0;
 
 out:
-- 
cgit v1.2.3


From f8c1ea103947038b7197bdd4c8451886a58af0c0 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:31 +0300
Subject: KVM: use jump label to optimize checking for SW enabled apic in
 spurious interrupt register

Usually all APICs are SW enabled so the check can be optimized out.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5b46cab044a5..7f77e96ac5e8 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -127,9 +127,24 @@ static inline int apic_hw_enabled(struct kvm_lapic *apic)
 	return MSR_IA32_APICBASE_ENABLE;
 }
 
-static inline int  apic_sw_enabled(struct kvm_lapic *apic)
+struct static_key_deferred apic_sw_disabled __read_mostly;
+
+static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
+{
+	if ((apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
+		if (val & APIC_SPIV_APIC_ENABLED)
+			static_key_slow_dec_deferred(&apic_sw_disabled);
+		else
+			static_key_slow_inc(&apic_sw_disabled.key);
+	}
+	apic_set_reg(apic, APIC_SPIV, val);
+}
+
+static inline int apic_sw_enabled(struct kvm_lapic *apic)
 {
-	return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
+	if (static_key_false(&apic_sw_disabled.key))
+		return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
+	return APIC_SPIV_APIC_ENABLED;
 }
 
 static inline int apic_enabled(struct kvm_lapic *apic)
@@ -918,7 +933,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 		u32 mask = 0x3ff;
 		if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
 			mask |= APIC_SPIV_DIRECTED_EOI;
-		apic_set_reg(apic, APIC_SPIV, val & mask);
+		apic_set_spiv(apic, val & mask);
 		if (!(val & APIC_SPIV_APIC_ENABLED)) {
 			int i;
 			u32 lvt_val;
@@ -1055,18 +1070,23 @@ EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
 
 void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
+	struct kvm_lapic *apic = vcpu->arch.apic;
+
 	if (!vcpu->arch.apic)
 		return;
 
-	hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer);
+	hrtimer_cancel(&apic->lapic_timer.timer);
 
 	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
 		static_key_slow_dec_deferred(&apic_hw_disabled);
 
-	if (vcpu->arch.apic->regs)
-		free_page((unsigned long)vcpu->arch.apic->regs);
+	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+		static_key_slow_dec_deferred(&apic_sw_disabled);
 
-	kfree(vcpu->arch.apic);
+	if (apic->regs)
+		free_page((unsigned long)apic->regs);
+
+	kfree(apic);
 }
 
 /*
@@ -1182,7 +1202,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 
 	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
-	apic_set_reg(apic, APIC_SPIV, 0xff);
+	apic_set_spiv(apic, 0xff);
 	apic_set_reg(apic, APIC_TASKPRI, 0);
 	apic_set_reg(apic, APIC_LDR, 0);
 	apic_set_reg(apic, APIC_ESR, 0);
@@ -1335,6 +1355,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
 	kvm_lapic_set_base(vcpu,
 			APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE);
 
+	static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
 	kvm_lapic_reset(vcpu);
 	kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
 
@@ -1404,6 +1425,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 
 	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
 	kvm_apic_set_version(vcpu);
+	apic_set_spiv(apic, apic_get_reg(apic, APIC_SPIV));
 
 	apic_update_ppr(apic);
 	hrtimer_cancel(&apic->lapic_timer.timer);
@@ -1624,4 +1646,5 @@ void kvm_lapic_init(void)
 {
 	/* do not patch jump label more than once per second */
 	jump_label_rate_limit(&apic_hw_disabled, HZ);
+	jump_label_rate_limit(&apic_sw_disabled, HZ);
 }
-- 
cgit v1.2.3


From 54e9818f3903902a4ea3046035739b8770880092 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:32 +0300
Subject: KVM: use jump label to optimize checking for in kernel local apic
 presence

Usually all vcpus have local apic pointer initialized, so the check may
be completely skipped.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 62 ++++++++++++++++++++++++++++------------------------
 arch/x86/kvm/x86.c   |  7 +++++-
 arch/x86/kvm/x86.h   |  1 +
 3 files changed, 41 insertions(+), 29 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 7f77e96ac5e8..650379ba73af 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -152,6 +152,13 @@ static inline int apic_enabled(struct kvm_lapic *apic)
 	return apic_sw_enabled(apic) &&	apic_hw_enabled(apic);
 }
 
+static inline bool vcpu_has_lapic(struct kvm_vcpu *vcpu)
+{
+	if (static_key_false(&kvm_no_apic_vcpu))
+		return vcpu->arch.apic;
+	return true;
+}
+
 #define LVT_MASK	\
 	(APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
 
@@ -204,7 +211,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 	struct kvm_cpuid_entry2 *feat;
 	u32 v = APIC_VERSION;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!vcpu_has_lapic(vcpu))
 		return;
 
 	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
@@ -305,7 +312,6 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
 
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;
 
 	/* This may race with setting of irr in __apic_accept_irq() and
@@ -313,9 +319,9 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 	 * will cause vmexit immediately and the value will be recalculated
 	 * on the next vmentry.
 	 */
-	if (!apic)
+	if (!vcpu_has_lapic(vcpu))
 		return 0;
-	highest_irr = apic_find_highest_irr(apic);
+	highest_irr = apic_find_highest_irr(vcpu->arch.apic);
 
 	return highest_irr;
 }
@@ -1061,9 +1067,7 @@ static int apic_mmio_write(struct kvm_io_device *this,
 
 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
-
-	if (apic)
+	if (vcpu_has_lapic(vcpu))
 		apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
@@ -1098,10 +1102,9 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	if (!apic)
-		return 0;
 
-	if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
+	if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+			apic_lvtt_period(apic))
 		return 0;
 
 	return apic->lapic_timer.tscdeadline;
@@ -1110,10 +1113,9 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	if (!apic)
-		return;
 
-	if (apic_lvtt_oneshot(apic) || apic_lvtt_period(apic))
+	if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+			apic_lvtt_period(apic))
 		return;
 
 	hrtimer_cancel(&apic->lapic_timer.timer);
@@ -1125,20 +1127,21 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!apic)
+	if (!vcpu_has_lapic(vcpu))
 		return;
+
 	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
 		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
 }
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
 	u64 tpr;
 
-	if (!apic)
+	if (!vcpu_has_lapic(vcpu))
 		return 0;
-	tpr = (u64) apic_get_reg(apic, APIC_TASKPRI);
+
+	tpr = (u64) apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
 
 	return (tpr & 0xf0) >> 4;
 }
@@ -1237,7 +1240,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 
 bool kvm_apic_present(struct kvm_vcpu *vcpu)
 {
-	return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic);
+	return vcpu_has_lapic(vcpu) && apic_hw_enabled(vcpu->arch.apic);
 }
 
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
@@ -1258,10 +1261,11 @@ static bool lapic_is_periodic(struct kvm_lapic *apic)
 
 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *lapic = vcpu->arch.apic;
+	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT))
-		return atomic_read(&lapic->lapic_timer.pending);
+	if (vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
+			apic_lvt_enabled(apic, APIC_LVTT))
+		return atomic_read(&apic->lapic_timer.pending);
 
 	return 0;
 }
@@ -1371,7 +1375,7 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;
 
-	if (!apic || !apic_enabled(apic))
+	if (!vcpu_has_lapic(vcpu) || !apic_enabled(apic))
 		return -1;
 
 	apic_update_ppr(apic);
@@ -1399,7 +1403,10 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (apic && atomic_read(&apic->lapic_timer.pending) > 0) {
+	if (!vcpu_has_lapic(vcpu))
+		return;
+
+	if (atomic_read(&apic->lapic_timer.pending) > 0) {
 		if (kvm_apic_local_deliver(apic, APIC_LVTT))
 			atomic_dec(&apic->lapic_timer.pending);
 	}
@@ -1439,13 +1446,12 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 
 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 {
-	struct kvm_lapic *apic = vcpu->arch.apic;
 	struct hrtimer *timer;
 
-	if (!apic)
+	if (!vcpu_has_lapic(vcpu))
 		return;
 
-	timer = &apic->lapic_timer.timer;
+	timer = &vcpu->arch.apic->lapic_timer.timer;
 	if (hrtimer_cancel(timer))
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
@@ -1602,7 +1608,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!vcpu_has_lapic(vcpu))
 		return 1;
 
 	/* if this is ICR write vector before command */
@@ -1616,7 +1622,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 low, high = 0;
 
-	if (!irqchip_in_kernel(vcpu->kvm))
+	if (!vcpu_has_lapic(vcpu))
 		return 1;
 
 	if (apic_reg_read(apic, reg, 4, &low))
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 29fa18d27e6e..8ebf65c349eb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6152,6 +6152,8 @@ bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
 }
 
+struct static_key kvm_no_apic_vcpu __read_mostly;
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	struct page *page;
@@ -6184,7 +6186,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 		r = kvm_create_lapic(vcpu);
 		if (r < 0)
 			goto fail_mmu_destroy;
-	}
+	} else
+		static_key_slow_inc(&kvm_no_apic_vcpu);
 
 	vcpu->arch.mce_banks = kzalloc(KVM_MAX_MCE_BANKS * sizeof(u64) * 4,
 				       GFP_KERNEL);
@@ -6224,6 +6227,8 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 	kvm_mmu_destroy(vcpu);
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	free_page((unsigned long)vcpu->arch.pio_data);
+	if (!irqchip_in_kernel(vcpu->kvm))
+		static_key_slow_dec(&kvm_no_apic_vcpu);
 }
 
 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 3d1134ddb885..2b5219c12ac8 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -124,4 +124,5 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 
 extern u64 host_xcr0;
 
+extern struct static_key kvm_no_apic_vcpu;
 #endif
-- 
cgit v1.2.3


From c48f14966cc41957d88c66dfe49a439e708ab7b8 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 5 Aug 2012 15:58:33 +0300
Subject: KVM: inline kvm_apic_present() and kvm_lapic_enabled()

Those functions are used during interrupt injection. When inlined they
become nops on the fast path.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 143 +++++++++++++++++++--------------------------------
 arch/x86/kvm/lapic.h |  45 +++++++++++++++-
 2 files changed, 96 insertions(+), 92 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 650379ba73af..333c27fa6e9f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -73,11 +73,6 @@
 static unsigned int min_timer_period_us = 500;
 module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
 
-static inline u32 apic_get_reg(struct kvm_lapic *apic, int reg_off)
-{
-	return *((u32 *) (apic->regs + reg_off));
-}
-
 static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
 {
 	*((u32 *) (apic->regs + reg_off)) = val;
@@ -119,19 +114,11 @@ static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
 }
 
 struct static_key_deferred apic_hw_disabled __read_mostly;
-
-static inline int apic_hw_enabled(struct kvm_lapic *apic)
-{
-	if (static_key_false(&apic_hw_disabled.key))
-		return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
-	return MSR_IA32_APICBASE_ENABLE;
-}
-
 struct static_key_deferred apic_sw_disabled __read_mostly;
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 {
-	if ((apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
+	if ((kvm_apic_get_reg(apic, APIC_SPIV) ^ val) & APIC_SPIV_APIC_ENABLED) {
 		if (val & APIC_SPIV_APIC_ENABLED)
 			static_key_slow_dec_deferred(&apic_sw_disabled);
 		else
@@ -140,23 +127,9 @@ static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 	apic_set_reg(apic, APIC_SPIV, val);
 }
 
-static inline int apic_sw_enabled(struct kvm_lapic *apic)
-{
-	if (static_key_false(&apic_sw_disabled.key))
-		return apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
-	return APIC_SPIV_APIC_ENABLED;
-}
-
 static inline int apic_enabled(struct kvm_lapic *apic)
 {
-	return apic_sw_enabled(apic) &&	apic_hw_enabled(apic);
-}
-
-static inline bool vcpu_has_lapic(struct kvm_vcpu *vcpu)
-{
-	if (static_key_false(&kvm_no_apic_vcpu))
-		return vcpu->arch.apic;
-	return true;
+	return kvm_apic_sw_enabled(apic) &&	kvm_apic_hw_enabled(apic);
 }
 
 #define LVT_MASK	\
@@ -168,34 +141,34 @@ static inline bool vcpu_has_lapic(struct kvm_vcpu *vcpu)
 
 static inline int kvm_apic_id(struct kvm_lapic *apic)
 {
-	return (apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
+	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 {
-	return !(apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
+	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
 }
 
 static inline int apic_lvt_vector(struct kvm_lapic *apic, int lvt_type)
 {
-	return apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
+	return kvm_apic_get_reg(apic, lvt_type) & APIC_VECTOR_MASK;
 }
 
 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 {
-	return ((apic_get_reg(apic, APIC_LVTT) &
+	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
 		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
 }
 
 static inline int apic_lvtt_period(struct kvm_lapic *apic)
 {
-	return ((apic_get_reg(apic, APIC_LVTT) &
+	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
 		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
 }
 
 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 {
-	return ((apic_get_reg(apic, APIC_LVTT) &
+	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
 		apic->lapic_timer.timer_mode_mask) ==
 			APIC_LVT_TIMER_TSCDEADLINE);
 }
@@ -211,7 +184,7 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 	struct kvm_cpuid_entry2 *feat;
 	u32 v = APIC_VERSION;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return;
 
 	feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
@@ -319,7 +292,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 	 * will cause vmexit immediately and the value will be recalculated
 	 * on the next vmentry.
 	 */
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return 0;
 	highest_irr = apic_find_highest_irr(vcpu->arch.apic);
 
@@ -404,8 +377,8 @@ static void apic_update_ppr(struct kvm_lapic *apic)
 	u32 tpr, isrv, ppr, old_ppr;
 	int isr;
 
-	old_ppr = apic_get_reg(apic, APIC_PROCPRI);
-	tpr = apic_get_reg(apic, APIC_TASKPRI);
+	old_ppr = kvm_apic_get_reg(apic, APIC_PROCPRI);
+	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI);
 	isr = apic_find_highest_isr(apic);
 	isrv = (isr != -1) ? isr : 0;
 
@@ -441,13 +414,13 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 	u32 logical_id;
 
 	if (apic_x2apic_mode(apic)) {
-		logical_id = apic_get_reg(apic, APIC_LDR);
+		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
 		return logical_id & mda;
 	}
 
-	logical_id = GET_APIC_LOGICAL_ID(apic_get_reg(apic, APIC_LDR));
+	logical_id = GET_APIC_LOGICAL_ID(kvm_apic_get_reg(apic, APIC_LDR));
 
-	switch (apic_get_reg(apic, APIC_DFR)) {
+	switch (kvm_apic_get_reg(apic, APIC_DFR)) {
 	case APIC_DFR_FLAT:
 		if (logical_id & mda)
 			result = 1;
@@ -459,7 +432,7 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
 		break;
 	default:
 		apic_debug("Bad DFR vcpu %d: %08x\n",
-			   apic->vcpu->vcpu_id, apic_get_reg(apic, APIC_DFR));
+			   apic->vcpu->vcpu_id, kvm_apic_get_reg(apic, APIC_DFR));
 		break;
 	}
 
@@ -617,7 +590,7 @@ static int apic_set_eoi(struct kvm_lapic *apic)
 	apic_clear_isr(vector, apic);
 	apic_update_ppr(apic);
 
-	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
+	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_DIRECTED_EOI) &&
 	    kvm_ioapic_handles_vector(apic->vcpu->kvm, vector)) {
 		int trigger_mode;
 		if (apic_test_vector(vector, apic->regs + APIC_TMR))
@@ -632,8 +605,8 @@ static int apic_set_eoi(struct kvm_lapic *apic)
 
 static void apic_send_ipi(struct kvm_lapic *apic)
 {
-	u32 icr_low = apic_get_reg(apic, APIC_ICR);
-	u32 icr_high = apic_get_reg(apic, APIC_ICR2);
+	u32 icr_low = kvm_apic_get_reg(apic, APIC_ICR);
+	u32 icr_high = kvm_apic_get_reg(apic, APIC_ICR2);
 	struct kvm_lapic_irq irq;
 
 	irq.vector = icr_low & APIC_VECTOR_MASK;
@@ -668,7 +641,7 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
 	ASSERT(apic != NULL);
 
 	/* if initial count is 0, current count should also be 0 */
-	if (apic_get_reg(apic, APIC_TMICT) == 0)
+	if (kvm_apic_get_reg(apic, APIC_TMICT) == 0)
 		return 0;
 
 	remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
@@ -724,13 +697,13 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 		break;
 	case APIC_PROCPRI:
 		apic_update_ppr(apic);
-		val = apic_get_reg(apic, offset);
+		val = kvm_apic_get_reg(apic, offset);
 		break;
 	case APIC_TASKPRI:
 		report_tpr_access(apic, false);
 		/* fall thru */
 	default:
-		val = apic_get_reg(apic, offset);
+		val = kvm_apic_get_reg(apic, offset);
 		break;
 	}
 
@@ -782,7 +755,7 @@ static int apic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 
 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
 {
-	return apic_hw_enabled(apic) &&
+	return kvm_apic_hw_enabled(apic) &&
 	    addr >= apic->base_address &&
 	    addr < apic->base_address + LAPIC_MMIO_LENGTH;
 }
@@ -805,7 +778,7 @@ static void update_divide_count(struct kvm_lapic *apic)
 {
 	u32 tmp1, tmp2, tdcr;
 
-	tdcr = apic_get_reg(apic, APIC_TDCR);
+	tdcr = kvm_apic_get_reg(apic, APIC_TDCR);
 	tmp1 = tdcr & 0xf;
 	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
 	apic->divide_count = 0x1 << (tmp2 & 0x7);
@@ -822,7 +795,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 	if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
 		/* lapic timer in oneshot or periodic mode */
 		now = apic->lapic_timer.timer.base->get_time();
-		apic->lapic_timer.period = (u64)apic_get_reg(apic, APIC_TMICT)
+		apic->lapic_timer.period = (u64)kvm_apic_get_reg(apic, APIC_TMICT)
 			    * APIC_BUS_CYCLE_NS * apic->divide_count;
 
 		if (!apic->lapic_timer.period)
@@ -854,7 +827,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 			   "timer initial count 0x%x, period %lldns, "
 			   "expire @ 0x%016" PRIx64 ".\n", __func__,
 			   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
-			   apic_get_reg(apic, APIC_TMICT),
+			   kvm_apic_get_reg(apic, APIC_TMICT),
 			   apic->lapic_timer.period,
 			   ktime_to_ns(ktime_add_ns(now,
 					apic->lapic_timer.period)));
@@ -886,7 +859,7 @@ static void start_apic_timer(struct kvm_lapic *apic)
 
 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
 {
-	int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0));
+	int nmi_wd_enabled = apic_lvt_nmi_mode(kvm_apic_get_reg(apic, APIC_LVT0));
 
 	if (apic_lvt_nmi_mode(lvt0_val)) {
 		if (!nmi_wd_enabled) {
@@ -937,7 +910,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 	case APIC_SPIV: {
 		u32 mask = 0x3ff;
-		if (apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
+		if (kvm_apic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
 			mask |= APIC_SPIV_DIRECTED_EOI;
 		apic_set_spiv(apic, val & mask);
 		if (!(val & APIC_SPIV_APIC_ENABLED)) {
@@ -945,7 +918,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 			u32 lvt_val;
 
 			for (i = 0; i < APIC_LVT_NUM; i++) {
-				lvt_val = apic_get_reg(apic,
+				lvt_val = kvm_apic_get_reg(apic,
 						       APIC_LVTT + 0x10 * i);
 				apic_set_reg(apic, APIC_LVTT + 0x10 * i,
 					     lvt_val | APIC_LVT_MASKED);
@@ -974,7 +947,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	case APIC_LVT1:
 	case APIC_LVTERR:
 		/* TODO: Check vector */
-		if (!apic_sw_enabled(apic))
+		if (!kvm_apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
 
 		val &= apic_lvt_mask[(reg - APIC_LVTT) >> 4];
@@ -983,12 +956,12 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 		break;
 
 	case APIC_LVTT:
-		if ((apic_get_reg(apic, APIC_LVTT) &
+		if ((kvm_apic_get_reg(apic, APIC_LVTT) &
 		    apic->lapic_timer.timer_mode_mask) !=
 		   (val & apic->lapic_timer.timer_mode_mask))
 			hrtimer_cancel(&apic->lapic_timer.timer);
 
-		if (!apic_sw_enabled(apic))
+		if (!kvm_apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
 		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
 		apic_set_reg(apic, APIC_LVTT, val);
@@ -1067,7 +1040,7 @@ static int apic_mmio_write(struct kvm_io_device *this,
 
 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
 {
-	if (vcpu_has_lapic(vcpu))
+	if (kvm_vcpu_has_lapic(vcpu))
 		apic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
@@ -1084,7 +1057,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
 		static_key_slow_dec_deferred(&apic_hw_disabled);
 
-	if (!(apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
 		static_key_slow_dec_deferred(&apic_sw_disabled);
 
 	if (apic->regs)
@@ -1103,7 +1076,7 @@ u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
 			apic_lvtt_period(apic))
 		return 0;
 
@@ -1114,7 +1087,7 @@ void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
+	if (!kvm_vcpu_has_lapic(vcpu) || apic_lvtt_oneshot(apic) ||
 			apic_lvtt_period(apic))
 		return;
 
@@ -1127,21 +1100,21 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return;
 
 	apic_set_tpr(apic, ((cr8 & 0x0f) << 4)
-		     | (apic_get_reg(apic, APIC_TASKPRI) & 4));
+		     | (kvm_apic_get_reg(apic, APIC_TASKPRI) & 4));
 }
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
 	u64 tpr;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return 0;
 
-	tpr = (u64) apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
+	tpr = (u64) kvm_apic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
 
 	return (tpr & 0xf0) >> 4;
 }
@@ -1238,16 +1211,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 		   vcpu->arch.apic_base, apic->base_address);
 }
 
-bool kvm_apic_present(struct kvm_vcpu *vcpu)
-{
-	return vcpu_has_lapic(vcpu) && apic_hw_enabled(vcpu->arch.apic);
-}
-
-int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
-{
-	return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic);
-}
-
 /*
  *----------------------------------------------------------------------
  * timer interface
@@ -1263,7 +1226,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
+	if (kvm_vcpu_has_lapic(vcpu) && apic_enabled(apic) &&
 			apic_lvt_enabled(apic, APIC_LVTT))
 		return atomic_read(&apic->lapic_timer.pending);
 
@@ -1272,10 +1235,10 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu)
 
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
 {
-	u32 reg = apic_get_reg(apic, lvt_type);
+	u32 reg = kvm_apic_get_reg(apic, lvt_type);
 	int vector, mode, trig_mode;
 
-	if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
+	if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
 		vector = reg & APIC_VECTOR_MASK;
 		mode = reg & APIC_MODE_MASK;
 		trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
@@ -1375,23 +1338,23 @@ int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	int highest_irr;
 
-	if (!vcpu_has_lapic(vcpu) || !apic_enabled(apic))
+	if (!kvm_vcpu_has_lapic(vcpu) || !apic_enabled(apic))
 		return -1;
 
 	apic_update_ppr(apic);
 	highest_irr = apic_find_highest_irr(apic);
 	if ((highest_irr == -1) ||
-	    ((highest_irr & 0xF0) <= apic_get_reg(apic, APIC_PROCPRI)))
+	    ((highest_irr & 0xF0) <= kvm_apic_get_reg(apic, APIC_PROCPRI)))
 		return -1;
 	return highest_irr;
 }
 
 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
 {
-	u32 lvt0 = apic_get_reg(vcpu->arch.apic, APIC_LVT0);
+	u32 lvt0 = kvm_apic_get_reg(vcpu->arch.apic, APIC_LVT0);
 	int r = 0;
 
-	if (!apic_hw_enabled(vcpu->arch.apic))
+	if (!kvm_apic_hw_enabled(vcpu->arch.apic))
 		r = 1;
 	if ((lvt0 & APIC_LVT_MASKED) == 0 &&
 	    GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
@@ -1403,7 +1366,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return;
 
 	if (atomic_read(&apic->lapic_timer.pending) > 0) {
@@ -1432,7 +1395,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
 
 	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
 	kvm_apic_set_version(vcpu);
-	apic_set_spiv(apic, apic_get_reg(apic, APIC_SPIV));
+	apic_set_spiv(apic, kvm_apic_get_reg(apic, APIC_SPIV));
 
 	apic_update_ppr(apic);
 	hrtimer_cancel(&apic->lapic_timer.timer);
@@ -1448,7 +1411,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 {
 	struct hrtimer *timer;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return;
 
 	timer = &vcpu->arch.apic->lapic_timer.timer;
@@ -1549,7 +1512,7 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
 	if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
 		return;
 
-	tpr = apic_get_reg(apic, APIC_TASKPRI) & 0xff;
+	tpr = kvm_apic_get_reg(apic, APIC_TASKPRI) & 0xff;
 	max_irr = apic_find_highest_irr(apic);
 	if (max_irr < 0)
 		max_irr = 0;
@@ -1608,7 +1571,7 @@ int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return 1;
 
 	/* if this is ICR write vector before command */
@@ -1622,7 +1585,7 @@ int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u32 low, high = 0;
 
-	if (!vcpu_has_lapic(vcpu))
+	if (!kvm_vcpu_has_lapic(vcpu))
 		return 1;
 
 	if (apic_reg_read(apic, reg, 4, &low))
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 73fa299b68e8..2ad9caa06f94 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -55,8 +55,6 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
-int kvm_lapic_enabled(struct kvm_vcpu *vcpu);
-bool kvm_apic_present(struct kvm_vcpu *vcpu);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
@@ -79,4 +77,47 @@ static inline bool kvm_hv_vapic_assist_page_enabled(struct kvm_vcpu *vcpu)
 
 int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data);
 void kvm_lapic_init(void);
+
+static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off)
+{
+	        return *((u32 *) (apic->regs + reg_off));
+}
+
+extern struct static_key kvm_no_apic_vcpu;
+
+static inline bool kvm_vcpu_has_lapic(struct kvm_vcpu *vcpu)
+{
+	if (static_key_false(&kvm_no_apic_vcpu))
+		return vcpu->arch.apic;
+	return true;
+}
+
+extern struct static_key_deferred apic_hw_disabled;
+
+static inline int kvm_apic_hw_enabled(struct kvm_lapic *apic)
+{
+	if (static_key_false(&apic_hw_disabled.key))
+		return apic->vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE;
+	return MSR_IA32_APICBASE_ENABLE;
+}
+
+extern struct static_key_deferred apic_sw_disabled;
+
+static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic)
+{
+	if (static_key_false(&apic_sw_disabled.key))
+		return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
+	return APIC_SPIV_APIC_ENABLED;
+}
+
+static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_has_lapic(vcpu) && kvm_apic_hw_enabled(vcpu->arch.apic);
+}
+
+static inline int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
+{
+	return kvm_apic_present(vcpu) && kvm_apic_sw_enabled(vcpu->arch.apic);
+}
+
 #endif
-- 
cgit v1.2.3


From a9ad773e0dd833651f0831020a0ea0265c29f2ea Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 6 Aug 2012 19:00:36 +0200
Subject: x86, cpu: Fixup tlb_flushall_shift formatting

The TLB characteristics appeared like this in dmesg:

[    0.065817] Last level iTLB entries: 4KB 512, 2MB 1024, 4MB 512
[    0.065817] Last level dTLB entries: 4KB 1024, 2MB 1024, 4MB 512
[    0.065817] tlb_flushall_shift is 0xffffffff

where tlb_flushall_shift is actually -1 but dumped as a hex number.
However, the Kconfig option CONFIG_DEBUG_TLBFLUSH and the rest of the
code treats this as a signed decimal and states "If you set it to -1,
the code flushes the whole TLB unconditionally."

So, fix its formatting in accordance with the other references to it.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344272439-29080-2-git-send-email-bp@amd64.org
Acked-by: Alex Shi <alex.shi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 46d8786d655e..d239977f361f 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -474,7 +474,7 @@ void __cpuinit cpu_detect_tlb(struct cpuinfo_x86 *c)
 
 	printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \
 		"Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n"	     \
-		"tlb_flushall_shift is 0x%x\n",
+		"tlb_flushall_shift: %d\n",
 		tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
 		tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
 		tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
-- 
cgit v1.2.3


From 5b556332c3ab19e6375836d35ca658776e9ba0f6 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 6 Aug 2012 19:00:37 +0200
Subject: x86, cpu: Push TLB detection CPUID check down

Push the max CPUID leaf check into the ->detect_tlb function and remove
general test case from the generic path.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344272439-29080-3-git-send-email-bp@amd64.org
Acked-by: Alex Shi <alex.shi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/common.c | 3 +--
 arch/x86/kernel/cpu/intel.c  | 4 ++++
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index d239977f361f..080f4a737e3e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -940,8 +940,7 @@ void __init identify_boot_cpu(void)
 #else
 	vgetcpu_set_mode();
 #endif
-	if (boot_cpu_data.cpuid_level >= 2)
-		cpu_detect_tlb(&boot_cpu_data);
+	cpu_detect_tlb(&boot_cpu_data);
 }
 
 void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 0a4ce2980a5a..198e019a531a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -648,6 +648,10 @@ static void __cpuinit intel_detect_tlb(struct cpuinfo_x86 *c)
 	int i, j, n;
 	unsigned int regs[4];
 	unsigned char *desc = (unsigned char *)regs;
+
+	if (c->cpuid_level < 2)
+		return;
+
 	/* Number of times to iterate */
 	n = cpuid_eax(2) & 0xFF;
 
-- 
cgit v1.2.3


From b46882e4c4de4813947fce940fe74af794a1eb72 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 6 Aug 2012 19:00:38 +0200
Subject: x86, cpu: Add AMD TLB size detection

Read I- and DTLB entries count from CPUID on AMD. Handle all the
different family-specific cases.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344272439-29080-4-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/amd.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 9d92e19039f0..bcd200839c90 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -737,6 +737,59 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
 }
 #endif
 
+static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
+{
+	u32 ebx, eax, ecx, edx;
+	u16 mask = 0xfff;
+
+	if (c->x86 < 0xf)
+		return;
+
+	if (c->extended_cpuid_level < 0x80000006)
+		return;
+
+	cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
+
+	tlb_lld_4k[ENTRIES] = (ebx >> 16) & mask;
+	tlb_lli_4k[ENTRIES] = ebx & mask;
+
+	/*
+	 * K8 doesn't have 2M/4M entries in the L2 TLB so read out the L1 TLB
+	 * characteristics from the CPUID function 0x80000005 instead.
+	 */
+	if (c->x86 == 0xf) {
+		cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+		mask = 0xff;
+	}
+
+	/* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+	if (!((eax >> 16) & mask)) {
+		u32 a, b, c, d;
+
+		cpuid(0x80000005, &a, &b, &c, &d);
+		tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff;
+	} else {
+		tlb_lld_2m[ENTRIES] = (eax >> 16) & mask;
+	}
+
+	/* a 4M entry uses two 2M entries */
+	tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1;
+
+	/* Handle ITLB 2M and 4M sizes, fall back to L1 if L2 is disabled */
+	if (!(eax & mask)) {
+		/* Erratum 658 */
+		if (c->x86 == 0x15 && c->x86_model <= 0x1f) {
+			tlb_lli_2m[ENTRIES] = 1024;
+		} else {
+			cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+			tlb_lli_2m[ENTRIES] = eax & 0xff;
+		}
+	} else
+		tlb_lli_2m[ENTRIES] = eax & mask;
+
+	tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+}
+
 static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
 	.c_vendor	= "AMD",
 	.c_ident	= { "AuthenticAMD" },
@@ -756,6 +809,7 @@ static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
 	.c_size_cache	= amd_size_cache,
 #endif
 	.c_early_init   = early_init_amd,
+	.c_detect_tlb	= cpu_detect_tlb_amd,
 	.c_bsp_init	= bsp_init_amd,
 	.c_init		= init_amd,
 	.c_x86_vendor	= X86_VENDOR_AMD,
-- 
cgit v1.2.3


From 057237bb35a605d795fd787868a1088705f26ee5 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 6 Aug 2012 19:00:39 +0200
Subject: x86, cpu: Preset default tlb_flushall_shift on AMD

Run the mprotect.c microbenchmark on all our families >= K8 and preset
the flushall shift variable accordingly.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344272439-29080-5-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/cpu/amd.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index bcd200839c90..f7e98a2c0d12 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -737,6 +737,17 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
 }
 #endif
 
+static void __cpuinit cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
+{
+	if (!cpu_has_invlpg)
+		return;
+
+	tlb_flushall_shift = 5;
+
+	if (c->x86 <= 0x11)
+		tlb_flushall_shift = 4;
+}
+
 static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
 {
 	u32 ebx, eax, ecx, edx;
@@ -788,6 +799,8 @@ static void __cpuinit cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
 		tlb_lli_2m[ENTRIES] = eax & mask;
 
 	tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+
+	cpu_set_tlb_flushall_shift(c);
 }
 
 static const struct cpu_dev __cpuinitconst amd_cpu_dev = {
-- 
cgit v1.2.3


From 256f631f1f7e7bedc882510679ad4473a2274708 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Fri, 14 Sep 2012 13:34:43 +0000
Subject: xen/arm: Introduce xen_ulong_t for unsigned long

All the original Xen headers have xen_ulong_t as unsigned long type, however
when they have been imported in Linux, xen_ulong_t has been replaced with
unsigned long. That might work for x86 and ia64 but it does not for arm.
Bring back xen_ulong_t and let each architecture define xen_ulong_t as they
see fit.

Also explicitly size pointers (__DEFINE_GUEST_HANDLE) to 64 bit.


Changes in v3:

- remove the incorrect changes to multicall_entry;
- remove the change to apic_physbase.


Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/arm/include/asm/xen/interface.h  |  8 ++++++--
 arch/ia64/include/asm/xen/interface.h |  1 +
 arch/x86/include/asm/xen/interface.h  |  1 +
 include/xen/interface/memory.h        | 12 ++++++------
 include/xen/interface/physdev.h       |  2 +-
 include/xen/interface/version.h       |  2 +-
 6 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm/include/asm/xen/interface.h b/arch/arm/include/asm/xen/interface.h
index 74c72b5083a6..ae05e56dd17d 100644
--- a/arch/arm/include/asm/xen/interface.h
+++ b/arch/arm/include/asm/xen/interface.h
@@ -9,8 +9,11 @@
 
 #include <linux/types.h>
 
+#define uint64_aligned_t uint64_t __attribute__((aligned(8)))
+
 #define __DEFINE_GUEST_HANDLE(name, type) \
-	typedef type * __guest_handle_ ## name
+	typedef struct { union { type *p; uint64_aligned_t q; }; }  \
+        __guest_handle_ ## name
 
 #define DEFINE_GUEST_HANDLE_STRUCT(name) \
 	__DEFINE_GUEST_HANDLE(name, struct name)
@@ -21,13 +24,14 @@
 	do {						\
 		if (sizeof(hnd) == 8)			\
 			*(uint64_t *)&(hnd) = 0;	\
-		(hnd) = val;				\
+		(hnd).p = val;				\
 	} while (0)
 
 #ifndef __ASSEMBLY__
 /* Explicitly size integers that represent pfns in the interface with
  * Xen so that we can have one ABI that works for 32 and 64 bit guests. */
 typedef uint64_t xen_pfn_t;
+typedef uint64_t xen_ulong_t;
 /* Guest handles for primitive C types. */
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_GUEST_HANDLE(uint,  unsigned int);
diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
index 3d52a5bbd857..e88c5de27410 100644
--- a/arch/ia64/include/asm/xen/interface.h
+++ b/arch/ia64/include/asm/xen/interface.h
@@ -71,6 +71,7 @@
  * with Xen so that we could have one ABI that works for 32 and 64 bit
  * guests. */
 typedef unsigned long xen_pfn_t;
+typedef unsigned long xen_ulong_t;
 /* Guest handles for primitive C types. */
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_GUEST_HANDLE(uint, unsigned int);
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index 555f94d3637b..28fc6211a79a 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -51,6 +51,7 @@
  * with Xen so that on ARM we can have one ABI that works for 32 and 64
  * bit guests. */
 typedef unsigned long xen_pfn_t;
+typedef unsigned long xen_ulong_t;
 /* Guest handles for primitive C types. */
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_GUEST_HANDLE(uint,  unsigned int);
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index d8e33a93ea4d..b66d04ce6957 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -34,7 +34,7 @@ struct xen_memory_reservation {
     GUEST_HANDLE(xen_pfn_t) extent_start;
 
     /* Number of extents, and size/alignment of each (2^extent_order pages). */
-    unsigned long  nr_extents;
+    xen_ulong_t  nr_extents;
     unsigned int   extent_order;
 
     /*
@@ -92,7 +92,7 @@ struct xen_memory_exchange {
      *     command will be non-zero.
      *  5. THIS FIELD MUST BE INITIALISED TO ZERO BY THE CALLER!
      */
-    unsigned long nr_exchanged;
+    xen_ulong_t nr_exchanged;
 };
 
 DEFINE_GUEST_HANDLE_STRUCT(xen_memory_exchange);
@@ -148,8 +148,8 @@ DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mfn_list);
  */
 #define XENMEM_machphys_mapping     12
 struct xen_machphys_mapping {
-    unsigned long v_start, v_end; /* Start and end virtual addresses.   */
-    unsigned long max_mfn;        /* Maximum MFN that can be looked up. */
+    xen_ulong_t v_start, v_end; /* Start and end virtual addresses.   */
+    xen_ulong_t max_mfn;        /* Maximum MFN that can be looked up. */
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_machphys_mapping_t);
 
@@ -172,7 +172,7 @@ struct xen_add_to_physmap {
     unsigned int space;
 
     /* Index into source mapping space. */
-    unsigned long idx;
+    xen_ulong_t idx;
 
     /* GPFN where the source mapping page should appear. */
     xen_pfn_t gpfn;
@@ -189,7 +189,7 @@ struct xen_translate_gpfn_list {
     domid_t domid;
 
     /* Length of list. */
-    unsigned long nr_gpfns;
+    xen_ulong_t nr_gpfns;
 
     /* List of GPFNs to translate. */
     GUEST_HANDLE(ulong) gpfn_list;
diff --git a/include/xen/interface/physdev.h b/include/xen/interface/physdev.h
index 9ce788d8cf49..f616514f781b 100644
--- a/include/xen/interface/physdev.h
+++ b/include/xen/interface/physdev.h
@@ -56,7 +56,7 @@ struct physdev_eoi {
 #define PHYSDEVOP_pirq_eoi_gmfn_v2       28
 struct physdev_pirq_eoi_gmfn {
     /* IN */
-    unsigned long gmfn;
+    xen_ulong_t gmfn;
 };
 
 /*
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h
index dd58cf5ea3e4..3030c81c09ce 100644
--- a/include/xen/interface/version.h
+++ b/include/xen/interface/version.h
@@ -45,7 +45,7 @@ struct xen_changeset_info {
 
 #define XENVER_platform_parameters 5
 struct xen_platform_parameters {
-    unsigned long virt_start;
+    xen_ulong_t virt_start;
 };
 
 #define XENVER_get_features 6
-- 
cgit v1.2.3


From 0ec53ecf38bcbf95b4b057328a8fbba4d22ef28b Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Fri, 14 Sep 2012 13:37:32 +0000
Subject: xen/arm: receive Xen events on ARM

Compile events.c on ARM.
Parse, map and enable the IRQ to get event notifications from the device
tree (node "/xen").

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/arm/include/asm/xen/events.h | 18 ++++++++++++++++++
 arch/arm/xen/enlighten.c          | 33 +++++++++++++++++++++++++++++++++
 arch/x86/xen/enlighten.c          |  1 +
 arch/x86/xen/irq.c                |  1 +
 arch/x86/xen/xen-ops.h            |  1 -
 drivers/xen/events.c              | 17 ++++++++++++++---
 include/xen/events.h              |  2 ++
 7 files changed, 69 insertions(+), 4 deletions(-)
 create mode 100644 arch/arm/include/asm/xen/events.h

(limited to 'arch/x86')

diff --git a/arch/arm/include/asm/xen/events.h b/arch/arm/include/asm/xen/events.h
new file mode 100644
index 000000000000..94b4e9020b02
--- /dev/null
+++ b/arch/arm/include/asm/xen/events.h
@@ -0,0 +1,18 @@
+#ifndef _ASM_ARM_XEN_EVENTS_H
+#define _ASM_ARM_XEN_EVENTS_H
+
+#include <asm/ptrace.h>
+
+enum ipi_vector {
+	XEN_PLACEHOLDER_VECTOR,
+
+	/* Xen IPIs go here */
+	XEN_NR_IPIS,
+};
+
+static inline int xen_irqs_disabled(struct pt_regs *regs)
+{
+	return raw_irqs_disabled_flags(regs->ARM_cpsr);
+}
+
+#endif /* _ASM_ARM_XEN_EVENTS_H */
diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c
index 036a4d84e861..bad67ad43c2d 100644
--- a/arch/arm/xen/enlighten.c
+++ b/arch/arm/xen/enlighten.c
@@ -1,4 +1,5 @@
 #include <xen/xen.h>
+#include <xen/events.h>
 #include <xen/grant_table.h>
 #include <xen/hvm.h>
 #include <xen/interface/xen.h>
@@ -9,6 +10,8 @@
 #include <xen/xenbus.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
+#include <linux/interrupt.h>
+#include <linux/irqreturn.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_irq.h>
@@ -33,6 +36,8 @@ EXPORT_SYMBOL_GPL(xen_have_vector_callback);
 int xen_platform_pci_unplug = XEN_UNPLUG_ALL;
 EXPORT_SYMBOL_GPL(xen_platform_pci_unplug);
 
+static __read_mostly int xen_events_irq = -1;
+
 int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 			       unsigned long addr,
 			       unsigned long mfn, int nr,
@@ -74,6 +79,9 @@ static int __init xen_guest_init(void)
 	if (of_address_to_resource(node, GRANT_TABLE_PHYSADDR, &res))
 		return 0;
 	xen_hvm_resume_frames = res.start >> PAGE_SHIFT;
+	xen_events_irq = irq_of_parse_and_map(node, 0);
+	pr_info("Xen %s support found, events_irq=%d gnttab_frame_pfn=%lx\n",
+			version, xen_events_irq, xen_hvm_resume_frames);
 	xen_domain_type = XEN_HVM_DOMAIN;
 
 	xen_setup_features();
@@ -115,3 +123,28 @@ static int __init xen_guest_init(void)
 	return 0;
 }
 core_initcall(xen_guest_init);
+
+static irqreturn_t xen_arm_callback(int irq, void *arg)
+{
+	xen_hvm_evtchn_do_upcall();
+	return IRQ_HANDLED;
+}
+
+static int __init xen_init_events(void)
+{
+	if (!xen_domain() || xen_events_irq < 0)
+		return -ENODEV;
+
+	xen_init_IRQ();
+
+	if (request_percpu_irq(xen_events_irq, xen_arm_callback,
+			"events", xen_vcpu)) {
+		pr_err("Error requesting IRQ %d\n", xen_events_irq);
+		return -EINVAL;
+	}
+
+	enable_percpu_irq(xen_events_irq, 0);
+
+	return 0;
+}
+postcore_initcall(xen_init_events);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 47b3acdc2ac5..689a4c9da866 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,6 +33,7 @@
 #include <linux/memblock.h>
 
 #include <xen/xen.h>
+#include <xen/events.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/version.h>
 #include <xen/interface/physdev.h>
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 157337657971..01a4dc015ae1 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -5,6 +5,7 @@
 #include <xen/interface/xen.h>
 #include <xen/interface/sched.h>
 #include <xen/interface/vcpu.h>
+#include <xen/events.h>
 
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index bb5a8105ea86..a95b41744ad0 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -35,7 +35,6 @@ void xen_set_pat(u64);
 
 char * __init xen_memory_setup(void);
 void __init xen_arch_setup(void);
-void __init xen_init_IRQ(void);
 void xen_enable_sysenter(void);
 void xen_enable_syscall(void);
 void xen_vcpu_restore(void);
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index c60d1629c916..8672211555bb 100644
--- a/drivers/xen/events.c
+++ b/drivers/xen/events.c
@@ -31,14 +31,16 @@
 #include <linux/irqnr.h>
 #include <linux/pci.h>
 
+#ifdef CONFIG_X86
 #include <asm/desc.h>
 #include <asm/ptrace.h>
 #include <asm/irq.h>
 #include <asm/idle.h>
 #include <asm/io_apic.h>
-#include <asm/sync_bitops.h>
 #include <asm/xen/page.h>
 #include <asm/xen/pci.h>
+#endif
+#include <asm/sync_bitops.h>
 #include <asm/xen/hypercall.h>
 #include <asm/xen/hypervisor.h>
 
@@ -50,6 +52,9 @@
 #include <xen/interface/event_channel.h>
 #include <xen/interface/hvm/hvm_op.h>
 #include <xen/interface/hvm/params.h>
+#include <xen/interface/physdev.h>
+#include <xen/interface/sched.h>
+#include <asm/hw_irq.h>
 
 /*
  * This lock protects updates to the following mapping and reference-count
@@ -1386,7 +1391,9 @@ void xen_evtchn_do_upcall(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
 
+#ifdef CONFIG_X86
 	exit_idle();
+#endif
 	irq_enter();
 
 	__xen_evtchn_do_upcall();
@@ -1795,9 +1802,9 @@ void xen_callback_vector(void)
 void xen_callback_vector(void) {}
 #endif
 
-void __init xen_init_IRQ(void)
+void xen_init_IRQ(void)
 {
-	int i, rc;
+	int i;
 
 	evtchn_to_irq = kcalloc(NR_EVENT_CHANNELS, sizeof(*evtchn_to_irq),
 				    GFP_KERNEL);
@@ -1813,6 +1820,7 @@ void __init xen_init_IRQ(void)
 
 	pirq_needs_eoi = pirq_needs_eoi_flag;
 
+#ifdef CONFIG_X86
 	if (xen_hvm_domain()) {
 		xen_callback_vector();
 		native_init_IRQ();
@@ -1820,6 +1828,7 @@ void __init xen_init_IRQ(void)
 		 * __acpi_register_gsi can point at the right function */
 		pci_xen_hvm_init();
 	} else {
+		int rc;
 		struct physdev_pirq_eoi_gmfn eoi_gmfn;
 
 		irq_ctx_init(smp_processor_id());
@@ -1835,4 +1844,6 @@ void __init xen_init_IRQ(void)
 		} else
 			pirq_needs_eoi = pirq_check_eoi_map;
 	}
+#endif
 }
+EXPORT_SYMBOL_GPL(xen_init_IRQ);
diff --git a/include/xen/events.h b/include/xen/events.h
index 04399b28e821..c6bfe01acf6b 100644
--- a/include/xen/events.h
+++ b/include/xen/events.h
@@ -109,4 +109,6 @@ int xen_irq_from_gsi(unsigned gsi);
 /* Determine whether to ignore this IRQ if it is passed to a guest. */
 int xen_test_irq_shared(int irq);
 
+/* initialize Xen IRQ subsystem */
+void xen_init_IRQ(void);
 #endif	/* _XEN_EVENTS_H */
-- 
cgit v1.2.3


From 64eb0620296f924d5fded755c5ed95fb73649e06 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 8 Aug 2012 15:24:36 +0300
Subject: KVM: correctly detect APIC SW state in kvm_apic_post_state_restore()

For apic_set_spiv() to track APIC SW state correctly it needs to see
previous and next values of the spurious vector register, but currently
memset() overwrite the old value before apic_set_spiv() get a chance to
do tracking. Fix it by calling apic_set_spiv() before overwriting old
value.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 7 +++++--
 arch/x86/kvm/lapic.h | 3 ++-
 arch/x86/kvm/x86.c   | 3 +--
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 333c27fa6e9f..18d149d80209 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1389,13 +1389,16 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
 	return vector;
 }
 
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu)
+void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
+		struct kvm_lapic_state *s)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
+	/* set SPIV separately to get count of SW disabled APICs right */
+	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
+	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
 	kvm_apic_set_version(vcpu);
-	apic_set_spiv(apic, kvm_apic_get_reg(apic, APIC_SPIV));
 
 	apic_update_ppr(apic);
 	hrtimer_cancel(&apic->lapic_timer.timer);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 2ad9caa06f94..615a8b030168 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -54,7 +54,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
-void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu);
+void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
+		struct kvm_lapic_state *s);
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
 
 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8ebf65c349eb..91a595827deb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2348,8 +2348,7 @@ static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 				    struct kvm_lapic_state *s)
 {
-	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
-	kvm_apic_post_state_restore(vcpu);
+	kvm_apic_post_state_restore(vcpu, s);
 	update_cr8_intercept(vcpu);
 
 	return 0;
-- 
cgit v1.2.3


From 4670a300a2169e1e922593c5d35b0cdaee112901 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Thu, 9 Aug 2012 10:59:21 -0700
Subject: x86/mce: Make cmci_discover() quiet

cmci_discover() works out which machine check banks support CMCI, and
which of those are shared by multiple logical processors. It uses this
information to ensure that exactly one cpu is designated the owner of
each bank so that when interrupts are broadcast to multiple cpus, only one
of them will look in a shared bank to log the error and clear the bank.

At boot time cmci_discover() performs this task silently. But during
certain cpu hotplug operations it prints out a set of summary lines
like this:

CPU 35 MCA banks CMCI:0 CMCI:1 CMCI:3 CMCI:5 CMCI:6 CMCI:7 CMCI:8 CMCI:9 CMCI:10 CMCI:11
CPU 1 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 39 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 38 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 32 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 37 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 36 MCA banks CMCI:0 CMCI:1 CMCI:3
CPU 34 MCA banks CMCI:0 CMCI:1 CMCI:3

The value of these messages seems very low. A user might painstakingly
cross-check against the data sheet for a processor to ensure that all
CMCI supported banks are correctly reported, but this seems improbable.
If users really wanted to do this, we should print the information at
boot time too.

Remove the messages.

Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce_intel.c | 25 ++++++-------------------
 1 file changed, 6 insertions(+), 19 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 38e49bc95ffc..59648e48a145 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -65,24 +65,15 @@ static void intel_threshold_interrupt(void)
 	mce_notify_irq();
 }
 
-static void print_update(char *type, int *hdr, int num)
-{
-	if (*hdr == 0)
-		printk(KERN_INFO "CPU %d MCA banks", smp_processor_id());
-	*hdr = 1;
-	printk(KERN_CONT " %s:%d", type, num);
-}
-
 /*
  * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks
  * on this CPU. Use the algorithm recommended in the SDM to discover shared
  * banks.
  */
-static void cmci_discover(int banks, int boot)
+static void cmci_discover(int banks)
 {
 	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
 	unsigned long flags;
-	int hdr = 0;
 	int i;
 
 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
@@ -96,8 +87,7 @@ static void cmci_discover(int banks, int boot)
 
 		/* Already owned by someone else? */
 		if (val & MCI_CTL2_CMCI_EN) {
-			if (test_and_clear_bit(i, owned) && !boot)
-				print_update("SHD", &hdr, i);
+			clear_bit(i, owned);
 			__clear_bit(i, __get_cpu_var(mce_poll_banks));
 			continue;
 		}
@@ -109,16 +99,13 @@ static void cmci_discover(int banks, int boot)
 
 		/* Did the enable bit stick? -- the bank supports CMCI */
 		if (val & MCI_CTL2_CMCI_EN) {
-			if (!test_and_set_bit(i, owned) && !boot)
-				print_update("CMCI", &hdr, i);
+			set_bit(i, owned);
 			__clear_bit(i, __get_cpu_var(mce_poll_banks));
 		} else {
 			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
 		}
 	}
 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
-	if (hdr)
-		printk(KERN_CONT "\n");
 }
 
 /*
@@ -186,7 +173,7 @@ void cmci_rediscover(int dying)
 			continue;
 		/* Recheck banks in case CPUs don't all have the same */
 		if (cmci_supported(&banks))
-			cmci_discover(banks, 0);
+			cmci_discover(banks);
 	}
 
 	set_cpus_allowed_ptr(current, old);
@@ -200,7 +187,7 @@ void cmci_reenable(void)
 {
 	int banks;
 	if (cmci_supported(&banks))
-		cmci_discover(banks, 0);
+		cmci_discover(banks);
 }
 
 static void intel_init_cmci(void)
@@ -211,7 +198,7 @@ static void intel_init_cmci(void)
 		return;
 
 	mce_threshold_vector = intel_threshold_interrupt;
-	cmci_discover(banks, 1);
+	cmci_discover(banks);
 	/*
 	 * For CPU #0 this runs with still disabled APIC, but that's
 	 * ok because only the vector is set up. We still do another
-- 
cgit v1.2.3


From 55babd8f41f122f5f4c7cebf520c766c983282c6 Mon Sep 17 00:00:00 2001
From: Chen Gong <gong.chen@linux.intel.com>
Date: Thu, 9 Aug 2012 11:44:51 -0700
Subject: x86/mce: Add CMCI poll mode

On Intel systems corrected machine check interrupts (CMCI) may be sent to
multiple logical processors; possibly to all processors on the affected
socket (SDM Volume 3B "15.5.1 CMCI Local APIC Interface").  This means
that a persistent error (such as a stuck bit in ECC memory) may cause
a storm of interrupts that greatly hinders or prevents forward progress
(probably on many processors).

To solve this we keep track of the rate at which each processor sees
CMCI. If we exceed a threshold, we disable CMCI delivery and switch to
polling the machine check banks. If the storm subsides (none of the
affected processors see any more errors for a complete poll interval) we
re-enable CMCI.

[Tony: Added console messages when storm begins/ends and increased storm
threshold from 5 to 15 so we have a few more logged entries before we
disable interrupts and start dropping reports]

Signed-off-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Chen Gong <gong.chen@linux.intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/kernel/cpu/mcheck/mce-internal.h |  12 ++++
 arch/x86/kernel/cpu/mcheck/mce.c          |  47 +++++++++++--
 arch/x86/kernel/cpu/mcheck/mce_intel.c    | 108 +++++++++++++++++++++++++++++-
 3 files changed, 160 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index ed44c8a65858..6a05c1d327a9 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -28,6 +28,18 @@ extern int mce_ser;
 
 extern struct mce_bank *mce_banks;
 
+#ifdef CONFIG_X86_MCE_INTEL
+unsigned long mce_intel_adjust_timer(unsigned long interval);
+void mce_intel_cmci_poll(void);
+void mce_intel_hcpu_update(unsigned long cpu);
+#else
+# define mce_intel_adjust_timer mce_adjust_timer_default
+static inline void mce_intel_cmci_poll(void) { }
+static inline void mce_intel_hcpu_update(unsigned long cpu) { }
+#endif
+
+void mce_timer_kick(unsigned long interval);
+
 #ifdef CONFIG_ACPI_APEI
 int apei_write_mce(struct mce *m);
 ssize_t apei_read_mce(struct mce *m, u64 *record_id);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index b4dde1527edd..8c1beea6cabf 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1260,6 +1260,14 @@ static unsigned long check_interval = 5 * 60; /* 5 minutes */
 static DEFINE_PER_CPU(unsigned long, mce_next_interval); /* in jiffies */
 static DEFINE_PER_CPU(struct timer_list, mce_timer);
 
+static unsigned long mce_adjust_timer_default(unsigned long interval)
+{
+	return interval;
+}
+
+static unsigned long (*mce_adjust_timer)(unsigned long interval) =
+	mce_adjust_timer_default;
+
 static void mce_timer_fn(unsigned long data)
 {
 	struct timer_list *t = &__get_cpu_var(mce_timer);
@@ -1270,6 +1278,7 @@ static void mce_timer_fn(unsigned long data)
 	if (mce_available(__this_cpu_ptr(&cpu_info))) {
 		machine_check_poll(MCP_TIMESTAMP,
 				&__get_cpu_var(mce_poll_banks));
+		mce_intel_cmci_poll();
 	}
 
 	/*
@@ -1277,14 +1286,38 @@ static void mce_timer_fn(unsigned long data)
 	 * polling interval, otherwise increase the polling interval.
 	 */
 	iv = __this_cpu_read(mce_next_interval);
-	if (mce_notify_irq())
+	if (mce_notify_irq()) {
 		iv = max(iv / 2, (unsigned long) HZ/100);
-	else
+	} else {
 		iv = min(iv * 2, round_jiffies_relative(check_interval * HZ));
+		iv = mce_adjust_timer(iv);
+	}
 	__this_cpu_write(mce_next_interval, iv);
+	/* Might have become 0 after CMCI storm subsided */
+	if (iv) {
+		t->expires = jiffies + iv;
+		add_timer_on(t, smp_processor_id());
+	}
+}
 
-	t->expires = jiffies + iv;
-	add_timer_on(t, smp_processor_id());
+/*
+ * Ensure that the timer is firing in @interval from now.
+ */
+void mce_timer_kick(unsigned long interval)
+{
+	struct timer_list *t = &__get_cpu_var(mce_timer);
+	unsigned long when = jiffies + interval;
+	unsigned long iv = __this_cpu_read(mce_next_interval);
+
+	if (timer_pending(t)) {
+		if (time_before(when, t->expires))
+			mod_timer_pinned(t, when);
+	} else {
+		t->expires = round_jiffies(when);
+		add_timer_on(t, smp_processor_id());
+	}
+	if (interval < iv)
+		__this_cpu_write(mce_next_interval, interval);
 }
 
 /* Must not be called in IRQ context where del_timer_sync() can deadlock */
@@ -1548,6 +1581,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 	switch (c->x86_vendor) {
 	case X86_VENDOR_INTEL:
 		mce_intel_feature_init(c);
+		mce_adjust_timer = mce_intel_adjust_timer;
 		break;
 	case X86_VENDOR_AMD:
 		mce_amd_feature_init(c);
@@ -1559,7 +1593,7 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
 
 static void mce_start_timer(unsigned int cpu, struct timer_list *t)
 {
-	unsigned long iv = check_interval * HZ;
+	unsigned long iv = mce_adjust_timer(check_interval * HZ);
 
 	__this_cpu_write(mce_next_interval, iv);
 
@@ -2272,10 +2306,11 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		if (threshold_cpu_callback)
 			threshold_cpu_callback(action, cpu);
 		mce_device_remove(cpu);
+		mce_intel_hcpu_update(cpu);
 		break;
 	case CPU_DOWN_PREPARE:
-		del_timer_sync(t);
 		smp_call_function_single(cpu, mce_disable_cpu, &action, 1);
+		del_timer_sync(t);
 		break;
 	case CPU_DOWN_FAILED:
 		smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 59648e48a145..098386fed48e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -15,6 +15,8 @@
 #include <asm/msr.h>
 #include <asm/mce.h>
 
+#include "mce-internal.h"
+
 /*
  * Support for Intel Correct Machine Check Interrupts. This allows
  * the CPU to raise an interrupt when a corrected machine check happened.
@@ -30,7 +32,22 @@ static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned);
  */
 static DEFINE_RAW_SPINLOCK(cmci_discover_lock);
 
-#define CMCI_THRESHOLD 1
+#define CMCI_THRESHOLD		1
+#define CMCI_POLL_INTERVAL	(30 * HZ)
+#define CMCI_STORM_INTERVAL	(1 * HZ)
+#define CMCI_STORM_THRESHOLD	15
+
+static DEFINE_PER_CPU(unsigned long, cmci_time_stamp);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_cnt);
+static DEFINE_PER_CPU(unsigned int, cmci_storm_state);
+
+enum {
+	CMCI_STORM_NONE,
+	CMCI_STORM_ACTIVE,
+	CMCI_STORM_SUBSIDED,
+};
+
+static atomic_t cmci_storm_on_cpus;
 
 static int cmci_supported(int *banks)
 {
@@ -53,6 +70,93 @@ static int cmci_supported(int *banks)
 	return !!(cap & MCG_CMCI_P);
 }
 
+void mce_intel_cmci_poll(void)
+{
+	if (__this_cpu_read(cmci_storm_state) == CMCI_STORM_NONE)
+		return;
+	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
+}
+
+void mce_intel_hcpu_update(unsigned long cpu)
+{
+	if (per_cpu(cmci_storm_state, cpu) == CMCI_STORM_ACTIVE)
+		atomic_dec(&cmci_storm_on_cpus);
+
+	per_cpu(cmci_storm_state, cpu) = CMCI_STORM_NONE;
+}
+
+unsigned long mce_intel_adjust_timer(unsigned long interval)
+{
+	int r;
+
+	if (interval < CMCI_POLL_INTERVAL)
+		return interval;
+
+	switch (__this_cpu_read(cmci_storm_state)) {
+	case CMCI_STORM_ACTIVE:
+		/*
+		 * We switch back to interrupt mode once the poll timer has
+		 * silenced itself. That means no events recorded and the
+		 * timer interval is back to our poll interval.
+		 */
+		__this_cpu_write(cmci_storm_state, CMCI_STORM_SUBSIDED);
+		r = atomic_sub_return(1, &cmci_storm_on_cpus);
+		if (r == 0)
+			pr_notice("CMCI storm subsided: switching to interrupt mode\n");
+		/* FALLTHROUGH */
+
+	case CMCI_STORM_SUBSIDED:
+		/*
+		 * We wait for all cpus to go back to SUBSIDED
+		 * state. When that happens we switch back to
+		 * interrupt mode.
+		 */
+		if (!atomic_read(&cmci_storm_on_cpus)) {
+			__this_cpu_write(cmci_storm_state, CMCI_STORM_NONE);
+			cmci_reenable();
+			cmci_recheck();
+		}
+		return CMCI_POLL_INTERVAL;
+	default:
+		/*
+		 * We have shiny weather. Let the poll do whatever it
+		 * thinks.
+		 */
+		return interval;
+	}
+}
+
+static bool cmci_storm_detect(void)
+{
+	unsigned int cnt = __this_cpu_read(cmci_storm_cnt);
+	unsigned long ts = __this_cpu_read(cmci_time_stamp);
+	unsigned long now = jiffies;
+	int r;
+
+	if (__this_cpu_read(cmci_storm_state) != CMCI_STORM_NONE)
+		return true;
+
+	if (time_before_eq(now, ts + CMCI_STORM_INTERVAL)) {
+		cnt++;
+	} else {
+		cnt = 1;
+		__this_cpu_write(cmci_time_stamp, now);
+	}
+	__this_cpu_write(cmci_storm_cnt, cnt);
+
+	if (cnt <= CMCI_STORM_THRESHOLD)
+		return false;
+
+	cmci_clear();
+	__this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE);
+	r = atomic_add_return(1, &cmci_storm_on_cpus);
+	mce_timer_kick(CMCI_POLL_INTERVAL);
+
+	if (r == 1)
+		pr_notice("CMCI storm detected: switching to poll mode\n");
+	return true;
+}
+
 /*
  * The interrupt handler. This is called on every event.
  * Just call the poller directly to log any events.
@@ -61,6 +165,8 @@ static int cmci_supported(int *banks)
  */
 static void intel_threshold_interrupt(void)
 {
+	if (cmci_storm_detect())
+		return;
 	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
 	mce_notify_irq();
 }
-- 
cgit v1.2.3


From c5e63197db519bae1c33e41ea0342a50f39e7a93 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 7 Aug 2012 15:20:36 +0200
Subject: perf: Unified API to record selective sets of arch registers

This brings a new API to help the selective dump of registers on event
sampling, and its implementation for x86 arch.

Added HAVE_PERF_REGS config option to determine if the architecture
provides perf registers ABI.

The information about desired registers will be passed in u64 mask.
It's up to the architecture to map the registers into the mask bits.

For the x86 arch implementation, both 32 and 64 bit registers bits are
defined within single enum to ensure 64 bit system can provide register
dump for compat task if needed in the future.

Original-patch-by: Frederic Weisbecker <fweisbec@gmail.com>
[ Added missing linux/errno.h include ]
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: Benjamin Redelings <benjamin.redelings@nescent.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1344345647-11536-2-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/Kconfig                     |  6 +++
 arch/x86/Kconfig                 |  1 +
 arch/x86/include/asm/perf_regs.h | 33 +++++++++++++++
 arch/x86/kernel/Makefile         |  2 +
 arch/x86/kernel/perf_regs.c      | 90 ++++++++++++++++++++++++++++++++++++++++
 include/linux/perf_regs.h        | 19 +++++++++
 6 files changed, 151 insertions(+)
 create mode 100644 arch/x86/include/asm/perf_regs.h
 create mode 100644 arch/x86/kernel/perf_regs.c
 create mode 100644 include/linux/perf_regs.h

(limited to 'arch/x86')

diff --git a/arch/Kconfig b/arch/Kconfig
index 72f2fa189cc5..68d827b7ae82 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -222,6 +222,12 @@ config HAVE_PERF_EVENTS_NMI
 	  subsystem.  Also has support for calculating CPU cycle events
 	  to determine how many clock cycles in a given period.
 
+config HAVE_PERF_REGS
+	bool
+	help
+	  Support selective register dumps for perf events. This includes
+	  bit-mapping of each registers and a unique architecture id.
+
 config HAVE_ARCH_JUMP_LABEL
 	bool
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4abd..3fab6ec9edc4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -60,6 +60,7 @@ config X86
 	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
+	select HAVE_PERF_REGS
 	select ANON_INODES
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
 	select HAVE_CMPXCHG_LOCAL if !M386
diff --git a/arch/x86/include/asm/perf_regs.h b/arch/x86/include/asm/perf_regs.h
new file mode 100644
index 000000000000..3f2207bfd17b
--- /dev/null
+++ b/arch/x86/include/asm/perf_regs.h
@@ -0,0 +1,33 @@
+#ifndef _ASM_X86_PERF_REGS_H
+#define _ASM_X86_PERF_REGS_H
+
+enum perf_event_x86_regs {
+	PERF_REG_X86_AX,
+	PERF_REG_X86_BX,
+	PERF_REG_X86_CX,
+	PERF_REG_X86_DX,
+	PERF_REG_X86_SI,
+	PERF_REG_X86_DI,
+	PERF_REG_X86_BP,
+	PERF_REG_X86_SP,
+	PERF_REG_X86_IP,
+	PERF_REG_X86_FLAGS,
+	PERF_REG_X86_CS,
+	PERF_REG_X86_SS,
+	PERF_REG_X86_DS,
+	PERF_REG_X86_ES,
+	PERF_REG_X86_FS,
+	PERF_REG_X86_GS,
+	PERF_REG_X86_R8,
+	PERF_REG_X86_R9,
+	PERF_REG_X86_R10,
+	PERF_REG_X86_R11,
+	PERF_REG_X86_R12,
+	PERF_REG_X86_R13,
+	PERF_REG_X86_R14,
+	PERF_REG_X86_R15,
+
+	PERF_REG_X86_32_MAX = PERF_REG_X86_GS + 1,
+	PERF_REG_X86_64_MAX = PERF_REG_X86_R15 + 1,
+};
+#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d97..8d7a619718b5 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -100,6 +100,8 @@ obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb.o
 obj-$(CONFIG_OF)			+= devicetree.o
 obj-$(CONFIG_UPROBES)			+= uprobes.o
 
+obj-$(CONFIG_PERF_EVENTS)		+= perf_regs.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
new file mode 100644
index 000000000000..3d6923528b1c
--- /dev/null
+++ b/arch/x86/kernel/perf_regs.c
@@ -0,0 +1,90 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/bug.h>
+#include <linux/stddef.h>
+#include <asm/perf_regs.h>
+#include <asm/ptrace.h>
+
+#ifdef CONFIG_X86_32
+#define PERF_REG_X86_MAX PERF_REG_X86_32_MAX
+#else
+#define PERF_REG_X86_MAX PERF_REG_X86_64_MAX
+#endif
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
+	PT_REGS_OFFSET(PERF_REG_X86_AX, ax),
+	PT_REGS_OFFSET(PERF_REG_X86_BX, bx),
+	PT_REGS_OFFSET(PERF_REG_X86_CX, cx),
+	PT_REGS_OFFSET(PERF_REG_X86_DX, dx),
+	PT_REGS_OFFSET(PERF_REG_X86_SI, si),
+	PT_REGS_OFFSET(PERF_REG_X86_DI, di),
+	PT_REGS_OFFSET(PERF_REG_X86_BP, bp),
+	PT_REGS_OFFSET(PERF_REG_X86_SP, sp),
+	PT_REGS_OFFSET(PERF_REG_X86_IP, ip),
+	PT_REGS_OFFSET(PERF_REG_X86_FLAGS, flags),
+	PT_REGS_OFFSET(PERF_REG_X86_CS, cs),
+	PT_REGS_OFFSET(PERF_REG_X86_SS, ss),
+#ifdef CONFIG_X86_32
+	PT_REGS_OFFSET(PERF_REG_X86_DS, ds),
+	PT_REGS_OFFSET(PERF_REG_X86_ES, es),
+	PT_REGS_OFFSET(PERF_REG_X86_FS, fs),
+	PT_REGS_OFFSET(PERF_REG_X86_GS, gs),
+#else
+	/*
+	 * The pt_regs struct does not store
+	 * ds, es, fs, gs in 64 bit mode.
+	 */
+	(unsigned int) -1,
+	(unsigned int) -1,
+	(unsigned int) -1,
+	(unsigned int) -1,
+#endif
+#ifdef CONFIG_X86_64
+	PT_REGS_OFFSET(PERF_REG_X86_R8, r8),
+	PT_REGS_OFFSET(PERF_REG_X86_R9, r9),
+	PT_REGS_OFFSET(PERF_REG_X86_R10, r10),
+	PT_REGS_OFFSET(PERF_REG_X86_R11, r11),
+	PT_REGS_OFFSET(PERF_REG_X86_R12, r12),
+	PT_REGS_OFFSET(PERF_REG_X86_R13, r13),
+	PT_REGS_OFFSET(PERF_REG_X86_R14, r14),
+	PT_REGS_OFFSET(PERF_REG_X86_R15, r15),
+#endif
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	if (WARN_ON_ONCE(idx > ARRAY_SIZE(pt_regs_offset)))
+		return 0;
+
+	return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+#define REG_RESERVED (~((1ULL << PERF_REG_X86_MAX) - 1ULL))
+
+#ifdef CONFIG_X86_32
+int perf_reg_validate(u64 mask)
+{
+	if (!mask || mask & REG_RESERVED)
+		return -EINVAL;
+
+	return 0;
+}
+#else /* CONFIG_X86_64 */
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
+		       (1ULL << PERF_REG_X86_ES) | \
+		       (1ULL << PERF_REG_X86_FS) | \
+		       (1ULL << PERF_REG_X86_GS))
+
+int perf_reg_validate(u64 mask)
+{
+	if (!mask || mask & REG_RESERVED)
+		return -EINVAL;
+
+	if (mask & REG_NOSUPPORT)
+		return -EINVAL;
+
+	return 0;
+}
+#endif /* CONFIG_X86_32 */
diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h
new file mode 100644
index 000000000000..a2f1a98f7839
--- /dev/null
+++ b/include/linux/perf_regs.h
@@ -0,0 +1,19 @@
+#ifndef _LINUX_PERF_REGS_H
+#define _LINUX_PERF_REGS_H
+
+#ifdef CONFIG_HAVE_PERF_REGS
+#include <asm/perf_regs.h>
+u64 perf_reg_value(struct pt_regs *regs, int idx);
+int perf_reg_validate(u64 mask);
+#else
+static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+	return 0;
+}
+
+static inline int perf_reg_validate(u64 mask)
+{
+	return mask ? -ENOSYS : 0;
+}
+#endif /* CONFIG_HAVE_PERF_REGS */
+#endif /* _LINUX_PERF_REGS_H */
-- 
cgit v1.2.3


From 4018994f3d8785275ef0e7391b75c3462c029e56 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 7 Aug 2012 15:20:37 +0200
Subject: perf: Add ability to attach user level registers dump to sample

Introducing PERF_SAMPLE_REGS_USER sample type bit to trigger the dump of
user level registers on sample. Registers we want to dump are specified
by sample_regs_user bitmask.

Only user level registers are dumped at the moment. Meaning the register
values of the user space context as it was before the user entered the
kernel for whatever reason (syscall, irq, exception, or a PMI happening
in userspace).

The layout of the sample_regs_user bitmap is described in
asm/perf_regs.h for archs that support register dump.

This is going to be useful to bring Dwarf CFI based stack unwinding on
top of samples.

Original-patch-by: Frederic Weisbecker <fweisbec@gmail.com>
[ Dump registers ABI specification. ]
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Suggested-by: Stephane Eranian <eranian@google.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: Benjamin Redelings <benjamin.redelings@nescent.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1344345647-11536-3-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/kernel/perf_regs.c | 15 +++++++++++
 include/linux/perf_event.h  | 35 +++++++++++++++++++++---
 include/linux/perf_regs.h   |  6 +++++
 kernel/events/core.c        | 66 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 119 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 3d6923528b1c..c5a3e5cfe07f 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -1,5 +1,7 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
 #include <linux/bug.h>
 #include <linux/stddef.h>
 #include <asm/perf_regs.h>
@@ -71,6 +73,11 @@ int perf_reg_validate(u64 mask)
 
 	return 0;
 }
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	return PERF_SAMPLE_REGS_ABI_32;
+}
 #else /* CONFIG_X86_64 */
 #define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
 		       (1ULL << PERF_REG_X86_ES) | \
@@ -87,4 +94,12 @@ int perf_reg_validate(u64 mask)
 
 	return 0;
 }
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+	if (test_tsk_thread_flag(task, TIF_IA32))
+		return PERF_SAMPLE_REGS_ABI_32;
+	else
+		return PERF_SAMPLE_REGS_ABI_64;
+}
 #endif /* CONFIG_X86_32 */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7602ccb3f40e..3d4d84745f07 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -130,8 +130,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_STREAM_ID			= 1U << 9,
 	PERF_SAMPLE_RAW				= 1U << 10,
 	PERF_SAMPLE_BRANCH_STACK		= 1U << 11,
+	PERF_SAMPLE_REGS_USER			= 1U << 12,
 
-	PERF_SAMPLE_MAX = 1U << 12,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 13,		/* non-ABI */
 };
 
 /*
@@ -162,6 +163,15 @@ enum perf_branch_sample_type {
 	 PERF_SAMPLE_BRANCH_KERNEL|\
 	 PERF_SAMPLE_BRANCH_HV)
 
+/*
+ * Values to determine ABI of the registers dump.
+ */
+enum perf_sample_regs_abi {
+	PERF_SAMPLE_REGS_ABI_NONE	= 0,
+	PERF_SAMPLE_REGS_ABI_32		= 1,
+	PERF_SAMPLE_REGS_ABI_64		= 2,
+};
+
 /*
  * The format of the data returned by read() on a perf event fd,
  * as specified by attr.read_format:
@@ -194,6 +204,7 @@ enum perf_event_read_format {
 #define PERF_ATTR_SIZE_VER0	64	/* sizeof first published struct */
 #define PERF_ATTR_SIZE_VER1	72	/* add: config2 */
 #define PERF_ATTR_SIZE_VER2	80	/* add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3	88	/* add: sample_regs_user */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -271,7 +282,13 @@ struct perf_event_attr {
 		__u64		bp_len;
 		__u64		config2; /* extension of config1 */
 	};
-	__u64	branch_sample_type; /* enum branch_sample_type */
+	__u64	branch_sample_type; /* enum perf_branch_sample_type */
+
+	/*
+	 * Defines set of user regs to dump on samples.
+	 * See asm/perf_regs.h for details.
+	 */
+	__u64	sample_regs_user;
 };
 
 /*
@@ -548,6 +565,9 @@ enum perf_event_type {
 	 *	  char                  data[size];}&& PERF_SAMPLE_RAW
 	 *
 	 *	{ u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+	 *
+	 * 	{ u64			abi; # enum perf_sample_regs_abi
+	 * 	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
@@ -609,6 +629,7 @@ struct perf_guest_info_callbacks {
 #include <linux/static_key.h>
 #include <linux/atomic.h>
 #include <linux/sysfs.h>
+#include <linux/perf_regs.h>
 #include <asm/local.h>
 
 struct perf_callchain_entry {
@@ -654,6 +675,11 @@ struct perf_branch_stack {
 	struct perf_branch_entry	entries[0];
 };
 
+struct perf_regs_user {
+	__u64		abi;
+	struct pt_regs	*regs;
+};
+
 struct task_struct;
 
 /*
@@ -1133,6 +1159,7 @@ struct perf_sample_data {
 	struct perf_callchain_entry	*callchain;
 	struct perf_raw_record		*raw;
 	struct perf_branch_stack	*br_stack;
+	struct perf_regs_user		regs_user;
 };
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
@@ -1142,7 +1169,9 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->addr = addr;
 	data->raw  = NULL;
 	data->br_stack = NULL;
-	data->period	= period;
+	data->period = period;
+	data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
+	data->regs_user.regs = NULL;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h
index a2f1a98f7839..3c73d5fe18be 100644
--- a/include/linux/perf_regs.h
+++ b/include/linux/perf_regs.h
@@ -5,6 +5,7 @@
 #include <asm/perf_regs.h>
 u64 perf_reg_value(struct pt_regs *regs, int idx);
 int perf_reg_validate(u64 mask);
+u64 perf_reg_abi(struct task_struct *task);
 #else
 static inline u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
@@ -15,5 +16,10 @@ static inline int perf_reg_validate(u64 mask)
 {
 	return mask ? -ENOSYS : 0;
 }
+
+static inline u64 perf_reg_abi(struct task_struct *task)
+{
+	return PERF_SAMPLE_REGS_ABI_NONE;
+}
 #endif /* CONFIG_HAVE_PERF_REGS */
 #endif /* _LINUX_PERF_REGS_H */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index b7935fcec7d9..d3ce97525b9f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3756,6 +3756,37 @@ int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
 }
 EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
 
+static void
+perf_output_sample_regs(struct perf_output_handle *handle,
+			struct pt_regs *regs, u64 mask)
+{
+	int bit;
+
+	for_each_set_bit(bit, (const unsigned long *) &mask,
+			 sizeof(mask) * BITS_PER_BYTE) {
+		u64 val;
+
+		val = perf_reg_value(regs, bit);
+		perf_output_put(handle, val);
+	}
+}
+
+static void perf_sample_regs_user(struct perf_regs_user *regs_user,
+				  struct pt_regs *regs)
+{
+	if (!user_mode(regs)) {
+		if (current->mm)
+			regs = task_pt_regs(current);
+		else
+			regs = NULL;
+	}
+
+	if (regs) {
+		regs_user->regs = regs;
+		regs_user->abi  = perf_reg_abi(current);
+	}
+}
+
 static void __perf_event_header__init_id(struct perf_event_header *header,
 					 struct perf_sample_data *data,
 					 struct perf_event *event)
@@ -4016,6 +4047,23 @@ void perf_output_sample(struct perf_output_handle *handle,
 			perf_output_put(handle, nr);
 		}
 	}
+
+	if (sample_type & PERF_SAMPLE_REGS_USER) {
+		u64 abi = data->regs_user.abi;
+
+		/*
+		 * If there are no regs to dump, notice it through
+		 * first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE).
+		 */
+		perf_output_put(handle, abi);
+
+		if (abi) {
+			u64 mask = event->attr.sample_regs_user;
+			perf_output_sample_regs(handle,
+						data->regs_user.regs,
+						mask);
+		}
+	}
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
@@ -4067,6 +4115,20 @@ void perf_prepare_sample(struct perf_event_header *header,
 		}
 		header->size += size;
 	}
+
+	if (sample_type & PERF_SAMPLE_REGS_USER) {
+		/* regs dump ABI info */
+		int size = sizeof(u64);
+
+		perf_sample_regs_user(&data->regs_user, regs);
+
+		if (data->regs_user.regs) {
+			u64 mask = event->attr.sample_regs_user;
+			size += hweight64(mask) * sizeof(u64);
+		}
+
+		header->size += size;
+	}
 }
 
 static void perf_event_output(struct perf_event *event,
@@ -6142,6 +6204,10 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 			attr->branch_sample_type = mask;
 		}
 	}
+
+	if (attr->sample_type & PERF_SAMPLE_REGS_USER)
+		ret = perf_reg_validate(attr->sample_regs_user);
+
 out:
 	return ret;
 
-- 
cgit v1.2.3


From 91d7753a45f8525dc75b6be01e427dc1c378dc16 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 7 Aug 2012 15:20:38 +0200
Subject: perf: Factor __output_copy to be usable with specific copy function

Adding a generic way to use __output_copy function with specific copy
function via DEFINE_PERF_OUTPUT_COPY macro.

Using this to add new __output_copy_user function, that provides output
copy from user pointers. For x86 the copy_from_user_nmi function is used
and __copy_from_user_inatomic for the rest of the architectures.

This new function will be used in user stack dump on sample, coming in
next patches.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: Benjamin Redelings <benjamin.redelings@nescent.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1344345647-11536-4-git-send-email-jolsa@redhat.com
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/include/asm/perf_event.h |  2 ++
 include/linux/perf_event.h        |  2 +-
 kernel/events/internal.h          | 62 ++++++++++++++++++++++++++-------------
 kernel/events/ring_buffer.c       |  4 +--
 4 files changed, 46 insertions(+), 24 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index cb4e43bce98a..4fabcdf1cfa7 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -262,4 +262,6 @@ static inline void perf_check_microcode(void) { }
  static inline void amd_pmu_disable_virt(void) { }
 #endif
 
+#define arch_perf_out_copy_user copy_from_user_nmi
+
 #endif /* _ASM_X86_PERF_EVENT_H */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3d4d84745f07..d41394a1af36 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1319,7 +1319,7 @@ static inline bool has_branch_stack(struct perf_event *event)
 extern int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_event *event, unsigned int size);
 extern void perf_output_end(struct perf_output_handle *handle);
-extern void perf_output_copy(struct perf_output_handle *handle,
+extern unsigned int perf_output_copy(struct perf_output_handle *handle,
 			     const void *buf, unsigned int len);
 extern int perf_swevent_get_recursion_context(void);
 extern void perf_swevent_put_recursion_context(int rctx);
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index a096c19f2c2a..7fd5408493d2 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -2,6 +2,7 @@
 #define _KERNEL_EVENTS_INTERNAL_H
 
 #include <linux/hardirq.h>
+#include <linux/uaccess.h>
 
 /* Buffer handling */
 
@@ -76,30 +77,49 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb)
 	return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
 }
 
-static inline void
-__output_copy(struct perf_output_handle *handle,
-		   const void *buf, unsigned int len)
+#define DEFINE_OUTPUT_COPY(func_name, memcpy_func)			\
+static inline unsigned int						\
+func_name(struct perf_output_handle *handle,				\
+	  const void *buf, unsigned int len)				\
+{									\
+	unsigned long size, written;					\
+									\
+	do {								\
+		size = min_t(unsigned long, handle->size, len);		\
+									\
+		written = memcpy_func(handle->addr, buf, size);		\
+									\
+		len -= written;						\
+		handle->addr += written;				\
+		buf += written;						\
+		handle->size -= written;				\
+		if (!handle->size) {					\
+			struct ring_buffer *rb = handle->rb;		\
+									\
+			handle->page++;					\
+			handle->page &= rb->nr_pages - 1;		\
+			handle->addr = rb->data_pages[handle->page];	\
+			handle->size = PAGE_SIZE << page_order(rb);	\
+		}							\
+	} while (len && written == size);				\
+									\
+	return len;							\
+}
+
+static inline int memcpy_common(void *dst, const void *src, size_t n)
 {
-	do {
-		unsigned long size = min_t(unsigned long, handle->size, len);
-
-		memcpy(handle->addr, buf, size);
-
-		len -= size;
-		handle->addr += size;
-		buf += size;
-		handle->size -= size;
-		if (!handle->size) {
-			struct ring_buffer *rb = handle->rb;
-
-			handle->page++;
-			handle->page &= rb->nr_pages - 1;
-			handle->addr = rb->data_pages[handle->page];
-			handle->size = PAGE_SIZE << page_order(rb);
-		}
-	} while (len);
+	memcpy(dst, src, n);
+	return n;
 }
 
+DEFINE_OUTPUT_COPY(__output_copy, memcpy_common)
+
+#ifndef arch_perf_out_copy_user
+#define arch_perf_out_copy_user __copy_from_user_inatomic
+#endif
+
+DEFINE_OUTPUT_COPY(__output_copy_user, arch_perf_out_copy_user)
+
 /* Callchain handling */
 extern struct perf_callchain_entry *
 perf_callchain(struct perf_event *event, struct pt_regs *regs);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 6ddaba43fb7a..b4c2ad3dee7a 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -182,10 +182,10 @@ out:
 	return -ENOSPC;
 }
 
-void perf_output_copy(struct perf_output_handle *handle,
+unsigned int perf_output_copy(struct perf_output_handle *handle,
 		      const void *buf, unsigned int len)
 {
-	__output_copy(handle, buf, len);
+	return __output_copy(handle, buf, len);
 }
 
 void perf_output_end(struct perf_output_handle *handle)
-- 
cgit v1.2.3


From c5ebcedb566ef17bda7b02686e0d658a7bb42ee7 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Tue, 7 Aug 2012 15:20:40 +0200
Subject: perf: Add ability to attach user stack dump to sample

Introducing PERF_SAMPLE_STACK_USER sample type bit to trigger the dump
of the user level stack on sample. The size of the dump is specified by
sample_stack_user value.

Being able to dump parts of the user stack, starting from the stack
pointer, will be useful to make a post mortem dwarf CFI based stack
unwinding.

Added HAVE_PERF_USER_STACK_DUMP config option to determine if the
architecture provides user stack dump on perf event samples.  This needs
access to the user stack pointer which is not unified across
architectures. Enabling this for x86 architecture.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Original-patch-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Arun Sharma <asharma@fb.com>
Cc: Benjamin Redelings <benjamin.redelings@nescent.org>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Frank Ch. Eigler <fche@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Ulrich Drepper <drepper@gmail.com>
Link: http://lkml.kernel.org/r/1344345647-11536-6-git-send-email-jolsa@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/Kconfig               |   7 +++
 arch/x86/Kconfig           |   1 +
 include/linux/perf_event.h |  18 +++++-
 kernel/events/core.c       | 150 ++++++++++++++++++++++++++++++++++++++++++++-
 kernel/events/internal.h   |  16 +++++
 5 files changed, 190 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/Kconfig b/arch/Kconfig
index 68d827b7ae82..2a83a3f6a615 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -228,6 +228,13 @@ config HAVE_PERF_REGS
 	  Support selective register dumps for perf events. This includes
 	  bit-mapping of each registers and a unique architecture id.
 
+config HAVE_PERF_USER_STACK_DUMP
+	bool
+	help
+	  Support user stack dumps for perf event samples. This needs
+	  access to the user stack pointer which is not unified across
+	  architectures.
+
 config HAVE_ARCH_JUMP_LABEL
 	bool
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3fab6ec9edc4..a2d19ee750ca 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -61,6 +61,7 @@ config X86
 	select PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
+	select HAVE_PERF_USER_STACK_DUMP
 	select ANON_INODES
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
 	select HAVE_CMPXCHG_LOCAL if !M386
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 8a73f75beb16..d1d25f6a5e24 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -131,8 +131,9 @@ enum perf_event_sample_format {
 	PERF_SAMPLE_RAW				= 1U << 10,
 	PERF_SAMPLE_BRANCH_STACK		= 1U << 11,
 	PERF_SAMPLE_REGS_USER			= 1U << 12,
+	PERF_SAMPLE_STACK_USER			= 1U << 13,
 
-	PERF_SAMPLE_MAX = 1U << 13,		/* non-ABI */
+	PERF_SAMPLE_MAX = 1U << 14,		/* non-ABI */
 };
 
 /*
@@ -205,6 +206,7 @@ enum perf_event_read_format {
 #define PERF_ATTR_SIZE_VER1	72	/* add: config2 */
 #define PERF_ATTR_SIZE_VER2	80	/* add: branch_sample_type */
 #define PERF_ATTR_SIZE_VER3	88	/* add: sample_regs_user */
+#define PERF_ATTR_SIZE_VER4	96	/* add: sample_stack_user */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -289,6 +291,14 @@ struct perf_event_attr {
 	 * See asm/perf_regs.h for details.
 	 */
 	__u64	sample_regs_user;
+
+	/*
+	 * Defines size of the user stack to dump on samples.
+	 */
+	__u32	sample_stack_user;
+
+	/* Align to u64. */
+	__u32	__reserved_2;
 };
 
 /*
@@ -568,6 +578,10 @@ enum perf_event_type {
 	 *
 	 * 	{ u64			abi; # enum perf_sample_regs_abi
 	 * 	  u64			regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+	 *
+	 * 	{ u64			size;
+	 * 	  char			data[size];
+	 * 	  u64			dyn_size; } && PERF_SAMPLE_STACK_USER
 	 * };
 	 */
 	PERF_RECORD_SAMPLE			= 9,
@@ -1160,6 +1174,7 @@ struct perf_sample_data {
 	struct perf_raw_record		*raw;
 	struct perf_branch_stack	*br_stack;
 	struct perf_regs_user		regs_user;
+	u64				stack_user_size;
 };
 
 static inline void perf_sample_data_init(struct perf_sample_data *data,
@@ -1172,6 +1187,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
 	data->period = period;
 	data->regs_user.abi = PERF_SAMPLE_REGS_ABI_NONE;
 	data->regs_user.regs = NULL;
+	data->stack_user_size = 0;
 }
 
 extern void perf_output_sample(struct perf_output_handle *handle,
diff --git a/kernel/events/core.c b/kernel/events/core.c
index d3ce97525b9f..2ba890450d15 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -36,6 +36,7 @@
 #include <linux/perf_event.h>
 #include <linux/ftrace_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/mm_types.h>
 
 #include "internal.h"
 
@@ -3787,6 +3788,101 @@ static void perf_sample_regs_user(struct perf_regs_user *regs_user,
 	}
 }
 
+/*
+ * Get remaining task size from user stack pointer.
+ *
+ * It'd be better to take stack vma map and limit this more
+ * precisly, but there's no way to get it safely under interrupt,
+ * so using TASK_SIZE as limit.
+ */
+static u64 perf_ustack_task_size(struct pt_regs *regs)
+{
+	unsigned long addr = perf_user_stack_pointer(regs);
+
+	if (!addr || addr >= TASK_SIZE)
+		return 0;
+
+	return TASK_SIZE - addr;
+}
+
+static u16
+perf_sample_ustack_size(u16 stack_size, u16 header_size,
+			struct pt_regs *regs)
+{
+	u64 task_size;
+
+	/* No regs, no stack pointer, no dump. */
+	if (!regs)
+		return 0;
+
+	/*
+	 * Check if we fit in with the requested stack size into the:
+	 * - TASK_SIZE
+	 *   If we don't, we limit the size to the TASK_SIZE.
+	 *
+	 * - remaining sample size
+	 *   If we don't, we customize the stack size to
+	 *   fit in to the remaining sample size.
+	 */
+
+	task_size  = min((u64) USHRT_MAX, perf_ustack_task_size(regs));
+	stack_size = min(stack_size, (u16) task_size);
+
+	/* Current header size plus static size and dynamic size. */
+	header_size += 2 * sizeof(u64);
+
+	/* Do we fit in with the current stack dump size? */
+	if ((u16) (header_size + stack_size) < header_size) {
+		/*
+		 * If we overflow the maximum size for the sample,
+		 * we customize the stack dump size to fit in.
+		 */
+		stack_size = USHRT_MAX - header_size - sizeof(u64);
+		stack_size = round_up(stack_size, sizeof(u64));
+	}
+
+	return stack_size;
+}
+
+static void
+perf_output_sample_ustack(struct perf_output_handle *handle, u64 dump_size,
+			  struct pt_regs *regs)
+{
+	/* Case of a kernel thread, nothing to dump */
+	if (!regs) {
+		u64 size = 0;
+		perf_output_put(handle, size);
+	} else {
+		unsigned long sp;
+		unsigned int rem;
+		u64 dyn_size;
+
+		/*
+		 * We dump:
+		 * static size
+		 *   - the size requested by user or the best one we can fit
+		 *     in to the sample max size
+		 * data
+		 *   - user stack dump data
+		 * dynamic size
+		 *   - the actual dumped size
+		 */
+
+		/* Static size. */
+		perf_output_put(handle, dump_size);
+
+		/* Data. */
+		sp = perf_user_stack_pointer(regs);
+		rem = __output_copy_user(handle, (void *) sp, dump_size);
+		dyn_size = dump_size - rem;
+
+		perf_output_skip(handle, rem);
+
+		/* Dynamic size. */
+		perf_output_put(handle, dyn_size);
+	}
+}
+
 static void __perf_event_header__init_id(struct perf_event_header *header,
 					 struct perf_sample_data *data,
 					 struct perf_event *event)
@@ -4064,6 +4160,11 @@ void perf_output_sample(struct perf_output_handle *handle,
 						mask);
 		}
 	}
+
+	if (sample_type & PERF_SAMPLE_STACK_USER)
+		perf_output_sample_ustack(handle,
+					  data->stack_user_size,
+					  data->regs_user.regs);
 }
 
 void perf_prepare_sample(struct perf_event_header *header,
@@ -4129,6 +4230,35 @@ void perf_prepare_sample(struct perf_event_header *header,
 
 		header->size += size;
 	}
+
+	if (sample_type & PERF_SAMPLE_STACK_USER) {
+		/*
+		 * Either we need PERF_SAMPLE_STACK_USER bit to be allways
+		 * processed as the last one or have additional check added
+		 * in case new sample type is added, because we could eat
+		 * up the rest of the sample size.
+		 */
+		struct perf_regs_user *uregs = &data->regs_user;
+		u16 stack_size = event->attr.sample_stack_user;
+		u16 size = sizeof(u64);
+
+		if (!uregs->abi)
+			perf_sample_regs_user(uregs, regs);
+
+		stack_size = perf_sample_ustack_size(stack_size, header->size,
+						     uregs->regs);
+
+		/*
+		 * If there is something to dump, add space for the dump
+		 * itself and for the field that tells the dynamic size,
+		 * which is how many have been actually dumped.
+		 */
+		if (stack_size)
+			size += sizeof(u64) + stack_size;
+
+		data->stack_user_size = stack_size;
+		header->size += size;
+	}
 }
 
 static void perf_event_output(struct perf_event *event,
@@ -6205,8 +6335,26 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,
 		}
 	}
 
-	if (attr->sample_type & PERF_SAMPLE_REGS_USER)
+	if (attr->sample_type & PERF_SAMPLE_REGS_USER) {
 		ret = perf_reg_validate(attr->sample_regs_user);
+		if (ret)
+			return ret;
+	}
+
+	if (attr->sample_type & PERF_SAMPLE_STACK_USER) {
+		if (!arch_perf_have_user_stack_dump())
+			return -ENOSYS;
+
+		/*
+		 * We have __u32 type for the size, but so far
+		 * we can only use __u16 as maximum due to the
+		 * __u16 sample size limit.
+		 */
+		if (attr->sample_stack_user >= USHRT_MAX)
+			ret = -EINVAL;
+		else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64)))
+			ret = -EINVAL;
+	}
 
 out:
 	return ret;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index ce7bdfc1d045..d56a64c99a8b 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -158,4 +158,20 @@ static inline void put_recursion_context(int *recursion, int rctx)
 	recursion[rctx]--;
 }
 
+#ifdef CONFIG_HAVE_PERF_USER_STACK_DUMP
+static inline bool arch_perf_have_user_stack_dump(void)
+{
+	return true;
+}
+
+#define perf_user_stack_pointer(regs) user_stack_pointer(regs)
+#else
+static inline bool arch_perf_have_user_stack_dump(void)
+{
+	return false;
+}
+
+#define perf_user_stack_pointer(regs) 0
+#endif /* CONFIG_HAVE_PERF_USER_STACK_DUMP */
+
 #endif /* _KERNEL_EVENTS_INTERNAL_H */
-- 
cgit v1.2.3


From 51d59c6b422f3f95940ae4e5b42f165595906aee Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 3 Aug 2012 15:57:49 -0300
Subject: KVM: x86: fix pvclock guest stopped flag reporting

kvm_guest_time_update unconditionally clears hv_clock.flags field,
so the notification never reaches the guest.

Fix it by allowing PVCLOCK_GUEST_STOPPED to passthrough.

Reviewed-by: Eric B Munson <emunson@mgebm.net>
Reviewed-by: Amit Shah <amit.shah@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/x86.c              | 13 ++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1309e69b57fa..fc0e752e7564 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -420,6 +420,8 @@ struct kvm_vcpu_arch {
 	unsigned int hw_tsc_khz;
 	unsigned int time_offset;
 	struct page *time_page;
+	/* set guest stopped flag in pvclock flags field */
+	bool pvclock_set_guest_stopped_request;
 
 	struct {
 		u64 msr_val;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 91a595827deb..fb0d93788bfb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1134,6 +1134,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	unsigned long this_tsc_khz;
 	s64 kernel_ns, max_kernel_ns;
 	u64 tsc_timestamp;
+	u8 pvclock_flags;
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
@@ -1215,7 +1216,14 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
 	vcpu->last_kernel_ns = kernel_ns;
 	vcpu->last_guest_tsc = tsc_timestamp;
-	vcpu->hv_clock.flags = 0;
+
+	pvclock_flags = 0;
+	if (vcpu->pvclock_set_guest_stopped_request) {
+		pvclock_flags |= PVCLOCK_GUEST_STOPPED;
+		vcpu->pvclock_set_guest_stopped_request = false;
+	}
+
+	vcpu->hv_clock.flags = pvclock_flags;
 
 	/*
 	 * The interface expects us to write an even number signaling that the
@@ -2624,10 +2632,9 @@ static int kvm_vcpu_ioctl_x86_set_xcrs(struct kvm_vcpu *vcpu,
  */
 static int kvm_set_guest_paused(struct kvm_vcpu *vcpu)
 {
-	struct pvclock_vcpu_time_info *src = &vcpu->arch.hv_clock;
 	if (!vcpu->arch.time_page)
 		return -EINVAL;
-	src->flags |= PVCLOCK_GUEST_STOPPED;
+	vcpu->arch.pvclock_set_guest_stopped_request = true;
 	kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 	return 0;
 }
-- 
cgit v1.2.3


From e423ca155d3f5f16b46e30de9c818875b1fd617d Mon Sep 17 00:00:00 2001
From: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Date: Tue, 7 Aug 2012 13:10:13 +0530
Subject: KVM: Correct vmrun to vmcall typo

Signed-off-by: Raghavendra K T <raghavendra.kt@linux.vnet.ibm.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/include/asm/kvm_para.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 2f7712e08b1e..20f5697888bd 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -116,7 +116,7 @@ static inline bool kvm_check_and_clear_guest_paused(void)
  */
 #define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
 
-/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun
+/* For KVM hypercalls, a three-byte sequence of either the vmcall or the vmmcall
  * instruction.  The hypervisor may replace it with something else but only the
  * instructions are guaranteed to be supported.
  *
-- 
cgit v1.2.3


From 2a7921b7a033d5cfc176690d6297c82846c582b2 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Sun, 12 Aug 2012 16:12:29 +0300
Subject: KVM: VMX: restore MSR_IA32_DEBUGCTLMSR after VMEXIT

MSR_IA32_DEBUGCTLMSR is zeroed on VMEXIT. Restore it to the correct
value.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index cc8ad9836927..d0f4bec9fc62 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6222,6 +6222,7 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	unsigned long debugctlmsr;
 
 	if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) {
 		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
@@ -6261,6 +6262,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		vmx_set_interrupt_shadow(vcpu, 0);
 
 	atomic_switch_perf_msrs(vmx);
+	debugctlmsr = get_debugctlmsr();
 
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
@@ -6362,6 +6364,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 	      );
 
+	/* MSR_IA32_DEBUGCTLMSR is zeroed on vmexit. Restore it if needed */
+	if (debugctlmsr)
+		update_debugctlmsr(debugctlmsr);
+
 #ifndef CONFIG_X86_64
 	/*
 	 * The sysexit path does not restore ds/es, so we must set them to
-- 
cgit v1.2.3


From dbcb4e798072d114fe68813f39a9efd239ab99c0 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 13 Aug 2012 15:38:22 +0300
Subject: KVM: VMX: Advertize RDTSC exiting to nested guests

All processors that support VMX have that feature, and guests (Xen) depend on
it.  As we already implement it, advertize it to the guest.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d0f4bec9fc62..13e0296cea46 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1990,7 +1990,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 #endif
 		CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
 		CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
-		CPU_BASED_RDPMC_EXITING |
+		CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
 		CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
 	/*
 	 * We can allow some features even when not supported by the
-- 
cgit v1.2.3


From 28a6fdabb3ea775d3d707afd9d2728b3ced2c34d Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Tue, 14 Aug 2012 19:20:28 +0300
Subject: KVM: x86: drop parameter validation in ioapic/pic

We validate irq pin number when routing is setup, so
code handling illegal irq # in pic and ioapic on each injection
is never called.
Drop it, replace with BUG_ON to catch out of bounds access bugs.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/i8259.c | 18 +++++++++---------
 virt/kvm/ioapic.c    | 37 +++++++++++++++++++------------------
 2 files changed, 28 insertions(+), 27 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index e498b18f010c..90c84f947d45 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -190,17 +190,17 @@ void kvm_pic_update_irq(struct kvm_pic *s)
 
 int kvm_pic_set_irq(struct kvm_pic *s, int irq, int irq_source_id, int level)
 {
-	int ret = -1;
+	int ret, irq_level;
+
+	BUG_ON(irq < 0 || irq >= PIC_NUM_PINS);
 
 	pic_lock(s);
-	if (irq >= 0 && irq < PIC_NUM_PINS) {
-		int irq_level = __kvm_irq_line_state(&s->irq_states[irq],
-						     irq_source_id, level);
-		ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level);
-		pic_update_irq(s);
-		trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
-				      s->pics[irq >> 3].imr, ret == 0);
-	}
+	irq_level = __kvm_irq_line_state(&s->irq_states[irq],
+					 irq_source_id, level);
+	ret = pic_set_irq1(&s->pics[irq >> 3], irq & 7, irq_level);
+	pic_update_irq(s);
+	trace_kvm_pic_set_irq(irq >> 3, irq & 7, s->pics[irq >> 3].elcr,
+			      s->pics[irq >> 3].imr, ret == 0);
 	pic_unlock(s);
 
 	return ret;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ef61d529a6c4..cfb7e4d52dc2 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -197,28 +197,29 @@ int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
 	u32 old_irr;
 	u32 mask = 1 << irq;
 	union kvm_ioapic_redirect_entry entry;
-	int ret = 1;
+	int ret, irq_level;
+
+	BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
 
 	spin_lock(&ioapic->lock);
 	old_irr = ioapic->irr;
-	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
-		int irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
-						     irq_source_id, level);
-		entry = ioapic->redirtbl[irq];
-		irq_level ^= entry.fields.polarity;
-		if (!irq_level)
-			ioapic->irr &= ~mask;
-		else {
-			int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
-			ioapic->irr |= mask;
-			if ((edge && old_irr != ioapic->irr) ||
-			    (!edge && !entry.fields.remote_irr))
-				ret = ioapic_service(ioapic, irq);
-			else
-				ret = 0; /* report coalesced interrupt */
-		}
-		trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+	irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
+					 irq_source_id, level);
+	entry = ioapic->redirtbl[irq];
+	irq_level ^= entry.fields.polarity;
+	if (!irq_level) {
+		ioapic->irr &= ~mask;
+		ret = 1;
+	} else {
+		int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+		ioapic->irr |= mask;
+		if ((edge && old_irr != ioapic->irr) ||
+		    (!edge && !entry.fields.remote_irr))
+			ret = ioapic_service(ioapic, irq);
+		else
+			ret = 0; /* report coalesced interrupt */
 	}
+	trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
 	spin_unlock(&ioapic->lock);
 
 	return ret;
-- 
cgit v1.2.3


From 8fbe6a541f50eeec5e3e49bd92db23ade9496673 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Wed, 15 Aug 2012 16:00:40 +0200
Subject: KVM guest: disable stealtime on reboot to avoid mem corruption

else, host continues to update stealtime after reboot,
which can corrupt e.g. initramfs area.
found when tracking down initramfs unpack error on initial reboot
(with qemu-kvm -smp 2, no problem with single-core).

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kernel/kvm.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index c1d61ee4b4f1..1596cc8fd793 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -354,6 +354,7 @@ static void kvm_pv_guest_cpu_reboot(void *unused)
 	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
 		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
 	kvm_pv_disable_apf();
+	kvm_disable_steal_time();
 }
 
 static int kvm_pv_reboot_notify(struct notifier_block *nb,
-- 
cgit v1.2.3


From 023af608254add7ba037cd634cc5f2fb21ff6420 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Sun, 22 Jul 2012 18:18:37 +0300
Subject: crypto: aesni_intel - improve lrw and xts performance by utilizing
 parallel AES-NI hardware pipelines

Use parallel LRW and XTS encryption facilities to better utilize AES-NI
hardware pipelines and gain extra performance.

Tcrypt benchmark results (async), old vs new ratios:

Intel Core i5-2450M CPU (fam: 6, model: 42, step: 7)

aes:128bit
        lrw:256bit      xts:256bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     0.99x   1.00x   1.22x   1.19x
64B     1.38x   1.50x   1.58x   1.61x
256B    2.04x   2.02x   2.27x   2.29x
1024B   2.56x   2.54x   2.89x   2.92x
8192B   2.85x   2.99x   3.40x   3.23x

aes:192bit
        lrw:320bit      xts:384bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     1.08x   1.08x   1.16x   1.17x
64B     1.48x   1.54x   1.59x   1.65x
256B    2.18x   2.17x   2.29x   2.28x
1024B   2.67x   2.67x   2.87x   3.05x
8192B   2.93x   2.84x   3.28x   3.33x

aes:256bit
        lrw:348bit      xts:512bit
size    lrw-enc lrw-dec xts-dec xts-dec
16B     1.07x   1.07x   1.18x   1.19x
64B     1.56x   1.56x   1.70x   1.71x
256B    2.22x   2.24x   2.46x   2.46x
1024B   2.76x   2.77x   3.13x   3.05x
8192B   2.99x   3.05x   3.40x   3.30x

Cc: Huang Ying <ying.huang@intel.com>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Reviewed-by: Kim Phillips <kim.phillips@freescale.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aesni-intel_glue.c | 253 ++++++++++++++++++++++++++++++++-----
 crypto/Kconfig                     |   2 +
 2 files changed, 220 insertions(+), 35 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 648347a05773..7c04d0da709b 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -28,6 +28,9 @@
 #include <crypto/aes.h>
 #include <crypto/cryptd.h>
 #include <crypto/ctr.h>
+#include <crypto/b128ops.h>
+#include <crypto/lrw.h>
+#include <crypto/xts.h>
 #include <asm/cpu_device_id.h>
 #include <asm/i387.h>
 #include <asm/crypto/aes.h>
@@ -41,18 +44,10 @@
 #define HAS_CTR
 #endif
 
-#if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE)
-#define HAS_LRW
-#endif
-
 #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE)
 #define HAS_PCBC
 #endif
 
-#if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE)
-#define HAS_XTS
-#endif
-
 /* This data is stored at the end of the crypto_tfm struct.
  * It's a type of per "session" data storage location.
  * This needs to be 16 byte aligned.
@@ -79,6 +74,16 @@ struct aesni_hash_subkey_req_data {
 #define AES_BLOCK_MASK	(~(AES_BLOCK_SIZE-1))
 #define RFC4106_HASH_SUBKEY_SIZE 16
 
+struct aesni_lrw_ctx {
+	struct lrw_table_ctx lrw_table;
+	u8 raw_aes_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+};
+
+struct aesni_xts_ctx {
+	u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx) + AESNI_ALIGN - 1];
+};
+
 asmlinkage int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
 			     unsigned int key_len);
 asmlinkage void aesni_enc(struct crypto_aes_ctx *ctx, u8 *out,
@@ -398,13 +403,6 @@ static int ablk_rfc3686_ctr_init(struct crypto_tfm *tfm)
 #endif
 #endif
 
-#ifdef HAS_LRW
-static int ablk_lrw_init(struct crypto_tfm *tfm)
-{
-	return ablk_init_common(tfm, "fpu(lrw(__driver-aes-aesni))");
-}
-#endif
-
 #ifdef HAS_PCBC
 static int ablk_pcbc_init(struct crypto_tfm *tfm)
 {
@@ -412,12 +410,160 @@ static int ablk_pcbc_init(struct crypto_tfm *tfm)
 }
 #endif
 
-#ifdef HAS_XTS
-static int ablk_xts_init(struct crypto_tfm *tfm)
+static void lrw_xts_encrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
 {
-	return ablk_init_common(tfm, "fpu(xts(__driver-aes-aesni))");
+	aesni_ecb_enc(ctx, blks, blks, nbytes);
+}
+
+static void lrw_xts_decrypt_callback(void *ctx, u8 *blks, unsigned int nbytes)
+{
+	aesni_ecb_dec(ctx, blks, blks, nbytes);
+}
+
+static int lrw_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+	int err;
+
+	err = aes_set_key_common(tfm, ctx->raw_aes_ctx, key,
+				 keylen - AES_BLOCK_SIZE);
+	if (err)
+		return err;
+
+	return lrw_init_table(&ctx->lrw_table, key + keylen - AES_BLOCK_SIZE);
+}
+
+static void lrw_aesni_exit_tfm(struct crypto_tfm *tfm)
+{
+	struct aesni_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	lrw_free_table(&ctx->lrw_table);
+}
+
+static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
+		.crypt_fn = lrw_xts_encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
+}
+
+static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct lrw_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.table_ctx = &ctx->lrw_table,
+		.crypt_ctx = aes_ctx(ctx->raw_aes_ctx),
+		.crypt_fn = lrw_xts_decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = lrw_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
+}
+
+static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
+			    unsigned int keylen)
+{
+	struct aesni_xts_ctx *ctx = crypto_tfm_ctx(tfm);
+	u32 *flags = &tfm->crt_flags;
+	int err;
+
+	/* key consists of keys of equal size concatenated, therefore
+	 * the length must be even
+	 */
+	if (keylen % 2) {
+		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+		return -EINVAL;
+	}
+
+	/* first half of xts-key is for crypt */
+	err = aes_set_key_common(tfm, ctx->raw_crypt_ctx, key, keylen / 2);
+	if (err)
+		return err;
+
+	/* second half of xts-key is for tweak */
+	return aes_set_key_common(tfm, ctx->raw_tweak_ctx, key + keylen / 2,
+				  keylen / 2);
+}
+
+
+static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
+		.tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
+		.crypt_fn = lrw_xts_encrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
+}
+
+static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
+		       struct scatterlist *src, unsigned int nbytes)
+{
+	struct aesni_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	be128 buf[8];
+	struct xts_crypt_req req = {
+		.tbuf = buf,
+		.tbuflen = sizeof(buf),
+
+		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
+		.tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
+		.crypt_fn = lrw_xts_decrypt_callback,
+	};
+	int ret;
+
+	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	kernel_fpu_begin();
+	ret = xts_crypt(desc, dst, src, nbytes, &req);
+	kernel_fpu_end();
+
+	return ret;
 }
-#endif
 
 #ifdef CONFIG_X86_64
 static int rfc4106_init(struct crypto_tfm *tfm)
@@ -1035,10 +1181,10 @@ static struct crypto_alg aesni_algs[] = { {
 	},
 #endif
 #endif
-#ifdef HAS_LRW
+#ifdef HAS_PCBC
 }, {
-	.cra_name		= "lrw(aes)",
-	.cra_driver_name	= "lrw-aes-aesni",
+	.cra_name		= "pcbc(aes)",
+	.cra_driver_name	= "pcbc-aes-aesni",
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
@@ -1046,12 +1192,12 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_lrw_init,
+	.cra_init		= ablk_pcbc_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
+			.min_keysize	= AES_MIN_KEY_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE,
 			.ivsize		= AES_BLOCK_SIZE,
 			.setkey		= ablk_set_key,
 			.encrypt	= ablk_encrypt,
@@ -1059,10 +1205,50 @@ static struct crypto_alg aesni_algs[] = { {
 		},
 	},
 #endif
-#ifdef HAS_PCBC
 }, {
-	.cra_name		= "pcbc(aes)",
-	.cra_driver_name	= "pcbc-aes-aesni",
+	.cra_name		= "__lrw-aes-aesni",
+	.cra_driver_name	= "__driver-lrw-aes-aesni",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct aesni_lrw_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_exit		= lrw_aesni_exit_tfm,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= lrw_aesni_setkey,
+			.encrypt	= lrw_encrypt,
+			.decrypt	= lrw_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "__xts-aes-aesni",
+	.cra_driver_name	= "__driver-xts-aes-aesni",
+	.cra_priority		= 0,
+	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_blocksize		= AES_BLOCK_SIZE,
+	.cra_ctxsize		= sizeof(struct aesni_xts_ctx),
+	.cra_alignmask		= 0,
+	.cra_type		= &crypto_blkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u = {
+		.blkcipher = {
+			.min_keysize	= 2 * AES_MIN_KEY_SIZE,
+			.max_keysize	= 2 * AES_MAX_KEY_SIZE,
+			.ivsize		= AES_BLOCK_SIZE,
+			.setkey		= xts_aesni_setkey,
+			.encrypt	= xts_encrypt,
+			.decrypt	= xts_decrypt,
+		},
+	},
+}, {
+	.cra_name		= "lrw(aes)",
+	.cra_driver_name	= "lrw-aes-aesni",
 	.cra_priority		= 400,
 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
 	.cra_blocksize		= AES_BLOCK_SIZE,
@@ -1070,20 +1256,18 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_pcbc_init,
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
-			.min_keysize	= AES_MIN_KEY_SIZE,
-			.max_keysize	= AES_MAX_KEY_SIZE,
+			.min_keysize	= AES_MIN_KEY_SIZE + AES_BLOCK_SIZE,
+			.max_keysize	= AES_MAX_KEY_SIZE + AES_BLOCK_SIZE,
 			.ivsize		= AES_BLOCK_SIZE,
 			.setkey		= ablk_set_key,
 			.encrypt	= ablk_encrypt,
 			.decrypt	= ablk_decrypt,
 		},
 	},
-#endif
-#ifdef HAS_XTS
 }, {
 	.cra_name		= "xts(aes)",
 	.cra_driver_name	= "xts-aes-aesni",
@@ -1094,7 +1278,7 @@ static struct crypto_alg aesni_algs[] = { {
 	.cra_alignmask		= 0,
 	.cra_type		= &crypto_ablkcipher_type,
 	.cra_module		= THIS_MODULE,
-	.cra_init		= ablk_xts_init,
+	.cra_init		= ablk_init,
 	.cra_exit		= ablk_exit,
 	.cra_u = {
 		.ablkcipher = {
@@ -1106,7 +1290,6 @@ static struct crypto_alg aesni_algs[] = { {
 			.decrypt	= ablk_decrypt,
 		},
 	},
-#endif
 } };
 
 
diff --git a/crypto/Kconfig b/crypto/Kconfig
index cbcc0e2eeda0..213fb37be51f 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -564,6 +564,8 @@ config CRYPTO_AES_NI_INTEL
 	select CRYPTO_CRYPTD
 	select CRYPTO_ABLK_HELPER_X86
 	select CRYPTO_ALGAPI
+	select CRYPTO_LRW
+	select CRYPTO_XTS
 	help
 	  Use Intel AES-NI instructions for AES algorithm.
 
-- 
cgit v1.2.3


From 0570a365a6b8ccbfe7baa459de2b7396ddf2de90 Mon Sep 17 00:00:00 2001
From: Paul Bolle <pebolle@tiscali.nl>
Date: Sun, 19 Aug 2012 19:06:43 +0200
Subject: x86, boot: Remove obsolete and unused constant RAMDISK

The named constant RAMDISK is unused. It used to set the (obsolete)
kernel boot header field ram_size, but its usage for that purpose got
dropped in commit 5e47c478b0b69bc9bc3ba544e4b1ca3268f98fef ("x86: remove
zImage support"). Now remove this constant too.

Signed-off-by: Paul Bolle <pebolle@tiscali.nl>
Link: http://lkml.kernel.org/r/1345396003.1771.9.camel@x61.thuisdomein
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/boot/header.S | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S
index b4e15dd6786a..2a017441b8b2 100644
--- a/arch/x86/boot/header.S
+++ b/arch/x86/boot/header.S
@@ -32,10 +32,6 @@ SYSSEG		= 0x1000		/* historical load address >> 4 */
 #define SVGA_MODE ASK_VGA
 #endif
 
-#ifndef RAMDISK
-#define RAMDISK 0
-#endif
-
 #ifndef ROOT_RDONLY
 #define ROOT_RDONLY 1
 #endif
-- 
cgit v1.2.3


From a3118beb6a8cbe77ae3342125d920205871b0717 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 28 Jun 2012 22:12:36 -0400
Subject: xen/p2m: Fix the comment describing the P2M tree.

It mixed up the p2m_mid_missing with p2m_missing. Also
remove some extra spaces.

Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/p2m.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 64effdc6da94..e4adbfbdfada 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -22,7 +22,7 @@
  *
  * P2M_PER_PAGE depends on the architecture, as a mfn is always
  * unsigned long (8 bytes on 64-bit, 4 bytes on 32), leading to
- * 512 and 1024 entries respectively. 
+ * 512 and 1024 entries respectively.
  *
  * In short, these structures contain the Machine Frame Number (MFN) of the PFN.
  *
@@ -139,11 +139,11 @@
  *      /    | ~0, ~0, ....  |
  *     |     \---------------/
  *     |
- *     p2m_missing             p2m_missing
- * /------------------\     /------------\
- * | [p2m_mid_missing]+---->| ~0, ~0, ~0 |
- * | [p2m_mid_missing]+---->| ..., ~0    |
- * \------------------/     \------------/
+ *   p2m_mid_missing           p2m_missing
+ * /-----------------\     /------------\
+ * | [p2m_missing]   +---->| ~0, ~0, ~0 |
+ * | [p2m_missing]   +---->| ..., ~0    |
+ * \-----------------/     \------------/
  *
  * where ~0 is INVALID_P2M_ENTRY. IDENTITY is (PFN | IDENTITY_BIT)
  */
@@ -423,7 +423,7 @@ static void free_p2m_page(void *p)
 	free_page((unsigned long)p);
 }
 
-/* 
+/*
  * Fully allocate the p2m structure for a given pfn.  We need to check
  * that both the top and mid levels are allocated, and make sure the
  * parallel mfn tree is kept in sync.  We may race with other cpus, so
-- 
cgit v1.2.3


From 59b294403e9814e7c1154043567f0d71bac7a511 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 19 Jul 2012 10:23:47 -0400
Subject: xen/x86: Use memblock_reserve for sensitive areas.

instead of a big memblock_reserve. This way we can be more
selective in freeing regions (and it also makes it easier
to understand where is what).

[v1: Move the auto_translate_physmap to proper line]
[v2: Per Stefano suggestion add more comments]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/p2m.c       |  5 +++++
 arch/x86/xen/setup.c     |  9 ---------
 3 files changed, 53 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index ff962d4b821e..e532eb50e8d7 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -998,7 +998,54 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 
 	return ret;
 }
+/*
+ * If the MFN is not in the m2p (provided to us by the hypervisor) this
+ * function won't do anything. In practice this means that the XenBus
+ * MFN won't be available for the initial domain. */
+static void __init xen_reserve_mfn(unsigned long mfn)
+{
+	unsigned long pfn;
+
+	if (!mfn)
+		return;
+	pfn = mfn_to_pfn(mfn);
+	if (phys_to_machine_mapping_valid(pfn))
+		memblock_reserve(PFN_PHYS(pfn), PAGE_SIZE);
+}
+static void __init xen_reserve_internals(void)
+{
+	unsigned long size;
+
+	if (!xen_pv_domain())
+		return;
+
+	/* xen_start_info does not exist in the M2P, hence can't use
+	 * xen_reserve_mfn. */
+	memblock_reserve(__pa(xen_start_info), PAGE_SIZE);
+
+	xen_reserve_mfn(PFN_DOWN(xen_start_info->shared_info));
+	xen_reserve_mfn(xen_start_info->store_mfn);
 
+	if (!xen_initial_domain())
+		xen_reserve_mfn(xen_start_info->console.domU.mfn);
+
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return;
+
+	/*
+	 * ALIGN up to compensate for the p2m_page pointing to an array that
+	 * can partially filled (look in xen_build_dynamic_phys_to_machine).
+	 */
+
+	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+
+	/* We could use xen_reserve_mfn here, but would end up looping quite
+	 * a lot (and call memblock_reserve for each PAGE), so lets just use
+	 * the easy way and reserve it wholesale. */
+	memblock_reserve(__pa(xen_start_info->mfn_list), size);
+
+	/* The pagetables are reserved in mmu.c */
+}
 void xen_setup_shared_info(void)
 {
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1362,6 +1409,7 @@ asmlinkage void __init xen_start_kernel(void)
 	xen_raw_console_write("mapping kernel into physical memory\n");
 	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
 
+	xen_reserve_internals();
 	/* Allocate and initialize top and mid mfn levels for p2m structure */
 	xen_build_mfn_list_list();
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index e4adbfbdfada..6a2bfa43c8a1 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -388,6 +388,11 @@ void __init xen_build_dynamic_phys_to_machine(void)
 	}
 
 	m2p_override_init();
+
+	/* NOTE: We cannot call memblock_reserve here for the mfn_list as there
+	 * isn't enough pieces to make it work (for one - we are still using the
+	 * Xen provided pagetable). Do it later in xen_reserve_internals.
+	 */
 }
 
 unsigned long get_phys_to_machine(unsigned long pfn)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index a4790bf22c59..9efca750405d 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -424,15 +424,6 @@ char * __init xen_memory_setup(void)
 	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
 			E820_RESERVED);
 
-	/*
-	 * Reserve Xen bits:
-	 *  - mfn_list
-	 *  - xen_start_info
-	 * See comment above "struct start_info" in <xen/interface/xen.h>
-	 */
-	memblock_reserve(__pa(xen_start_info->mfn_list),
-			 xen_start_info->pt_base - xen_start_info->mfn_list);
-
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
 	return "Xen";
-- 
cgit v1.2.3


From 806c312e50f122c47913145cf884f53dd09d9199 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 21 Aug 2012 14:31:24 -0400
Subject: xen/x86: Workaround 64-bit hypervisor and 32-bit initial domain.

If a 64-bit hypervisor is booted with a 32-bit initial domain,
the hypervisor deals with the initial domain as "compat" and
does some extra adjustments (like pagetables are 4 bytes instead
of 8). It also adjusts the xen_start_info->pt_base incorrectly.

When booted with a 32-bit hypervisor (32-bit initial domain):
..
(XEN)  Start info:    cf831000->cf83147c
(XEN)  Page tables:   cf832000->cf8b5000
..
[    0.000000] PT: cf832000 (f832000)
[    0.000000] Reserving PT: f832000->f8b5000

And with a 64-bit hypervisor:
(XEN)  Start info:    00000000cf831000->00000000cf8314b4
(XEN)  Page tables:   00000000cf832000->00000000cf8b6000

[    0.000000] PT: cf834000 (f834000)
[    0.000000] Reserving PT: f834000->f8b8000

To deal with this, we keep keep track of the highest physical
address we have reserved via memblock_reserve. If that address
does not overlap with pt_base, we have a gap which we reserve.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index e532eb50e8d7..511f92d79e4a 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1002,19 +1002,24 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
  * If the MFN is not in the m2p (provided to us by the hypervisor) this
  * function won't do anything. In practice this means that the XenBus
  * MFN won't be available for the initial domain. */
-static void __init xen_reserve_mfn(unsigned long mfn)
+static unsigned long __init xen_reserve_mfn(unsigned long mfn)
 {
-	unsigned long pfn;
+	unsigned long pfn, end_pfn = 0;
 
 	if (!mfn)
-		return;
+		return end_pfn;
+
 	pfn = mfn_to_pfn(mfn);
-	if (phys_to_machine_mapping_valid(pfn))
-		memblock_reserve(PFN_PHYS(pfn), PAGE_SIZE);
+	if (phys_to_machine_mapping_valid(pfn)) {
+		end_pfn = PFN_PHYS(pfn) + PAGE_SIZE;
+		memblock_reserve(PFN_PHYS(pfn), end_pfn);
+	}
+	return end_pfn;
 }
 static void __init xen_reserve_internals(void)
 {
 	unsigned long size;
+	unsigned long last_phys = 0;
 
 	if (!xen_pv_domain())
 		return;
@@ -1022,12 +1027,13 @@ static void __init xen_reserve_internals(void)
 	/* xen_start_info does not exist in the M2P, hence can't use
 	 * xen_reserve_mfn. */
 	memblock_reserve(__pa(xen_start_info), PAGE_SIZE);
+	last_phys = __pa(xen_start_info) + PAGE_SIZE;
 
-	xen_reserve_mfn(PFN_DOWN(xen_start_info->shared_info));
-	xen_reserve_mfn(xen_start_info->store_mfn);
+	last_phys = max(xen_reserve_mfn(PFN_DOWN(xen_start_info->shared_info)), last_phys);
+	last_phys = max(xen_reserve_mfn(xen_start_info->store_mfn), last_phys);
 
 	if (!xen_initial_domain())
-		xen_reserve_mfn(xen_start_info->console.domU.mfn);
+		last_phys = max(xen_reserve_mfn(xen_start_info->console.domU.mfn), last_phys);
 
 	if (xen_feature(XENFEAT_auto_translated_physmap))
 		return;
@@ -1043,8 +1049,14 @@ static void __init xen_reserve_internals(void)
 	 * a lot (and call memblock_reserve for each PAGE), so lets just use
 	 * the easy way and reserve it wholesale. */
 	memblock_reserve(__pa(xen_start_info->mfn_list), size);
-
+	last_phys = max(__pa(xen_start_info->mfn_list) + size, last_phys);
 	/* The pagetables are reserved in mmu.c */
+
+	/* Under 64-bit hypervisor with a 32-bit domain, the hypervisor
+	 * offsets the pt_base by two pages. Hence the reservation that is done
+	 * in mmu.c misses two pages. We correct it here if we detect this. */
+	if (last_phys < __pa(xen_start_info->pt_base))
+		memblock_reserve(last_phys, __pa(xen_start_info->pt_base) - last_phys);
 }
 void xen_setup_shared_info(void)
 {
-- 
cgit v1.2.3


From 988f0e24bbcbbf550dff016faf8273a94f4eb1af Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 27 Jul 2012 20:10:58 -0400
Subject: xen/swiotlb: Simplify the logic.

Its pretty easy:
 1). We only check to see if we need Xen SWIOTLB for PV guests.
 2). If swiotlb=force or iommu=soft is set, then Xen SWIOTLB will
     be enabled.
 3). If it is an initial domain, then Xen SWIOTLB will be enabled.
 4). Native SWIOTLB must be disabled for PV guests.

Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/pci-swiotlb-xen.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 967633ad98c4..b6a534002ab2 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -34,19 +34,20 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
 int __init pci_xen_swiotlb_detect(void)
 {
 
+	if (!xen_pv_domain())
+		return 0;
+
 	/* If running as PV guest, either iommu=soft, or swiotlb=force will
 	 * activate this IOMMU. If running as PV privileged, activate it
 	 * irregardless.
 	 */
-	if ((xen_initial_domain() || swiotlb || swiotlb_force) &&
-	    (xen_pv_domain()))
+	if ((xen_initial_domain() || swiotlb || swiotlb_force))
 		xen_swiotlb = 1;
 
 	/* If we are running under Xen, we MUST disable the native SWIOTLB.
 	 * Don't worry about swiotlb_force flag activating the native, as
 	 * the 'swiotlb' flag is the only one turning it on. */
-	if (xen_pv_domain())
-		swiotlb = 0;
+	swiotlb = 0;
 
 	return xen_swiotlb;
 }
-- 
cgit v1.2.3


From fc2341df9e31be8a3940f4e302372d7ef46bab8c Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 27 Jul 2012 20:16:00 -0400
Subject: xen/swiotlb: With more than 4GB on 64-bit, disable the native
 SWIOTLB.

If a PV guest is booted the native SWIOTLB should not be
turned on. It does not help us (we don't have any PCI devices)
and it eats 64MB of good memory. In the case of PV guests
with PCI devices we need the Xen-SWIOTLB one.

[v1: Rewrite comment per Stefano's suggestion]
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/pci-swiotlb-xen.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index b6a534002ab2..1c1722761eec 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -8,6 +8,11 @@
 #include <xen/xen.h>
 #include <asm/iommu_table.h>
 
+#ifdef CONFIG_X86_64
+#include <asm/iommu.h>
+#include <asm/dma.h>
+#endif
+
 int xen_swiotlb __read_mostly;
 
 static struct dma_map_ops xen_swiotlb_dma_ops = {
@@ -49,6 +54,15 @@ int __init pci_xen_swiotlb_detect(void)
 	 * the 'swiotlb' flag is the only one turning it on. */
 	swiotlb = 0;
 
+#ifdef CONFIG_X86_64
+	/* pci_swiotlb_detect_4gb turns on native SWIOTLB if no_iommu == 0
+	 * (so no iommu=X command line over-writes).
+	 * Considering that PV guests do not want the *native SWIOTLB* but
+	 * only Xen SWIOTLB it is not useful to us so set no_iommu=1 here.
+	 */
+	if (max_pfn > MAX_DMA32_PFN)
+		no_iommu = 1;
+#endif
 	return xen_swiotlb;
 }
 
-- 
cgit v1.2.3


From 4d9310e39728a87c86eb48492da7546f61189633 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Mon, 6 Aug 2012 15:27:09 +0100
Subject: xen: missing includes

Changes in v2:
- remove pvclock hack;
- remove include linux/types.h from xen/interface/xen.h.
v3:
- Compile under IA64
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/ia64/include/asm/xen/interface.h      | 2 ++
 arch/x86/include/asm/xen/interface.h       | 2 ++
 drivers/tty/hvc/hvc_xen.c                  | 2 ++
 drivers/xen/grant-table.c                  | 1 +
 drivers/xen/xenbus/xenbus_probe_frontend.c | 1 +
 include/xen/interface/xen.h                | 1 -
 include/xen/privcmd.h                      | 1 +
 7 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
index 09d5f7fd9db1..ee9cad6e749b 100644
--- a/arch/ia64/include/asm/xen/interface.h
+++ b/arch/ia64/include/asm/xen/interface.h
@@ -265,6 +265,8 @@ typedef struct xen_callback xen_callback_t;
 
 #endif /* !__ASSEMBLY__ */
 
+#include <asm/pvclock-abi.h>
+
 /* Size of the shared_info area (this is not related to page size).  */
 #define XSI_SHIFT			14
 #define XSI_SIZE			(1 << XSI_SHIFT)
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index cbf0c9d50b92..a93db16e9582 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -121,6 +121,8 @@ struct arch_shared_info {
 #include "interface_64.h"
 #endif
 
+#include <asm/pvclock-abi.h>
+
 #ifndef __ASSEMBLY__
 /*
  * The following is all CPU context. Note that the fpu_ctxt block is filled
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 944eaeb8e0cf..dc07f56d66b5 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -21,6 +21,7 @@
 #include <linux/console.h>
 #include <linux/delay.h>
 #include <linux/err.h>
+#include <linux/irq.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/list.h>
@@ -35,6 +36,7 @@
 #include <xen/page.h>
 #include <xen/events.h>
 #include <xen/interface/io/console.h>
+#include <xen/interface/sched.h>
 #include <xen/hvc-console.h>
 #include <xen/xenbus.h>
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 0bfc1ef11259..1d0d95e5b446 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -47,6 +47,7 @@
 #include <xen/interface/memory.h>
 #include <xen/hvc-console.h>
 #include <asm/xen/hypercall.h>
+#include <asm/xen/interface.h>
 
 #include <asm/pgtable.h>
 #include <asm/sync_bitops.h>
diff --git a/drivers/xen/xenbus/xenbus_probe_frontend.c b/drivers/xen/xenbus/xenbus_probe_frontend.c
index a31b54d48839..3159a37d966d 100644
--- a/drivers/xen/xenbus/xenbus_probe_frontend.c
+++ b/drivers/xen/xenbus/xenbus_probe_frontend.c
@@ -21,6 +21,7 @@
 #include <xen/xenbus.h>
 #include <xen/events.h>
 #include <xen/page.h>
+#include <xen/xen.h>
 
 #include <xen/platform_pci.h>
 
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index a890804945e3..3871e4753680 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -10,7 +10,6 @@
 #define __XEN_PUBLIC_XEN_H__
 
 #include <asm/xen/interface.h>
-#include <asm/pvclock-abi.h>
 
 /*
  * XEN "SYSTEM CALLS" (a.k.a. HYPERCALLS).
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
index 17857fb4d550..4d588814510b 100644
--- a/include/xen/privcmd.h
+++ b/include/xen/privcmd.h
@@ -35,6 +35,7 @@
 
 #include <linux/types.h>
 #include <linux/compiler.h>
+#include <xen/interface/xen.h>
 
 typedef unsigned long xen_pfn_t;
 
-- 
cgit v1.2.3


From b8b0f559c7b1dcf5503817e518c81c9a18ee45e0 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 21 Aug 2012 14:49:34 -0400
Subject: xen/apic/xenbus/swiotlb/pcifront/grant/tmem: Make functions or
 variables static.

There is no need for those functions/variables to be visible. Make them
static and also fix the compile warnings of this sort:

drivers/xen/<some file>.c: warning: symbol '<blah>' was not declared. Should it be static?

Some of them just require including the header file that
declares the functions.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/apic.c                     |  3 ++-
 arch/x86/xen/enlighten.c                |  2 ++
 arch/x86/xen/pci-swiotlb-xen.c          |  1 +
 arch/x86/xen/platform-pci-unplug.c      |  1 +
 drivers/pci/xen-pcifront.c              |  2 +-
 drivers/xen/gntdev.c                    |  2 +-
 drivers/xen/grant-table.c               | 13 ++++++-------
 drivers/xen/swiotlb-xen.c               |  2 +-
 drivers/xen/tmem.c                      |  1 +
 drivers/xen/xenbus/xenbus_dev_backend.c |  2 +-
 drivers/xen/xenbus/xenbus_probe.c       |  4 ++--
 11 files changed, 19 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c
index ec57bd3818a4..7005ced5d1ad 100644
--- a/arch/x86/xen/apic.c
+++ b/arch/x86/xen/apic.c
@@ -6,8 +6,9 @@
 
 #include <xen/xen.h>
 #include <xen/interface/physdev.h>
+#include "xen-ops.h"
 
-unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
+static unsigned int xen_io_apic_read(unsigned apic, unsigned reg)
 {
 	struct physdev_apic apic_op;
 	int ret;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index ff962d4b821e..cb1b1914dbd3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -79,6 +79,8 @@
 #include "smp.h"
 #include "multicalls.h"
 
+#include <xen/events.h>
+
 EXPORT_SYMBOL_GPL(hypercall_page);
 
 DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu);
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 967633ad98c4..2d58b3ff4fae 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -8,6 +8,7 @@
 #include <xen/xen.h>
 #include <asm/iommu_table.h>
 
+#include <asm/xen/swiotlb-xen.h>
 int xen_swiotlb __read_mostly;
 
 static struct dma_map_ops xen_swiotlb_dma_ops = {
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c
index ffcf2615640b..0a7852483ffe 100644
--- a/arch/x86/xen/platform-pci-unplug.c
+++ b/arch/x86/xen/platform-pci-unplug.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 
 #include <xen/platform_pci.h>
+#include "xen-ops.h"
 
 #define XEN_PLATFORM_ERR_MAGIC -1
 #define XEN_PLATFORM_ERR_PROTOCOL -2
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index d6cc62cb4cf7..a4d901def8f2 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -236,7 +236,7 @@ static int pcifront_bus_write(struct pci_bus *bus, unsigned int devfn,
 	return errno_to_pcibios_err(do_pci_op(pdev, &op));
 }
 
-struct pci_ops pcifront_bus_ops = {
+static struct pci_ops pcifront_bus_ops = {
 	.read = pcifront_bus_read,
 	.write = pcifront_bus_write,
 };
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 1ffd03bf8e10..163b7e985ed0 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -445,7 +445,7 @@ static void mn_release(struct mmu_notifier *mn,
 	spin_unlock(&priv->lock);
 }
 
-struct mmu_notifier_ops gntdev_mmu_ops = {
+static struct mmu_notifier_ops gntdev_mmu_ops = {
 	.release                = mn_release,
 	.invalidate_page        = mn_invl_page,
 	.invalidate_range_start = mn_invl_range_start,
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 1d0d95e5b446..eea81cf8a2a5 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -286,10 +286,9 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
 }
 EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);
 
-void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
-				    unsigned long frame, int flags,
-				    unsigned page_off,
-				    unsigned length)
+static void gnttab_update_subpage_entry_v2(grant_ref_t ref, domid_t domid,
+					   unsigned long frame, int flags,
+					   unsigned page_off, unsigned length)
 {
 	gnttab_shared.v2[ref].sub_page.frame = frame;
 	gnttab_shared.v2[ref].sub_page.page_off = page_off;
@@ -346,9 +345,9 @@ bool gnttab_subpage_grants_available(void)
 }
 EXPORT_SYMBOL_GPL(gnttab_subpage_grants_available);
 
-void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
-				  int flags, domid_t trans_domid,
-				  grant_ref_t trans_gref)
+static void gnttab_update_trans_entry_v2(grant_ref_t ref, domid_t domid,
+					 int flags, domid_t trans_domid,
+					 grant_ref_t trans_gref)
 {
 	gnttab_shared.v2[ref].transitive.trans_domid = trans_domid;
 	gnttab_shared.v2[ref].transitive.gref = trans_gref;
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 1afb4fba11b4..a52f3ae05d94 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -52,7 +52,7 @@ static unsigned long xen_io_tlb_nslabs;
  * Quick lookup value of the bus address of the IOTLB.
  */
 
-u64 start_dma_addr;
+static u64 start_dma_addr;
 
 static dma_addr_t xen_phys_to_bus(phys_addr_t paddr)
 {
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 89f264c67420..144564e5eb29 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -21,6 +21,7 @@
 #include <asm/xen/hypercall.h>
 #include <asm/xen/page.h>
 #include <asm/xen/hypervisor.h>
+#include <xen/tmem.h>
 
 #define TMEM_CONTROL               0
 #define TMEM_NEW_POOL              1
diff --git a/drivers/xen/xenbus/xenbus_dev_backend.c b/drivers/xen/xenbus/xenbus_dev_backend.c
index be738c43104b..d73000800762 100644
--- a/drivers/xen/xenbus/xenbus_dev_backend.c
+++ b/drivers/xen/xenbus/xenbus_dev_backend.c
@@ -107,7 +107,7 @@ static int xenbus_backend_mmap(struct file *file, struct vm_area_struct *vma)
 	return 0;
 }
 
-const struct file_operations xenbus_backend_fops = {
+static const struct file_operations xenbus_backend_fops = {
 	.open = xenbus_backend_open,
 	.mmap = xenbus_backend_mmap,
 	.unlocked_ioctl = xenbus_backend_ioctl,
diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c
index b793723e724d..91d3d6544a7b 100644
--- a/drivers/xen/xenbus/xenbus_probe.c
+++ b/drivers/xen/xenbus/xenbus_probe.c
@@ -324,8 +324,8 @@ static int cmp_dev(struct device *dev, void *data)
 	return 0;
 }
 
-struct xenbus_device *xenbus_device_find(const char *nodename,
-					 struct bus_type *bus)
+static struct xenbus_device *xenbus_device_find(const char *nodename,
+						struct bus_type *bus)
 {
 	struct xb_find_info info = { .dev = NULL, .nodename = nodename };
 
-- 
cgit v1.2.3


From ece3234a77ebcd5bbeea6b829c9798328d290cae Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <sebastian@breakpoint.cc>
Date: Mon, 13 Aug 2012 22:23:33 +0200
Subject: x86: dt: Use linear irq domain for ioapic(s)

The former conversion to irq_domain_add_legacy() did not fully work
since we miss the irq decs for NR_IRQS_LEGACY+.

Ideally we could use irq_domain_add_simple() or the no-map variant (and
program the virq <-> line mapping directly into ioapic) but this would
require a different irq lookup in "do_IRQ()" and won't work with ACPI
without changes. So this is probably easiest for everyone.

Tested-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Grant Likely <grant.likely@secretlab.ca>
Link: http://lkml.kernel.org/r/20120813202304.GA3529@breakpoint.cc
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/kernel/devicetree.c | 51 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3ae2ced4a874..b1581527a236 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -342,6 +342,47 @@ const struct irq_domain_ops ioapic_irq_domain_ops = {
 	.xlate = ioapic_xlate,
 };
 
+static void dt_add_ioapic_domain(unsigned int ioapic_num,
+		struct device_node *np)
+{
+	struct irq_domain *id;
+	struct mp_ioapic_gsi *gsi_cfg;
+	int ret;
+	int num;
+
+	gsi_cfg = mp_ioapic_gsi_routing(ioapic_num);
+	num = gsi_cfg->gsi_end - gsi_cfg->gsi_base + 1;
+
+	id = irq_domain_add_linear(np, num, &ioapic_irq_domain_ops,
+			(void *)ioapic_num);
+	BUG_ON(!id);
+	if (gsi_cfg->gsi_base == 0) {
+		/*
+		 * The first NR_IRQS_LEGACY irq descs are allocated in
+		 * early_irq_init() and need just a mapping. The
+		 * remaining irqs need both. All of them are preallocated
+		 * and assigned so we can keep the 1:1 mapping which the ioapic
+		 * is having.
+		 */
+		ret = irq_domain_associate_many(id, 0, 0, NR_IRQS_LEGACY);
+		if (ret)
+			pr_err("Error mapping legacy IRQs: %d\n", ret);
+
+		if (num > NR_IRQS_LEGACY) {
+			ret = irq_create_strict_mappings(id, NR_IRQS_LEGACY,
+					NR_IRQS_LEGACY, num - NR_IRQS_LEGACY);
+			if (ret)
+				pr_err("Error creating mapping for the "
+						"remaining IRQs: %d\n", ret);
+		}
+		irq_set_default_host(id);
+	} else {
+		ret = irq_create_strict_mappings(id, gsi_cfg->gsi_base, 0, num);
+		if (ret)
+			pr_err("Error creating IRQ mapping: %d\n", ret);
+	}
+}
+
 static void __init ioapic_add_ofnode(struct device_node *np)
 {
 	struct resource r;
@@ -356,15 +397,7 @@ static void __init ioapic_add_ofnode(struct device_node *np)
 
 	for (i = 0; i < nr_ioapics; i++) {
 		if (r.start == mpc_ioapic_addr(i)) {
-			struct irq_domain *id;
-			struct mp_ioapic_gsi *gsi_cfg;
-
-			gsi_cfg = mp_ioapic_gsi_routing(i);
-
-			id = irq_domain_add_legacy(np, 32, gsi_cfg->gsi_base, 0,
-						   &ioapic_irq_domain_ops,
-						   (void*)i);
-			BUG_ON(!id);
+			dt_add_ioapic_domain(i, np);
 			return;
 		}
 	}
-- 
cgit v1.2.3


From d3a8009b1731abb7026a840d1c2701f877f9429f Mon Sep 17 00:00:00 2001
From: Yuanhan Liu <yliu.null@gmail.com>
Date: Mon, 6 Aug 2012 22:13:00 +0800
Subject: x86/irq/i8259: Fix incorrect comment

Signed-off-by: Yuanhan Liu <yliu.null@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/i8259.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 36d1853e91af..9a5c460404dc 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -263,7 +263,7 @@ static void i8259A_shutdown(void)
 	 * out of.
 	 */
 	outb(0xff, PIC_MASTER_IMR);	/* mask all of 8259A-1 */
-	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-1 */
+	outb(0xff, PIC_SLAVE_IMR);	/* mask all of 8259A-2 */
 }
 
 static struct syscore_ops i8259_syscore_ops = {
-- 
cgit v1.2.3


From 8e3d9d061b5d132217629e7b5635ff0c02488e65 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 21 Aug 2012 10:57:42 +0800
Subject: KVM: x86: fix possible infinite loop caused by reexecute_instruction

Currently, we reexecute all unhandleable instructions if they do not
access on the mmio, however, it can not work if host map the readonly
memory to guest. If the instruction try to write this kind of memory,
it will fault again when guest retry it, then we will goto a infinite
loop: retry instruction -> write #PF -> emulation fail ->
retry instruction -> ...

Fix it by retrying the instruction only when it faults on the writable
memory

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fb0d93788bfb..704680d0fa3e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4473,6 +4473,7 @@ static int handle_emulation_failure(struct kvm_vcpu *vcpu)
 static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 {
 	gpa_t gpa;
+	pfn_t pfn;
 
 	if (tdp_enabled)
 		return false;
@@ -4490,8 +4491,17 @@ static bool reexecute_instruction(struct kvm_vcpu *vcpu, gva_t gva)
 	if (gpa == UNMAPPED_GVA)
 		return true; /* let cpu generate fault */
 
-	if (!kvm_is_error_hva(gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT)))
+	/*
+	 * Do not retry the unhandleable instruction if it faults on the
+	 * readonly host memory, otherwise it will goto a infinite loop:
+	 * retry instruction -> write #PF -> emulation fail -> retry
+	 * instruction -> ...
+	 */
+	pfn = gfn_to_pfn(vcpu->kvm, gpa_to_gfn(gpa));
+	if (!is_error_pfn(pfn)) {
+		kvm_release_pfn_clean(pfn);
 		return true;
+	}
 
 	return false;
 }
-- 
cgit v1.2.3


From 037d92dc5d4691ae7cf44699c55ca83b1b441992 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 21 Aug 2012 10:59:12 +0800
Subject: KVM: introduce gfn_to_pfn_memslot_atomic

It can instead of hva_to_pfn_atomic

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c       |  5 +----
 include/linux/kvm_host.h |  3 ++-
 virt/kvm/kvm_main.c      | 14 ++++++++------
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9651c2cd0005..5548971ae80d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2510,15 +2510,12 @@ static pfn_t pte_prefetch_gfn_to_pfn(struct kvm_vcpu *vcpu, gfn_t gfn,
 				     bool no_dirty_log)
 {
 	struct kvm_memory_slot *slot;
-	unsigned long hva;
 
 	slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, no_dirty_log);
 	if (!slot)
 		return KVM_PFN_ERR_FAULT;
 
-	hva = gfn_to_hva_memslot(slot, gfn);
-
-	return hva_to_pfn_atomic(hva);
+	return gfn_to_pfn_memslot_atomic(slot, gfn);
 }
 
 static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d4bd4d41e355..52c86e4f6d8c 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -462,7 +462,6 @@ void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_dirty(struct page *page);
 void kvm_set_page_accessed(struct page *page);
 
-pfn_t hva_to_pfn_atomic(unsigned long addr);
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_async(struct kvm *kvm, gfn_t gfn, bool *async,
 		       bool write_fault, bool *writable);
@@ -470,6 +469,8 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn);
 pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable);
 pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
+pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
+
 void kvm_release_pfn_dirty(pfn_t pfn);
 void kvm_release_pfn_clean(pfn_t pfn);
 void kvm_set_pfn_dirty(pfn_t pfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 7b94d70a323f..543f9b7e5aa2 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1102,12 +1102,6 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 	return pfn;
 }
 
-pfn_t hva_to_pfn_atomic(unsigned long addr)
-{
-	return hva_to_pfn(addr, true, NULL, true, NULL);
-}
-EXPORT_SYMBOL_GPL(hva_to_pfn_atomic);
-
 static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
 			  bool write_fault, bool *writable)
 {
@@ -1155,6 +1149,14 @@ pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 	return hva_to_pfn(addr, false, NULL, true, NULL);
 }
 
+pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
+{
+	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
+
+	return hva_to_pfn(addr, true, NULL, true, NULL);
+}
+EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
+
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 								  int nr_pages)
 {
-- 
cgit v1.2.3


From 4d8b81abc47b83a1939e59df2fdb0e98dfe0eedd Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Tue, 21 Aug 2012 11:02:51 +0800
Subject: KVM: introduce readonly memslot

In current code, if we map a readonly memory space from host to guest
and the page is not currently mapped in the host, we will get a fault
pfn and async is not allowed, then the vm will crash

We introduce readonly memory region to map ROM/ROMD to the guest, read access
is happy for readonly memslot, write access on readonly memslot will cause
KVM_EXIT_MMIO exit

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 10 ++--
 arch/x86/include/asm/kvm.h        |  1 +
 arch/x86/kvm/mmu.c                |  9 ++++
 arch/x86/kvm/x86.c                |  1 +
 include/linux/kvm.h               |  6 ++-
 include/linux/kvm_host.h          |  7 +--
 virt/kvm/kvm_main.c               | 96 +++++++++++++++++++++++++++++++--------
 7 files changed, 102 insertions(+), 28 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index bf33aaa4c59f..b91bfd43f007 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -857,7 +857,8 @@ struct kvm_userspace_memory_region {
 };
 
 /* for kvm_memory_region::flags */
-#define KVM_MEM_LOG_DIRTY_PAGES  1UL
+#define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
+#define KVM_MEM_READONLY	(1UL << 1)
 
 This ioctl allows the user to create or modify a guest physical memory
 slot.  When changing an existing slot, it may be moved in the guest
@@ -873,9 +874,12 @@ It is recommended that the lower 21 bits of guest_phys_addr and userspace_addr
 be identical.  This allows large pages in the guest to be backed by large
 pages in the host.
 
-The flags field supports just one flag, KVM_MEM_LOG_DIRTY_PAGES, which
+The flags field supports two flag, KVM_MEM_LOG_DIRTY_PAGES, which
 instructs kvm to keep track of writes to memory within the slot.  See
-the KVM_GET_DIRTY_LOG ioctl.
+the KVM_GET_DIRTY_LOG ioctl. Another flag is KVM_MEM_READONLY when the
+KVM_CAP_READONLY_MEM capability, it indicates the guest memory is read-only,
+that means, guest is only allowed to read it. Writes will be posted to
+userspace as KVM_EXIT_MMIO exits.
 
 When the KVM_CAP_SYNC_MMU capability, changes in the backing of the memory
 region are automatically reflected into the guest.  For example, an mmap()
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 246617efd67f..521bf252e34b 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -25,6 +25,7 @@
 #define __KVM_HAVE_DEBUGREGS
 #define __KVM_HAVE_XSAVE
 #define __KVM_HAVE_XCRS
+#define __KVM_HAVE_READONLY_MEM
 
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5548971ae80d..8e312a2e1412 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2647,6 +2647,15 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *
 
 static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, pfn_t pfn)
 {
+	/*
+	 * Do not cache the mmio info caused by writing the readonly gfn
+	 * into the spte otherwise read access on readonly gfn also can
+	 * caused mmio page fault and treat it as mmio access.
+	 * Return 1 to tell kvm to emulate it.
+	 */
+	if (pfn == KVM_PFN_ERR_RO_FAULT)
+		return 1;
+
 	if (pfn == KVM_PFN_ERR_HWPOISON) {
 		kvm_send_hwpoison_signal(gfn_to_hva(vcpu->kvm, gfn), current);
 		return 0;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 704680d0fa3e..42bbf4187d20 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2175,6 +2175,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_GET_TSC_KHZ:
 	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_KVMCLOCK_CTRL:
+	case KVM_CAP_READONLY_MEM:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index 2de335d7f63e..d808694673f9 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -106,7 +106,8 @@ struct kvm_userspace_memory_region {
  * other bits are reserved for kvm internal use which are defined in
  * include/linux/kvm_host.h.
  */
-#define KVM_MEM_LOG_DIRTY_PAGES  1UL
+#define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
+#define KVM_MEM_READONLY	(1UL << 1)
 
 /* for KVM_IRQ_LINE */
 struct kvm_irq_level {
@@ -621,6 +622,9 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
+#ifdef __KVM_HAVE_READONLY_MEM
+#define KVM_CAP_READONLY_MEM 81
+#endif
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a913ac709a9d..5972c9845ddb 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -465,6 +465,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
 void kvm_set_page_dirty(struct page *page);
@@ -792,12 +793,6 @@ hva_to_gfn_memslot(unsigned long hva, struct kvm_memory_slot *slot)
 	return slot->base_gfn + gfn_offset;
 }
 
-static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
-					       gfn_t gfn)
-{
-	return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
-}
-
 static inline gpa_t gfn_to_gpa(gfn_t gfn)
 {
 	return (gpa_t)gfn << PAGE_SHIFT;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index e3e1658c491d..3416f8a31f63 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -680,7 +680,13 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
 
 static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
 {
-	if (mem->flags & ~KVM_MEM_LOG_DIRTY_PAGES)
+	u32 valid_flags = KVM_MEM_LOG_DIRTY_PAGES;
+
+#ifdef KVM_CAP_READONLY_MEM
+	valid_flags |= KVM_MEM_READONLY;
+#endif
+
+	if (mem->flags & ~valid_flags)
 		return -EINVAL;
 
 	return 0;
@@ -973,18 +979,45 @@ out:
 	return size;
 }
 
-static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
-				     gfn_t *nr_pages)
+static bool memslot_is_readonly(struct kvm_memory_slot *slot)
+{
+	return slot->flags & KVM_MEM_READONLY;
+}
+
+static unsigned long __gfn_to_hva_memslot(struct kvm_memory_slot *slot,
+					  gfn_t gfn)
+{
+	return slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE;
+}
+
+static unsigned long __gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+				       gfn_t *nr_pages, bool write)
 {
 	if (!slot || slot->flags & KVM_MEMSLOT_INVALID)
 		return KVM_HVA_ERR_BAD;
 
+	if (memslot_is_readonly(slot) && write)
+		return KVM_HVA_ERR_RO_BAD;
+
 	if (nr_pages)
 		*nr_pages = slot->npages - (gfn - slot->base_gfn);
 
-	return gfn_to_hva_memslot(slot, gfn);
+	return __gfn_to_hva_memslot(slot, gfn);
 }
 
+static unsigned long gfn_to_hva_many(struct kvm_memory_slot *slot, gfn_t gfn,
+				     gfn_t *nr_pages)
+{
+	return __gfn_to_hva_many(slot, gfn, nr_pages, true);
+}
+
+unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot,
+				 gfn_t gfn)
+{
+	return gfn_to_hva_many(slot, gfn, NULL);
+}
+EXPORT_SYMBOL_GPL(gfn_to_hva_memslot);
+
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 {
 	return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
@@ -997,7 +1030,7 @@ EXPORT_SYMBOL_GPL(gfn_to_hva);
  */
 static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
 {
-	return gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL);
+	return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
 }
 
 static int kvm_read_hva(void *data, void __user *hva, int len)
@@ -1106,6 +1139,17 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
 	return npages;
 }
 
+static bool vma_is_valid(struct vm_area_struct *vma, bool write_fault)
+{
+	if (unlikely(!(vma->vm_flags & VM_READ)))
+		return false;
+
+	if (write_fault && (unlikely(!(vma->vm_flags & VM_WRITE))))
+		return false;
+
+	return true;
+}
+
 /*
  * Pin guest page in memory and return its pfn.
  * @addr: host virtual address which maps memory to the guest
@@ -1130,8 +1174,6 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 	/* we can do it either atomically or asynchronously, not both */
 	BUG_ON(atomic && async);
 
-	BUG_ON(!write_fault && !writable);
-
 	if (hva_to_pfn_fast(addr, atomic, async, write_fault, writable, &pfn))
 		return pfn;
 
@@ -1158,7 +1200,7 @@ static pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 			vma->vm_pgoff;
 		BUG_ON(!kvm_is_mmio_pfn(pfn));
 	} else {
-		if (async && (vma->vm_flags & VM_WRITE))
+		if (async && vma_is_valid(vma, write_fault))
 			*async = true;
 		pfn = KVM_PFN_ERR_FAULT;
 	}
@@ -1167,19 +1209,40 @@ exit:
 	return pfn;
 }
 
+static pfn_t
+__gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool atomic,
+		     bool *async, bool write_fault, bool *writable)
+{
+	unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
+
+	if (addr == KVM_HVA_ERR_RO_BAD)
+		return KVM_PFN_ERR_RO_FAULT;
+
+	if (kvm_is_error_hva(addr))
+		return KVM_PFN_ERR_BAD;
+
+	/* Do not map writable pfn in the readonly memslot. */
+	if (writable && memslot_is_readonly(slot)) {
+		*writable = false;
+		writable = NULL;
+	}
+
+	return hva_to_pfn(addr, atomic, async, write_fault,
+			  writable);
+}
+
 static pfn_t __gfn_to_pfn(struct kvm *kvm, gfn_t gfn, bool atomic, bool *async,
 			  bool write_fault, bool *writable)
 {
-	unsigned long addr;
+	struct kvm_memory_slot *slot;
 
 	if (async)
 		*async = false;
 
-	addr = gfn_to_hva(kvm, gfn);
-	if (kvm_is_error_hva(addr))
-		return KVM_PFN_ERR_BAD;
+	slot = gfn_to_memslot(kvm, gfn);
 
-	return hva_to_pfn(addr, atomic, async, write_fault, writable);
+	return __gfn_to_pfn_memslot(slot, gfn, atomic, async, write_fault,
+				    writable);
 }
 
 pfn_t gfn_to_pfn_atomic(struct kvm *kvm, gfn_t gfn)
@@ -1210,15 +1273,12 @@ EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
 
 pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
-	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-	return hva_to_pfn(addr, false, NULL, true, NULL);
+	return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
 }
 
 pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
 {
-	unsigned long addr = gfn_to_hva_memslot(slot, gfn);
-
-	return hva_to_pfn(addr, true, NULL, true, NULL);
+	return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
 
-- 
cgit v1.2.3


From 4dbf32c30f7e5379588a692c7bf80b05d9ef8026 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 23 Jul 2012 19:05:53 +0200
Subject: x86, microcode: Save an indentation level in reload_for_cpu

Invert the uci->valid check so that the later block can be aligned on
the first indentation level of the function. No functional change.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-3-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_core.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 4873e62db6a1..63a956865022 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -276,19 +276,18 @@ static struct platform_device	*microcode_pdev;
 static int reload_for_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	enum ucode_state ustate;
 	int err = 0;
 
-	if (uci->valid) {
-		enum ucode_state ustate;
-
-		ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
-		if (ustate == UCODE_OK)
-			apply_microcode_on_target(cpu);
-		else
-			if (ustate == UCODE_ERROR)
-				err = -EINVAL;
-	}
+	if (!uci->valid)
+		return err;
 
+	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+	if (ustate == UCODE_OK)
+		apply_microcode_on_target(cpu);
+	else
+		if (ustate == UCODE_ERROR)
+			err = -EINVAL;
 	return err;
 }
 
-- 
cgit v1.2.3


From bb9d3e473d5b324907e15dff4e54410b28ea50e2 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Fri, 3 Aug 2012 15:26:50 +0200
Subject: x86, microcode: Drop uci->mc check on resume path

Remove the uci->mc check on the cpu resume path because the low-level
drivers do that anyway.

More importantly, though, this fixes a contrived and obscure but still
important case. Imagine the following:

* boot machine, no new microcode in /lib/firmware

* a subset of the CPUs is offlined

* in the meantime, user puts new fresh microcode container into
/lib/firmware and reloads it by doing
$ echo 1 > /sys/devices/system/cpu/microcode/reload

* offlined cores come back online and they don't get the newer microcode
applied due to this check.

Later patches take care of the issue on AMD.

While at it, cleanup code around it.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-4-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_core.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 63a956865022..706a5c9b8eb2 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -369,13 +369,10 @@ static void microcode_fini_cpu(int cpu)
 
 static enum ucode_state microcode_resume_cpu(int cpu)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
-	if (!uci->mc)
-		return UCODE_NFOUND;
-
 	pr_debug("CPU%d updated upon resume\n", cpu);
-	apply_microcode_on_target(cpu);
+
+	if (apply_microcode_on_target(cpu))
+		return UCODE_ERROR;
 
 	return UCODE_OK;
 }
@@ -404,14 +401,11 @@ static enum ucode_state microcode_init_cpu(int cpu)
 static enum ucode_state microcode_update_cpu(int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	enum ucode_state ustate;
 
 	if (uci->valid)
-		ustate = microcode_resume_cpu(cpu);
-	else
-		ustate = microcode_init_cpu(cpu);
+		return microcode_resume_cpu(cpu);
 
-	return ustate;
+	return microcode_init_cpu(cpu);
 }
 
 static int mc_device_add(struct device *dev, struct subsys_interface *sif)
-- 
cgit v1.2.3


From 09c3f0d883300c8fc2bb62e9a70cf89a2ada9a80 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 23 Jul 2012 20:15:10 +0200
Subject: x86, microcode: Cleanup cpu hotplug notifier callback

Mask out CPU_TASKS_FROZEN bit so that all _FROZEN cases can be dropped.
Also, add some more comments as to why CPU_ONLINE falls through to
CPU_DOWN_FAILED (no break), and for the CPU_DEAD case. Realign debug
printks better.

Idea blatantly stolen from a tglx patch:
http://marc.info/?l=linux-kernel&m=134267779513862

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-5-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_core.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 706a5c9b8eb2..dcde544e012c 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -470,34 +470,41 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu)
 	struct device *dev;
 
 	dev = get_cpu_device(cpu);
-	switch (action) {
+
+	switch (action & ~CPU_TASKS_FROZEN) {
 	case CPU_ONLINE:
-	case CPU_ONLINE_FROZEN:
 		microcode_update_cpu(cpu);
-	case CPU_DOWN_FAILED:
-	case CPU_DOWN_FAILED_FROZEN:
 		pr_debug("CPU%d added\n", cpu);
+		/*
+		 * "break" is missing on purpose here because we want to fall
+		 * through in order to create the sysfs group.
+		 */
+
+	case CPU_DOWN_FAILED:
 		if (sysfs_create_group(&dev->kobj, &mc_attr_group))
 			pr_err("Failed to create group for CPU%d\n", cpu);
 		break;
+
 	case CPU_DOWN_PREPARE:
-	case CPU_DOWN_PREPARE_FROZEN:
 		/* Suspend is in progress, only remove the interface */
 		sysfs_remove_group(&dev->kobj, &mc_attr_group);
 		pr_debug("CPU%d removed\n", cpu);
 		break;
 
 	/*
+	 * case CPU_DEAD:
+	 *
 	 * When a CPU goes offline, don't free up or invalidate the copy of
 	 * the microcode in kernel memory, so that we can reuse it when the
 	 * CPU comes back online without unnecessarily requesting the userspace
 	 * for it again.
 	 */
-	case CPU_UP_CANCELED_FROZEN:
-		/* The CPU refused to come up during a system resume */
-		microcode_fini_cpu(cpu);
-		break;
 	}
+
+	/* The CPU refused to come up during a system resume */
+	if (action == CPU_UP_CANCELED_FROZEN)
+		microcode_fini_cpu(cpu);
+
 	return NOTIFY_OK;
 }
 
-- 
cgit v1.2.3


From e43f6e67ec1c142550860bbe0b51166c5ee4cac8 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 1 Aug 2012 19:17:01 +0200
Subject: x86, microcode: Straighten out Kconfig text

Update and clarify Kconfig help text along with menu names.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-6-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/Kconfig | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4abd..1ccccc6efb37 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -982,25 +982,25 @@ config X86_REBOOTFIXUPS
 	  Say N otherwise.
 
 config MICROCODE
-	tristate "/dev/cpu/microcode - microcode support"
+	tristate "CPU microcode loading support"
 	select FW_LOADER
 	---help---
+
 	  If you say Y here, you will be able to update the microcode on
 	  certain Intel and AMD processors. The Intel support is for the
-	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III,
-	  Pentium 4, Xeon etc. The AMD support is for family 0x10 and
-	  0x11 processors, e.g. Opteron, Phenom and Turion 64 Ultra.
-	  You will obviously need the actual microcode binary data itself
-	  which is not shipped with the Linux kernel.
+	  IA32 family, e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4,
+	  Xeon etc. The AMD support is for families 0x10 and later. You will
+	  obviously need the actual microcode binary data itself which is not
+	  shipped with the Linux kernel.
 
 	  This option selects the general module only, you need to select
 	  at least one vendor specific module as well.
 
-	  To compile this driver as a module, choose M here: the
-	  module will be called microcode.
+	  To compile this driver as a module, choose M here: the module
+	  will be called microcode.
 
 config MICROCODE_INTEL
-	bool "Intel microcode patch loading support"
+	bool "Intel microcode loading support"
 	depends on MICROCODE
 	default MICROCODE
 	select FW_LOADER
@@ -1013,7 +1013,7 @@ config MICROCODE_INTEL
 	  <http://www.urbanmyth.org/microcode/>.
 
 config MICROCODE_AMD
-	bool "AMD microcode patch loading support"
+	bool "AMD microcode loading support"
 	depends on MICROCODE
 	select FW_LOADER
 	---help---
-- 
cgit v1.2.3


From e7e632f5ba240fbc313c49ed6559681ea57534e9 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Fri, 20 Jul 2012 14:12:21 +0200
Subject: x86, microcode, AMD: Remove useless get_ucode_data wrapper

get_ucode_data was a trivial memcpy wrapper. Remove it so as not to
obfuscate code unnecessarily with no obvious gain.

No functional change.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-7-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/microcode.h | 6 ------
 arch/x86/kernel/microcode_amd.c  | 4 ++--
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 4ebe157bf73d..8813be600995 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -49,12 +49,6 @@ static inline struct microcode_ops * __init init_intel_microcode(void)
 #ifdef CONFIG_MICROCODE_AMD
 extern struct microcode_ops * __init init_amd_microcode(void);
 extern void __exit exit_amd_microcode(void);
-
-static inline void get_ucode_data(void *to, const u8 *from, size_t n)
-{
-	memcpy(to, from, n);
-}
-
 #else
 static inline struct microcode_ops * __init init_amd_microcode(void)
 {
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 82746f942cd8..94213387a3d1 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -183,7 +183,7 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
 	memset(patch, 0, PAGE_SIZE);
 
 	/* all looks ok, get the binary patch */
-	get_ucode_data(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
+	memcpy(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
 
 	return actual_size;
 }
@@ -238,7 +238,7 @@ static int install_equiv_cpu_table(const u8 *buf)
 		return -ENOMEM;
 	}
 
-	get_ucode_data(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
+	memcpy(equiv_cpu_table, buf + CONTAINER_HDR_SZ, size);
 
 	/* add header length */
 	return size + CONTAINER_HDR_SZ;
-- 
cgit v1.2.3


From 685ca6d797af9d41164dd64dd60145d4946fc152 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 20 Jun 2012 16:17:51 +0200
Subject: x86, microcode, AMD: Check before applying a patch

Make sure we're actually applying a microcode patch to a core which
really needs it.

This brings only a very very very minor slowdown on F10:

0.032218828 sec vs 0.056010626 sec with this patch.

And small speedup on F15:

0.487089449 sec vs 0.180551162 sec (from perf output).

Also, fixup comments while at it.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-8-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_amd.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 94213387a3d1..8fdf7d99b804 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -202,11 +202,18 @@ static int apply_microcode_amd(int cpu)
 	if (mc_amd == NULL)
 		return 0;
 
-	wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
-	/* get patch id after patching */
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
-	/* check current patch id and patch's id for match */
+	/* need to apply patch? */
+	if (rev >= mc_amd->hdr.patch_id) {
+		c->microcode = rev;
+		return 0;
+	}
+
+	wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code);
+
+	/* verify patch application was successful */
+	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 	if (rev != mc_amd->hdr.patch_id) {
 		pr_err("CPU%d: update failed for patch_level=0x%08x\n",
 		       cpu, mc_amd->hdr.patch_id);
-- 
cgit v1.2.3


From 5f5b747282c6cc57b91baba37f76de27398b9e60 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 25 Jul 2012 20:06:54 +0200
Subject: x86, microcode, AMD: Read CPUID(1).EAX on the correct cpu

Read the CPUID(1).EAX leaf at the correct cpu and use it to search the
equivalence table for matching microcode patch. No functionality change.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-9-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_amd.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 8fdf7d99b804..25d34b177482 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -82,6 +82,7 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
+	csig->sig = cpuid_eax(0x00000001);
 	csig->rev = c->microcode;
 	pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev);
 
@@ -118,16 +119,15 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size,
 	return patch_size;
 }
 
-static u16 find_equiv_id(void)
+static u16 find_equiv_id(unsigned int cpu)
 {
-	unsigned int current_cpu_id, i = 0;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	int i = 0;
 
 	BUG_ON(equiv_cpu_table == NULL);
 
-	current_cpu_id = cpuid_eax(0x00000001);
-
 	while (equiv_cpu_table[i].installed_cpu != 0) {
-		if (current_cpu_id == equiv_cpu_table[i].installed_cpu)
+		if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
 			return equiv_cpu_table[i].equiv_cpu;
 
 		i++;
@@ -150,7 +150,7 @@ static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
 	patch_size = *(u32 *)(ucode_ptr + 4);
 	*current_size = patch_size + SECTION_HDR_SIZE;
 
-	equiv_cpu_id = find_equiv_id();
+	equiv_cpu_id = find_equiv_id(cpu);
 	if (!equiv_cpu_id)
 		return 0;
 
-- 
cgit v1.2.3


From 48e30685caa8bdc4b8d4417d8ac31db59689742c Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Thu, 26 Jul 2012 15:51:00 +0200
Subject: x86, microcode: Add a refresh firmware flag to ->request_microcode_fw

This is done in preparation for teaching the ucode driver to either load
a new ucode patches container from userspace or use an already cached
version. No functionality change in this patch.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-10-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/microcode.h  |  4 ++--
 arch/x86/kernel/microcode_amd.c   |  3 ++-
 arch/x86/kernel/microcode_core.c  | 11 ++++++-----
 arch/x86/kernel/microcode_intel.c |  3 ++-
 4 files changed, 12 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h
index 8813be600995..43d921b4752c 100644
--- a/arch/x86/include/asm/microcode.h
+++ b/arch/x86/include/asm/microcode.h
@@ -15,8 +15,8 @@ struct microcode_ops {
 	enum ucode_state (*request_microcode_user) (int cpu,
 				const void __user *buf, size_t size);
 
-	enum ucode_state (*request_microcode_fw) (int cpu,
-				struct device *device);
+	enum ucode_state (*request_microcode_fw) (int cpu, struct device *,
+						  bool refresh_fw);
 
 	void (*microcode_fini_cpu) (int cpu);
 
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 25d34b177482..94ecdaa24052 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -330,7 +330,8 @@ out:
  *
  * These might be larger than 2K.
  */
-static enum ucode_state request_microcode_amd(int cpu, struct device *device)
+static enum ucode_state request_microcode_amd(int cpu, struct device *device,
+					      bool refresh_fw)
 {
 	char fw_name[36] = "amd-ucode/microcode_amd.bin";
 	const struct firmware *fw;
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index dcde544e012c..9420972e98fe 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -282,7 +282,7 @@ static int reload_for_cpu(int cpu)
 	if (!uci->valid)
 		return err;
 
-	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev, true);
 	if (ustate == UCODE_OK)
 		apply_microcode_on_target(cpu);
 	else
@@ -377,7 +377,7 @@ static enum ucode_state microcode_resume_cpu(int cpu)
 	return UCODE_OK;
 }
 
-static enum ucode_state microcode_init_cpu(int cpu)
+static enum ucode_state microcode_init_cpu(int cpu, bool refresh_fw)
 {
 	enum ucode_state ustate;
 
@@ -388,7 +388,8 @@ static enum ucode_state microcode_init_cpu(int cpu)
 	if (system_state != SYSTEM_RUNNING)
 		return UCODE_NFOUND;
 
-	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev);
+	ustate = microcode_ops->request_microcode_fw(cpu, &microcode_pdev->dev,
+						     refresh_fw);
 
 	if (ustate == UCODE_OK) {
 		pr_debug("CPU%d updated upon init\n", cpu);
@@ -405,7 +406,7 @@ static enum ucode_state microcode_update_cpu(int cpu)
 	if (uci->valid)
 		return microcode_resume_cpu(cpu);
 
-	return microcode_init_cpu(cpu);
+	return microcode_init_cpu(cpu, false);
 }
 
 static int mc_device_add(struct device *dev, struct subsys_interface *sif)
@@ -421,7 +422,7 @@ static int mc_device_add(struct device *dev, struct subsys_interface *sif)
 	if (err)
 		return err;
 
-	if (microcode_init_cpu(cpu) == UCODE_ERROR)
+	if (microcode_init_cpu(cpu, true) == UCODE_ERROR)
 		return -EINVAL;
 
 	return err;
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index 0327e2b3c408..3544aed39338 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -405,7 +405,8 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
 	return 0;
 }
 
-static enum ucode_state request_microcode_fw(int cpu, struct device *device)
+static enum ucode_state request_microcode_fw(int cpu, struct device *device,
+					     bool refresh_fw)
 {
 	char name[30];
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
-- 
cgit v1.2.3


From c96d2c0905cc48e34f2b37b775b59932c416b343 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 1 Aug 2012 14:55:01 +0200
Subject: x86, microcode, AMD: Add reverse equiv table search

We search the equivalence table using the CPUID(1) signature of the
CPU in order to get the equivalence ID of the patch which we need to
apply. Add a function which does the reverse - it will be needed in
later patches.

While at it, pull the other equiv table function up in the file so that
it can be used by other functionality without forward declarations.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-11-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_amd.c | 46 +++++++++++++++++++++++++++--------------
 1 file changed, 30 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 94ecdaa24052..03ed5af7053d 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -78,6 +78,36 @@ static struct equiv_cpu_entry *equiv_cpu_table;
 /* page-sized ucode patch buffer */
 void *patch;
 
+static u16 find_equiv_id(unsigned int cpu)
+{
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	int i = 0;
+
+	BUG_ON(equiv_cpu_table == NULL);
+
+	while (equiv_cpu_table[i].installed_cpu != 0) {
+		if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
+			return equiv_cpu_table[i].equiv_cpu;
+
+		i++;
+	}
+	return 0;
+}
+
+static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
+{
+	int i = 0;
+
+	BUG_ON(!equiv_cpu_table);
+
+	while (equiv_cpu_table[i].equiv_cpu != 0) {
+		if (equiv_cpu == equiv_cpu_table[i].equiv_cpu)
+			return equiv_cpu_table[i].installed_cpu;
+		i++;
+	}
+	return 0;
+}
+
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -119,22 +149,6 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size,
 	return patch_size;
 }
 
-static u16 find_equiv_id(unsigned int cpu)
-{
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	int i = 0;
-
-	BUG_ON(equiv_cpu_table == NULL);
-
-	while (equiv_cpu_table[i].installed_cpu != 0) {
-		if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
-			return equiv_cpu_table[i].equiv_cpu;
-
-		i++;
-	}
-	return 0;
-}
-
 /*
  * we signal a good patch is found by returning its size > 0
  */
-- 
cgit v1.2.3


From a3eb3b4da106a23b5d41bf915726172e31654a65 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 1 Aug 2012 15:38:18 +0200
Subject: x86, microcode, AMD: Add a small, per-family patches cache

This is a trivial cache which collects all ucode patches for the current
family of CPUs on the system. If a newer patch appears due to the
container file being updated in userspace, we replace our cached version
with the new one.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-12-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_amd.c | 67 ++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 66 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 03ed5af7053d..cacdc9a5ee49 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -78,12 +78,22 @@ static struct equiv_cpu_entry *equiv_cpu_table;
 /* page-sized ucode patch buffer */
 void *patch;
 
+struct ucode_patch {
+	struct list_head plist;
+	void *data;
+	u32 patch_id;
+	u16 equiv_cpu;
+};
+
+static LIST_HEAD(pcache);
+
 static u16 find_equiv_id(unsigned int cpu)
 {
 	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
 	int i = 0;
 
-	BUG_ON(equiv_cpu_table == NULL);
+	if (!equiv_cpu_table)
+		return 0;
 
 	while (equiv_cpu_table[i].installed_cpu != 0) {
 		if (uci->cpu_sig.sig == equiv_cpu_table[i].installed_cpu)
@@ -108,6 +118,61 @@ static u32 find_cpu_family_by_equiv_cpu(u16 equiv_cpu)
 	return 0;
 }
 
+/*
+ * a small, trivial cache of per-family ucode patches
+ */
+static struct ucode_patch *cache_find_patch(u16 equiv_cpu)
+{
+	struct ucode_patch *p;
+
+	list_for_each_entry(p, &pcache, plist)
+		if (p->equiv_cpu == equiv_cpu)
+			return p;
+	return NULL;
+}
+
+static void update_cache(struct ucode_patch *new_patch)
+{
+	struct ucode_patch *p;
+
+	list_for_each_entry(p, &pcache, plist) {
+		if (p->equiv_cpu == new_patch->equiv_cpu) {
+			if (p->patch_id >= new_patch->patch_id)
+				/* we already have the latest patch */
+				return;
+
+			list_replace(&p->plist, &new_patch->plist);
+			kfree(p->data);
+			kfree(p);
+			return;
+		}
+	}
+	/* no patch found, add it */
+	list_add_tail(&new_patch->plist, &pcache);
+}
+
+static void free_cache(void)
+{
+	struct ucode_patch *p;
+
+	list_for_each_entry_reverse(p, &pcache, plist) {
+		__list_del(p->plist.prev, p->plist.next);
+		kfree(p->data);
+		kfree(p);
+	}
+}
+
+static struct ucode_patch *find_patch(unsigned int cpu)
+{
+	u16 equiv_id;
+
+	equiv_id = find_equiv_id(cpu);
+	if (!equiv_id)
+		return NULL;
+
+	return cache_find_patch(equiv_id);
+}
+
 static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
-- 
cgit v1.2.3


From 2efb05e8e9fa3510044e007b90263c73b6a83f84 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 1 Aug 2012 16:16:13 +0200
Subject: x86, microcode, AMD: Rewrite patch application procedure

Limit the access to userspace only on the BSP where we load the
container, verify the patches in it and put them in the patch cache.
Then, at application time, we lookup the correct patch in the cache and
use it.

When we need to reload the userspace container, we do that over the
reload interface:

echo 1 > /sys/devices/system/cpu/microcode/reload

which reloads (a possibly newer) container from userspace and applies
then the newest patches from there.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Link: http://lkml.kernel.org/r/1344361461-10076-13-git-send-email-bp@amd64.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/kernel/microcode_amd.c | 236 ++++++++++++++++++++--------------------
 1 file changed, 121 insertions(+), 115 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index cacdc9a5ee49..5511216b4434 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -75,9 +75,6 @@ struct microcode_amd {
 
 static struct equiv_cpu_entry *equiv_cpu_table;
 
-/* page-sized ucode patch buffer */
-void *patch;
-
 struct ucode_patch {
 	struct list_head plist;
 	void *data;
@@ -184,7 +181,7 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig)
 	return 0;
 }
 
-static unsigned int verify_ucode_size(int cpu, u32 patch_size,
+static unsigned int verify_patch_size(int cpu, u32 patch_size,
 				      unsigned int size)
 {
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
@@ -214,73 +211,25 @@ static unsigned int verify_ucode_size(int cpu, u32 patch_size,
 	return patch_size;
 }
 
-/*
- * we signal a good patch is found by returning its size > 0
- */
-static int get_matching_microcode(int cpu, const u8 *ucode_ptr,
-				  unsigned int leftover_size, int rev,
-				  unsigned int *current_size)
-{
-	struct microcode_header_amd *mc_hdr;
-	unsigned int actual_size, patch_size;
-	u16 equiv_cpu_id;
-
-	/* size of the current patch we're staring at */
-	patch_size = *(u32 *)(ucode_ptr + 4);
-	*current_size = patch_size + SECTION_HDR_SIZE;
-
-	equiv_cpu_id = find_equiv_id(cpu);
-	if (!equiv_cpu_id)
-		return 0;
-
-	/*
-	 * let's look at the patch header itself now
-	 */
-	mc_hdr = (struct microcode_header_amd *)(ucode_ptr + SECTION_HDR_SIZE);
-
-	if (mc_hdr->processor_rev_id != equiv_cpu_id)
-		return 0;
-
-	/* ucode might be chipset specific -- currently we don't support this */
-	if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
-		pr_err("CPU%d: chipset specific code not yet supported\n",
-		       cpu);
-		return 0;
-	}
-
-	if (mc_hdr->patch_id <= rev)
-		return 0;
-
-	/*
-	 * now that the header looks sane, verify its size
-	 */
-	actual_size = verify_ucode_size(cpu, patch_size, leftover_size);
-	if (!actual_size)
-		return 0;
-
-	/* clear the patch buffer */
-	memset(patch, 0, PAGE_SIZE);
-
-	/* all looks ok, get the binary patch */
-	memcpy(patch, ucode_ptr + SECTION_HDR_SIZE, actual_size);
-
-	return actual_size;
-}
-
 static int apply_microcode_amd(int cpu)
 {
-	u32 rev, dummy;
-	int cpu_num = raw_smp_processor_id();
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
-	struct microcode_amd *mc_amd = uci->mc;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct microcode_amd *mc_amd;
+	struct ucode_cpu_info *uci;
+	struct ucode_patch *p;
+	u32 rev, dummy;
+
+	BUG_ON(raw_smp_processor_id() != cpu);
 
-	/* We should bind the task to the CPU */
-	BUG_ON(cpu_num != cpu);
+	uci = ucode_cpu_info + cpu;
 
-	if (mc_amd == NULL)
+	p = find_patch(cpu);
+	if (!p)
 		return 0;
 
+	mc_amd  = p->data;
+	uci->mc = p->data;
+
 	rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
 	/* need to apply patch? */
@@ -336,61 +285,113 @@ static void free_equiv_cpu_table(void)
 	equiv_cpu_table = NULL;
 }
 
-static enum ucode_state
-generic_load_microcode(int cpu, const u8 *data, size_t size)
+static void cleanup(void)
 {
-	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-	struct microcode_header_amd *mc_hdr = NULL;
-	unsigned int mc_size, leftover, current_size = 0;
+	free_equiv_cpu_table();
+	free_cache();
+}
+
+/*
+ * We return the current size even if some of the checks failed so that
+ * we can skip over the next patch. If we return a negative value, we
+ * signal a grave error like a memory allocation has failed and the
+ * driver cannot continue functioning normally. In such cases, we tear
+ * down everything we've used up so far and exit.
+ */
+static int verify_and_add_patch(unsigned int cpu, u8 *fw, unsigned int leftover)
+{
+	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	struct microcode_header_amd *mc_hdr;
+	struct ucode_patch *patch;
+	unsigned int patch_size, crnt_size, ret;
+	u32 proc_fam;
+	u16 proc_id;
+
+	patch_size  = *(u32 *)(fw + 4);
+	crnt_size   = patch_size + SECTION_HDR_SIZE;
+	mc_hdr	    = (struct microcode_header_amd *)(fw + SECTION_HDR_SIZE);
+	proc_id	    = mc_hdr->processor_rev_id;
+
+	proc_fam = find_cpu_family_by_equiv_cpu(proc_id);
+	if (!proc_fam) {
+		pr_err("No patch family for equiv ID: 0x%04x\n", proc_id);
+		return crnt_size;
+	}
+
+	/* check if patch is for the current family */
+	proc_fam = ((proc_fam >> 8) & 0xf) + ((proc_fam >> 20) & 0xff);
+	if (proc_fam != c->x86)
+		return crnt_size;
+
+	if (mc_hdr->nb_dev_id || mc_hdr->sb_dev_id) {
+		pr_err("Patch-ID 0x%08x: chipset-specific code unsupported.\n",
+			mc_hdr->patch_id);
+		return crnt_size;
+	}
+
+	ret = verify_patch_size(cpu, patch_size, leftover);
+	if (!ret) {
+		pr_err("Patch-ID 0x%08x: size mismatch.\n", mc_hdr->patch_id);
+		return crnt_size;
+	}
+
+	patch = kzalloc(sizeof(*patch), GFP_KERNEL);
+	if (!patch) {
+		pr_err("Patch allocation failure.\n");
+		return -EINVAL;
+	}
+
+	patch->data = kzalloc(patch_size, GFP_KERNEL);
+	if (!patch->data) {
+		pr_err("Patch data allocation failure.\n");
+		kfree(patch);
+		return -EINVAL;
+	}
+
+	/* All looks ok, copy patch... */
+	memcpy(patch->data, fw + SECTION_HDR_SIZE, patch_size);
+	INIT_LIST_HEAD(&patch->plist);
+	patch->patch_id  = mc_hdr->patch_id;
+	patch->equiv_cpu = proc_id;
+
+	/* ... and add to cache. */
+	update_cache(patch);
+
+	return crnt_size;
+}
+
+static enum ucode_state load_microcode_amd(int cpu, const u8 *data, size_t size)
+{
+	enum ucode_state ret = UCODE_ERROR;
+	unsigned int leftover;
+	u8 *fw = (u8 *)data;
+	int crnt_size = 0;
 	int offset;
-	const u8 *ucode_ptr = data;
-	void *new_mc = NULL;
-	unsigned int new_rev = uci->cpu_sig.rev;
-	enum ucode_state state = UCODE_ERROR;
 
-	offset = install_equiv_cpu_table(ucode_ptr);
+	offset = install_equiv_cpu_table(data);
 	if (offset < 0) {
 		pr_err("failed to create equivalent cpu table\n");
-		goto out;
+		return ret;
 	}
-	ucode_ptr += offset;
+	fw += offset;
 	leftover = size - offset;
 
-	if (*(u32 *)ucode_ptr != UCODE_UCODE_TYPE) {
+	if (*(u32 *)fw != UCODE_UCODE_TYPE) {
 		pr_err("invalid type field in container file section header\n");
-		goto free_table;
+		free_equiv_cpu_table();
+		return ret;
 	}
 
 	while (leftover) {
-		mc_size = get_matching_microcode(cpu, ucode_ptr, leftover,
-						 new_rev, &current_size);
-		if (mc_size) {
-			mc_hdr  = patch;
-			new_mc  = patch;
-			new_rev = mc_hdr->patch_id;
-			goto out_ok;
-		}
+		crnt_size = verify_and_add_patch(cpu, fw, leftover);
+		if (crnt_size < 0)
+			return ret;
 
-		ucode_ptr += current_size;
-		leftover  -= current_size;
+		fw	 += crnt_size;
+		leftover -= crnt_size;
 	}
 
-	if (!new_mc) {
-		state = UCODE_NFOUND;
-		goto free_table;
-	}
-
-out_ok:
-	uci->mc = new_mc;
-	state = UCODE_OK;
-	pr_debug("CPU%d update ucode (0x%08x -> 0x%08x)\n",
-		 cpu, uci->cpu_sig.rev, new_rev);
-
-free_table:
-	free_equiv_cpu_table();
-
-out:
-	return state;
+	return UCODE_OK;
 }
 
 /*
@@ -401,7 +402,7 @@ out:
  *
  * This legacy file is always smaller than 2K in size.
  *
- * Starting at family 15h they are in family specific firmware files:
+ * Beginning with family 15h, they are in family-specific firmware files:
  *
  *    amd-ucode/microcode_amd_fam15h.bin
  *    amd-ucode/microcode_amd_fam16h.bin
@@ -413,9 +414,13 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 					      bool refresh_fw)
 {
 	char fw_name[36] = "amd-ucode/microcode_amd.bin";
-	const struct firmware *fw;
-	enum ucode_state ret = UCODE_NFOUND;
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
+	enum ucode_state ret = UCODE_NFOUND;
+	const struct firmware *fw;
+
+	/* reload ucode container only on the boot cpu */
+	if (!refresh_fw || c->cpu_index != boot_cpu_data.cpu_index)
+		return UCODE_OK;
 
 	if (c->x86 >= 0x15)
 		snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86);
@@ -431,12 +436,17 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device,
 		goto fw_release;
 	}
 
-	ret = generic_load_microcode(cpu, fw->data, fw->size);
+	/* free old equiv table */
+	free_equiv_cpu_table();
+
+	ret = load_microcode_amd(cpu, fw->data, fw->size);
+	if (ret != UCODE_OK)
+		cleanup();
 
-fw_release:
+ fw_release:
 	release_firmware(fw);
 
-out:
+ out:
 	return ret;
 }
 
@@ -470,14 +480,10 @@ struct microcode_ops * __init init_amd_microcode(void)
 		return NULL;
 	}
 
-	patch = (void *)get_zeroed_page(GFP_KERNEL);
-	if (!patch)
-		return NULL;
-
 	return &microcode_amd_ops;
 }
 
 void __exit exit_amd_microcode(void)
 {
-	free_page((unsigned long)patch);
+	cleanup();
 }
-- 
cgit v1.2.3


From 90993cdd1800dc6ef9587431a0c625b978584e81 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 16 Aug 2012 17:00:19 -0300
Subject: x86: KVM guest: merge CONFIG_KVM_CLOCK into CONFIG_KVM_GUEST

The distinction between CONFIG_KVM_CLOCK and CONFIG_KVM_GUEST is
not so clear anymore, as demonstrated by recent bugs caused by poor
handling of on/off combinations of these options.

Merge CONFIG_KVM_CLOCK into CONFIG_KVM_GUEST.

Reported-By: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/Kconfig                | 21 ++++++++-------------
 arch/x86/include/asm/kvm_para.h |  4 ++--
 arch/x86/kernel/Makefile        |  3 +--
 arch/x86/kernel/kvm.c           |  2 --
 arch/x86/kernel/setup.c         |  2 +-
 5 files changed, 12 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4abd..a42e2e99caae 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -573,23 +573,18 @@ config PARAVIRT_TIME_ACCOUNTING
 
 source "arch/x86/xen/Kconfig"
 
-config KVM_CLOCK
-	bool "KVM paravirtualized clock"
-	select PARAVIRT
-	select PARAVIRT_CLOCK
-	---help---
-	  Turning on this option will allow you to run a paravirtualized clock
-	  when running over the KVM hypervisor. Instead of relying on a PIT
-	  (or probably other) emulation by the underlying device model, the host
-	  provides the guest with timing infrastructure such as time of day, and
-	  system time
-
 config KVM_GUEST
-	bool "KVM Guest support"
+	bool "KVM Guest support (including kvmclock)"
+	select PARAVIRT
 	select PARAVIRT
+	select PARAVIRT_CLOCK
+	default y if PARAVIRT_GUEST
 	---help---
 	  This option enables various optimizations for running under the KVM
-	  hypervisor.
+	  hypervisor. It includes a paravirtualized clock, so that instead
+	  of relying on a PIT (or probably other) emulation by the
+	  underlying device model, the host provides the guest with
+	  timing infrastructure such as time of day, and system time
 
 source "arch/x86/lguest/Kconfig"
 
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h
index 20f5697888bd..eb3e9d85e1f1 100644
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -102,14 +102,14 @@ struct kvm_vcpu_pv_apf_data {
 extern void kvmclock_init(void);
 extern int kvm_register_clock(char *txt);
 
-#ifdef CONFIG_KVM_CLOCK
+#ifdef CONFIG_KVM_GUEST
 bool kvm_check_and_clear_guest_paused(void);
 #else
 static inline bool kvm_check_and_clear_guest_paused(void)
 {
 	return false;
 }
-#endif /* CONFIG_KVMCLOCK */
+#endif /* CONFIG_KVM_GUEST */
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d97..7203298e0b83 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -81,8 +81,7 @@ obj-$(CONFIG_DEBUG_RODATA_TEST)	+= test_rodata.o
 obj-$(CONFIG_DEBUG_NX_TEST)	+= test_nx.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
 
-obj-$(CONFIG_KVM_GUEST)		+= kvm.o
-obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
+obj-$(CONFIG_KVM_GUEST)		+= kvm.o kvmclock.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o
 obj-$(CONFIG_PARAVIRT_CLOCK)	+= pvclock.o
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 1596cc8fd793..b3e5e51bc907 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -397,9 +397,7 @@ void kvm_disable_steal_time(void)
 #ifdef CONFIG_SMP
 static void __init kvm_smp_prepare_boot_cpu(void)
 {
-#ifdef CONFIG_KVM_CLOCK
 	WARN_ON(kvm_register_clock("primary cpu clock"));
-#endif
 	kvm_guest_cpu_init();
 	native_smp_prepare_boot_cpu();
 }
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80e1b95..b3386ae3438b 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -957,7 +957,7 @@ void __init setup_arch(char **cmdline_p)
 	initmem_init();
 	memblock_find_dma_reserve();
 
-#ifdef CONFIG_KVM_CLOCK
+#ifdef CONFIG_KVM_GUEST
 	kvmclock_init();
 #endif
 
-- 
cgit v1.2.3


From 816afe4ff98ee10b1d30fd66361be132a0a5cee6 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Mon, 6 Aug 2012 17:29:49 +0930
Subject: x86/smp: Don't ever patch back to UP if we unplug cpus

We still patch SMP instructions to UP variants if we boot with a
single CPU, but not at any other time.  In particular, not if we
unplug CPUs to return to a single cpu.

Paul McKenney points out:

 mean offline overhead is 6251/48=130.2 milliseconds.

 If I remove the alternatives_smp_switch() from the offline
 path [...] the mean offline overhead is 550/42=13.1 milliseconds

Basically, we're never going to get those 120ms back, and the
code is pretty messy.

We get rid of:

 1) The "smp-alt-once" boot option. It's actually "smp-alt-boot", the
    documentation is wrong. It's now the default.

 2) The skip_smp_alternatives flag used by suspend.

 3) arch_disable_nonboot_cpus_begin() and arch_disable_nonboot_cpus_end()
    which were only used to set this one flag.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: Paul McKenney <paul.mckenney@us.ibm.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/87vcgwwive.fsf@rustcorp.com.au
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 Documentation/kernel-parameters.txt |   3 -
 arch/x86/include/asm/alternative.h  |   4 +-
 arch/x86/kernel/alternative.c       | 107 +++++++++---------------------------
 arch/x86/kernel/smpboot.c           |  20 +------
 arch/x86/xen/smp.c                  |   6 +-
 kernel/cpu.c                        |  11 ----
 6 files changed, 32 insertions(+), 119 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index ad7e2e5088c1..7aef3345f739 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2638,9 +2638,6 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	smart2=		[HW]
 			Format: <io1>[,<io2>[,...,<io8>]]
 
-	smp-alt-once	[X86-32,SMP] On a hotplug CPU system, only
-			attempt to substitute SMP alternatives once at boot.
-
 	smsc-ircc2.nopnp	[HW] Don't use PNP to discover SMC devices
 	smsc-ircc2.ircc_cfg=	[HW] Device configuration I/O port
 	smsc-ircc2.ircc_sir=	[HW] SIR base I/O port
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 70780689599a..444704c8e186 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -60,7 +60,7 @@ extern void alternatives_smp_module_add(struct module *mod, char *name,
 					void *locks, void *locks_end,
 					void *text, void *text_end);
 extern void alternatives_smp_module_del(struct module *mod);
-extern void alternatives_smp_switch(int smp);
+extern void alternatives_enable_smp(void);
 extern int alternatives_text_reserved(void *start, void *end);
 extern bool skip_smp_alternatives;
 #else
@@ -68,7 +68,7 @@ static inline void alternatives_smp_module_add(struct module *mod, char *name,
 					       void *locks, void *locks_end,
 					       void *text, void *text_end) {}
 static inline void alternatives_smp_module_del(struct module *mod) {}
-static inline void alternatives_smp_switch(int smp) {}
+static inline void alternatives_enable_smp(void) {}
 static inline int alternatives_text_reserved(void *start, void *end)
 {
 	return 0;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index afb7ff79a29f..af1f326a31c4 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -23,19 +23,6 @@
 
 #define MAX_PATCH_LEN (255-1)
 
-#ifdef CONFIG_HOTPLUG_CPU
-static int smp_alt_once;
-
-static int __init bootonly(char *str)
-{
-	smp_alt_once = 1;
-	return 1;
-}
-__setup("smp-alt-boot", bootonly);
-#else
-#define smp_alt_once 1
-#endif
-
 static int __initdata_or_module debug_alternative;
 
 static int __init debug_alt(char *str)
@@ -326,9 +313,6 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
 {
 	const s32 *poff;
 
-	if (noreplace_smp)
-		return;
-
 	mutex_lock(&text_mutex);
 	for (poff = start; poff < end; poff++) {
 		u8 *ptr = (u8 *)poff + *poff;
@@ -359,7 +343,7 @@ struct smp_alt_module {
 };
 static LIST_HEAD(smp_alt_modules);
 static DEFINE_MUTEX(smp_alt);
-static int smp_mode = 1;	/* protected by smp_alt */
+static bool uniproc_patched = false;	/* protected by smp_alt */
 
 void __init_or_module alternatives_smp_module_add(struct module *mod,
 						  char *name,
@@ -368,19 +352,18 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
 {
 	struct smp_alt_module *smp;
 
-	if (noreplace_smp)
-		return;
+	mutex_lock(&smp_alt);
+	if (!uniproc_patched)
+		goto unlock;
 
-	if (smp_alt_once) {
-		if (boot_cpu_has(X86_FEATURE_UP))
-			alternatives_smp_unlock(locks, locks_end,
-						text, text_end);
-		return;
-	}
+	if (num_possible_cpus() == 1)
+		/* Don't bother remembering, we'll never have to undo it. */
+		goto smp_unlock;
 
 	smp = kzalloc(sizeof(*smp), GFP_KERNEL);
 	if (NULL == smp)
-		return; /* we'll run the (safe but slow) SMP code then ... */
+		/* we'll run the (safe but slow) SMP code then ... */
+		goto unlock;
 
 	smp->mod	= mod;
 	smp->name	= name;
@@ -392,11 +375,10 @@ void __init_or_module alternatives_smp_module_add(struct module *mod,
 		__func__, smp->locks, smp->locks_end,
 		smp->text, smp->text_end, smp->name);
 
-	mutex_lock(&smp_alt);
 	list_add_tail(&smp->next, &smp_alt_modules);
-	if (boot_cpu_has(X86_FEATURE_UP))
-		alternatives_smp_unlock(smp->locks, smp->locks_end,
-					smp->text, smp->text_end);
+smp_unlock:
+	alternatives_smp_unlock(locks, locks_end, text, text_end);
+unlock:
 	mutex_unlock(&smp_alt);
 }
 
@@ -404,24 +386,18 @@ void __init_or_module alternatives_smp_module_del(struct module *mod)
 {
 	struct smp_alt_module *item;
 
-	if (smp_alt_once || noreplace_smp)
-		return;
-
 	mutex_lock(&smp_alt);
 	list_for_each_entry(item, &smp_alt_modules, next) {
 		if (mod != item->mod)
 			continue;
 		list_del(&item->next);
-		mutex_unlock(&smp_alt);
-		DPRINTK("%s: %s\n", __func__, item->name);
 		kfree(item);
-		return;
+		break;
 	}
 	mutex_unlock(&smp_alt);
 }
 
-bool skip_smp_alternatives;
-void alternatives_smp_switch(int smp)
+void alternatives_enable_smp(void)
 {
 	struct smp_alt_module *mod;
 
@@ -436,34 +412,21 @@ void alternatives_smp_switch(int smp)
 	pr_info("lockdep: fixing up alternatives\n");
 #endif
 
-	if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
-		return;
-	BUG_ON(!smp && (num_online_cpus() > 1));
+	/* Why bother if there are no other CPUs? */
+	BUG_ON(num_possible_cpus() == 1);
 
 	mutex_lock(&smp_alt);
 
-	/*
-	 * Avoid unnecessary switches because it forces JIT based VMs to
-	 * throw away all cached translations, which can be quite costly.
-	 */
-	if (smp == smp_mode) {
-		/* nothing */
-	} else if (smp) {
+	if (uniproc_patched) {
 		pr_info("switching to SMP code\n");
+		BUG_ON(num_online_cpus() != 1);
 		clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
 		clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
 		list_for_each_entry(mod, &smp_alt_modules, next)
 			alternatives_smp_lock(mod->locks, mod->locks_end,
 					      mod->text, mod->text_end);
-	} else {
-		pr_info("switching to UP code\n");
-		set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
-		set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
-		list_for_each_entry(mod, &smp_alt_modules, next)
-			alternatives_smp_unlock(mod->locks, mod->locks_end,
-						mod->text, mod->text_end);
+		uniproc_patched = false;
 	}
-	smp_mode = smp;
 	mutex_unlock(&smp_alt);
 }
 
@@ -540,40 +503,22 @@ void __init alternative_instructions(void)
 
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
 
-	/* switch to patch-once-at-boottime-only mode and free the
-	 * tables in case we know the number of CPUs will never ever
-	 * change */
-#ifdef CONFIG_HOTPLUG_CPU
-	if (num_possible_cpus() < 2)
-		smp_alt_once = 1;
-#endif
-
 #ifdef CONFIG_SMP
-	if (smp_alt_once) {
-		if (1 == num_possible_cpus()) {
-			pr_info("switching to UP code\n");
-			set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
-			set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
-
-			alternatives_smp_unlock(__smp_locks, __smp_locks_end,
-						_text, _etext);
-		}
-	} else {
+	/* Patch to UP if other cpus not imminent. */
+	if (!noreplace_smp && (num_present_cpus() == 1 || setup_max_cpus <= 1)) {
+		uniproc_patched = true;
 		alternatives_smp_module_add(NULL, "core kernel",
 					    __smp_locks, __smp_locks_end,
 					    _text, _etext);
-
-		/* Only switch to UP mode if we don't immediately boot others */
-		if (num_present_cpus() == 1 || setup_max_cpus <= 1)
-			alternatives_smp_switch(0);
 	}
-#endif
- 	apply_paravirt(__parainstructions, __parainstructions_end);
 
-	if (smp_alt_once)
+	if (!uniproc_patched || num_possible_cpus() == 1)
 		free_init_pages("SMP alternatives",
 				(unsigned long)__smp_locks,
 				(unsigned long)__smp_locks_end);
+#endif
+
+	apply_paravirt(__parainstructions, __parainstructions_end);
 
 	restart_nmi();
 }
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7c5a8c314c02..c80a33bc528b 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -665,7 +665,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu, struct task_struct *idle)
 	unsigned long boot_error = 0;
 	int timeout;
 
-	alternatives_smp_switch(1);
+	/* Just in case we booted with a single CPU. */
+	alternatives_enable_smp();
 
 	idle->thread.sp = (unsigned long) (((struct pt_regs *)
 			  (THREAD_SIZE +  task_stack_page(idle))) - 1);
@@ -1053,20 +1054,6 @@ out:
 	preempt_enable();
 }
 
-void arch_disable_nonboot_cpus_begin(void)
-{
-	/*
-	 * Avoid the smp alternatives switch during the disable_nonboot_cpus().
-	 * In the suspend path, we will be back in the SMP mode shortly anyways.
-	 */
-	skip_smp_alternatives = true;
-}
-
-void arch_disable_nonboot_cpus_end(void)
-{
-	skip_smp_alternatives = false;
-}
-
 void arch_enable_nonboot_cpus_begin(void)
 {
 	set_mtrr_aps_delayed_init();
@@ -1256,9 +1243,6 @@ void native_cpu_die(unsigned int cpu)
 		if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
 			if (system_state == SYSTEM_RUNNING)
 				pr_info("CPU %u is now offline\n", cpu);
-
-			if (1 == num_online_cpus())
-				alternatives_smp_switch(0);
 			return;
 		}
 		msleep(100);
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index f58dca7a6e52..353c50f18702 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -377,7 +377,8 @@ static int __cpuinit xen_cpu_up(unsigned int cpu, struct task_struct *idle)
 		return rc;
 
 	if (num_online_cpus() == 1)
-		alternatives_smp_switch(1);
+		/* Just in case we booted with a single CPU. */
+		alternatives_enable_smp();
 
 	rc = xen_smp_intr_init(cpu);
 	if (rc)
@@ -424,9 +425,6 @@ static void xen_cpu_die(unsigned int cpu)
 	unbind_from_irqhandler(per_cpu(xen_irq_work, cpu), NULL);
 	xen_uninit_lock_cpu(cpu);
 	xen_teardown_timer(cpu);
-
-	if (num_online_cpus() == 1)
-		alternatives_smp_switch(0);
 }
 
 static void __cpuinit xen_play_dead(void) /* used only with HOTPLUG_CPU */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 14d32588cccd..f6bfe3e03f6b 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -439,14 +439,6 @@ EXPORT_SYMBOL_GPL(cpu_up);
 #ifdef CONFIG_PM_SLEEP_SMP
 static cpumask_var_t frozen_cpus;
 
-void __weak arch_disable_nonboot_cpus_begin(void)
-{
-}
-
-void __weak arch_disable_nonboot_cpus_end(void)
-{
-}
-
 int disable_nonboot_cpus(void)
 {
 	int cpu, first_cpu, error = 0;
@@ -458,7 +450,6 @@ int disable_nonboot_cpus(void)
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
 	cpumask_clear(frozen_cpus);
-	arch_disable_nonboot_cpus_begin();
 
 	printk("Disabling non-boot CPUs ...\n");
 	for_each_online_cpu(cpu) {
@@ -474,8 +465,6 @@ int disable_nonboot_cpus(void)
 		}
 	}
 
-	arch_disable_nonboot_cpus_end();
-
 	if (!error) {
 		BUG_ON(num_online_cpus() > 1);
 		/* Make sure the CPUs won't be enabled by someone else */
-- 
cgit v1.2.3


From bd3f79b71de0410352ab506496a467fcb0620912 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Wed, 22 Aug 2012 17:20:14 +0100
Subject: xen: Introduce xen_pfn_t for pfn and mfn types

All the original Xen headers have xen_pfn_t as mfn and pfn type, however
when they have been imported in Linux, xen_pfn_t has been replaced with
unsigned long. That might work for x86 and ia64 but it does not for arm.
Bring back xen_pfn_t and let each architecture define xen_pfn_t as they
see fit.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/ia64/include/asm/xen/interface.h | 5 ++++-
 arch/x86/include/asm/xen/interface.h  | 5 +++++
 include/xen/interface/grant_table.h   | 4 ++--
 include/xen/interface/memory.h        | 6 +++---
 include/xen/interface/platform.h      | 4 ++--
 include/xen/interface/xen.h           | 6 +++---
 include/xen/privcmd.h                 | 2 --
 7 files changed, 19 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/include/asm/xen/interface.h b/arch/ia64/include/asm/xen/interface.h
index ee9cad6e749b..3d52a5bbd857 100644
--- a/arch/ia64/include/asm/xen/interface.h
+++ b/arch/ia64/include/asm/xen/interface.h
@@ -67,6 +67,10 @@
 #define set_xen_guest_handle(hnd, val)	do { (hnd).p = val; } while (0)
 
 #ifndef __ASSEMBLY__
+/* Explicitly size integers that represent pfns in the public interface
+ * with Xen so that we could have one ABI that works for 32 and 64 bit
+ * guests. */
+typedef unsigned long xen_pfn_t;
 /* Guest handles for primitive C types. */
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_GUEST_HANDLE(uint, unsigned int);
@@ -79,7 +83,6 @@ DEFINE_GUEST_HANDLE(void);
 DEFINE_GUEST_HANDLE(uint64_t);
 DEFINE_GUEST_HANDLE(uint32_t);
 
-typedef unsigned long xen_pfn_t;
 DEFINE_GUEST_HANDLE(xen_pfn_t);
 #define PRI_xen_pfn	"lx"
 #endif
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index a93db16e9582..555f94d3637b 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -47,6 +47,10 @@
 #endif
 
 #ifndef __ASSEMBLY__
+/* Explicitly size integers that represent pfns in the public interface
+ * with Xen so that on ARM we can have one ABI that works for 32 and 64
+ * bit guests. */
+typedef unsigned long xen_pfn_t;
 /* Guest handles for primitive C types. */
 __DEFINE_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_GUEST_HANDLE(uint,  unsigned int);
@@ -57,6 +61,7 @@ DEFINE_GUEST_HANDLE(long);
 DEFINE_GUEST_HANDLE(void);
 DEFINE_GUEST_HANDLE(uint64_t);
 DEFINE_GUEST_HANDLE(uint32_t);
+DEFINE_GUEST_HANDLE(xen_pfn_t);
 #endif
 
 #ifndef HYPERVISOR_VIRT_START
diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h
index a17d84433e6a..7da811bdd558 100644
--- a/include/xen/interface/grant_table.h
+++ b/include/xen/interface/grant_table.h
@@ -338,7 +338,7 @@ DEFINE_GUEST_HANDLE_STRUCT(gnttab_dump_table);
 #define GNTTABOP_transfer                4
 struct gnttab_transfer {
     /* IN parameters. */
-    unsigned long mfn;
+    xen_pfn_t mfn;
     domid_t       domid;
     grant_ref_t   ref;
     /* OUT parameters. */
@@ -375,7 +375,7 @@ struct gnttab_copy {
 	struct {
 		union {
 			grant_ref_t ref;
-			unsigned long   gmfn;
+			xen_pfn_t   gmfn;
 		} u;
 		domid_t  domid;
 		uint16_t offset;
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index 8d4efc1cc64a..d8e33a93ea4d 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -31,7 +31,7 @@ struct xen_memory_reservation {
      *   OUT: GMFN bases of extents that were allocated
      *   (NB. This command also updates the mach_to_phys translation table)
      */
-    GUEST_HANDLE(ulong) extent_start;
+    GUEST_HANDLE(xen_pfn_t) extent_start;
 
     /* Number of extents, and size/alignment of each (2^extent_order pages). */
     unsigned long  nr_extents;
@@ -130,7 +130,7 @@ struct xen_machphys_mfn_list {
      * any large discontiguities in the machine address space, 2MB gaps in
      * the machphys table will be represented by an MFN base of zero.
      */
-    GUEST_HANDLE(ulong) extent_start;
+    GUEST_HANDLE(xen_pfn_t) extent_start;
 
     /*
      * Number of extents written to the above array. This will be smaller
@@ -175,7 +175,7 @@ struct xen_add_to_physmap {
     unsigned long idx;
 
     /* GPFN where the source mapping page should appear. */
-    unsigned long gpfn;
+    xen_pfn_t gpfn;
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_add_to_physmap);
 
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 486653f0dd8f..0bea47027fa2 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -54,7 +54,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_settime_t);
 #define XENPF_add_memtype         31
 struct xenpf_add_memtype {
 	/* IN variables. */
-	unsigned long mfn;
+	xen_pfn_t mfn;
 	uint64_t nr_mfns;
 	uint32_t type;
 	/* OUT variables. */
@@ -84,7 +84,7 @@ struct xenpf_read_memtype {
 	/* IN variables. */
 	uint32_t reg;
 	/* OUT variables. */
-	unsigned long mfn;
+	xen_pfn_t mfn;
 	uint64_t nr_mfns;
 	uint32_t type;
 };
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 3871e4753680..42834a36d345 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -188,7 +188,7 @@ struct mmuext_op {
 	unsigned int cmd;
 	union {
 		/* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
-		unsigned long mfn;
+		xen_pfn_t mfn;
 		/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
 		unsigned long linear_addr;
 	} arg1;
@@ -428,11 +428,11 @@ struct start_info {
 	unsigned long nr_pages;     /* Total pages allocated to this domain.  */
 	unsigned long shared_info;  /* MACHINE address of shared info struct. */
 	uint32_t flags;             /* SIF_xxx flags.                         */
-	unsigned long store_mfn;    /* MACHINE page number of shared page.    */
+	xen_pfn_t store_mfn;        /* MACHINE page number of shared page.    */
 	uint32_t store_evtchn;      /* Event channel for store communication. */
 	union {
 		struct {
-			unsigned long mfn;  /* MACHINE page number of console page.   */
+			xen_pfn_t mfn;      /* MACHINE page number of console page.   */
 			uint32_t  evtchn;   /* Event channel for console page.        */
 		} domU;
 		struct {
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
index 4d588814510b..45c1aa14b83d 100644
--- a/include/xen/privcmd.h
+++ b/include/xen/privcmd.h
@@ -37,8 +37,6 @@
 #include <linux/compiler.h>
 #include <xen/interface/xen.h>
 
-typedef unsigned long xen_pfn_t;
-
 struct privcmd_hypercall {
 	__u64 op;
 	__u64 arg[5];
-- 
cgit v1.2.3


From 1a1d43318aeb74d679372c0b65029957be274529 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Wed, 22 Aug 2012 17:20:16 +0100
Subject: xen: allow privcmd for HVM guests

This patch removes the "return -ENOSYS" for auto_translated_physmap
guests from privcmd_mmap, thus it allows ARM guests to issue privcmd
mmap calls. However privcmd mmap calls are still going to fail for HVM
and hybrid guests on x86 because the xen_remap_domain_mfn_range
implementation is currently PV only.

Changes in v2:

- better commit message;
- return -EINVAL from xen_remap_domain_mfn_range if
  auto_translated_physmap.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Acked-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c    | 3 +++
 drivers/xen/privcmd.c | 4 ----
 2 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3a73785631ce..885a22354a96 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2310,6 +2310,9 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 	unsigned long range;
 	int err = 0;
 
+	if (xen_feature(XENFEAT_auto_translated_physmap))
+		return -EINVAL;
+
 	prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
 
 	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index ccee0f16bcf8..85226cbeca24 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -380,10 +380,6 @@ static struct vm_operations_struct privcmd_vm_ops = {
 
 static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	/* Unsupported for auto-translate guests. */
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return -ENOSYS;
-
 	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
 	 * how to recreate these mappings */
 	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
-- 
cgit v1.2.3


From 6d7083eee3bc088d1fc30eefabd6263bca40c95a Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 13 Aug 2012 11:00:08 -0400
Subject: xen/swiotlb: Fix compile warnings when using plain integer instead of
 NULL pointer.

arch/x86/xen/pci-swiotlb-xen.c:96:1: warning: Using plain integer as NULL pointer
arch/x86/xen/pci-swiotlb-xen.c:96:1: warning: Using plain integer as NULL pointer

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/pci-swiotlb-xen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 2d58b3ff4fae..1ab45941502d 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -63,6 +63,6 @@ void __init pci_xen_swiotlb_init(void)
 	}
 }
 IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
-		  0,
+		  NULL,
 		  pci_xen_swiotlb_init,
-		  0);
+		  NULL);
-- 
cgit v1.2.3


From d57c5d51a30152f3175d2344cb6395f08bf8ee0c Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 9 Feb 2011 13:32:18 -0500
Subject: ftrace/x86: Add support for -mfentry to x86_64

If the kernel is compiled with gcc 4.6.0 which supports -mfentry,
then use that instead of mcount.

With mcount, frame pointers are forced with the -pg option and we
get something like:

<can_vma_merge_before>:
       55                      push   %rbp
       48 89 e5                mov    %rsp,%rbp
       53                      push   %rbx
       41 51                   push   %r9
       e8 fe 6a 39 00          callq  ffffffff81483d00 <mcount>
       31 c0                   xor    %eax,%eax
       48 89 fb                mov    %rdi,%rbx
       48 89 d7                mov    %rdx,%rdi
       48 33 73 30             xor    0x30(%rbx),%rsi
       48 f7 c6 ff ff ff f7    test   $0xfffffffff7ffffff,%rsi

With -mfentry, frame pointers are no longer forced and the call looks
like this:

<can_vma_merge_before>:
       e8 33 af 37 00          callq  ffffffff81461b40 <__fentry__>
       53                      push   %rbx
       48 89 fb                mov    %rdi,%rbx
       31 c0                   xor    %eax,%eax
       48 89 d7                mov    %rdx,%rdi
       41 51                   push   %r9
       48 33 73 30             xor    0x30(%rbx),%rsi
       48 f7 c6 ff ff ff f7    test   $0xfffffffff7ffffff,%rsi

This adds the ftrace hook at the beginning of the function before a
frame is set up, and allows the function callbacks to be able to access
parameters. As kprobes now can use function tracing (at least on x86)
this speeds up the kprobe hooks that are at the beginning of the
function.

Link: http://lkml.kernel.org/r/20120807194100.130477900@goodmis.org

Acked-by: Ingo Molnar <mingo@kernel.org>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/Kconfig                 |  1 +
 arch/x86/include/asm/ftrace.h    |  7 ++++++-
 arch/x86/kernel/entry_64.S       | 32 +++++++++++++++++++++++++++-----
 arch/x86/kernel/x8664_ksyms_64.c |  6 +++++-
 4 files changed, 39 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a2d19ee750ca..28dd891a0a16 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -36,6 +36,7 @@ config X86
 	select HAVE_KRETPROBES
 	select HAVE_OPTPROBES
 	select HAVE_FTRACE_MCOUNT_RECORD
+	select HAVE_FENTRY if X86_64
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index a6cae0c1720c..9a25b522d377 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -35,7 +35,11 @@
 #endif
 
 #ifdef CONFIG_FUNCTION_TRACER
-#define MCOUNT_ADDR		((long)(mcount))
+#ifdef CC_USING_FENTRY
+# define MCOUNT_ADDR		((long)(__fentry__))
+#else
+# define MCOUNT_ADDR		((long)(mcount))
+#endif
 #define MCOUNT_INSN_SIZE	5 /* sizeof mcount call */
 
 #ifdef CONFIG_DYNAMIC_FTRACE
@@ -46,6 +50,7 @@
 #ifndef __ASSEMBLY__
 extern void mcount(void);
 extern atomic_t modifying_ftrace_code;
+extern void __fentry__(void);
 
 static inline unsigned long ftrace_call_adjust(unsigned long addr)
 {
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b7a81dcb7366..ed767b747fe5 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -68,10 +68,18 @@
 	.section .entry.text, "ax"
 
 #ifdef CONFIG_FUNCTION_TRACER
+
+#ifdef CC_USING_FENTRY
+# define function_hook	__fentry__
+#else
+# define function_hook	mcount
+#endif
+
 #ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(mcount)
+
+ENTRY(function_hook)
 	retq
-END(mcount)
+END(function_hook)
 
 /* skip is set if stack has been adjusted */
 .macro ftrace_caller_setup skip=0
@@ -84,7 +92,11 @@ END(mcount)
 	movq RIP(%rsp), %rdi
 	subq $MCOUNT_INSN_SIZE, %rdi
 	/* Load the parent_ip into the second parameter */
+#ifdef CC_USING_FENTRY
+	movq SS+16(%rsp), %rsi
+#else
 	movq 8(%rbp), %rsi
+#endif
 .endm
 
 ENTRY(ftrace_caller)
@@ -177,7 +189,8 @@ END(ftrace_regs_caller)
 
 
 #else /* ! CONFIG_DYNAMIC_FTRACE */
-ENTRY(mcount)
+
+ENTRY(function_hook)
 	cmpl $0, function_trace_stop
 	jne  ftrace_stub
 
@@ -199,7 +212,11 @@ trace:
 	MCOUNT_SAVE_FRAME
 
 	movq RIP(%rsp), %rdi
+#ifdef CC_USING_FENTRY
+	movq SS+16(%rsp), %rsi
+#else
 	movq 8(%rbp), %rsi
+#endif
 	subq $MCOUNT_INSN_SIZE, %rdi
 
 	call   *ftrace_trace_function
@@ -207,7 +224,7 @@ trace:
 	MCOUNT_RESTORE_FRAME
 
 	jmp ftrace_stub
-END(mcount)
+END(function_hook)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 #endif /* CONFIG_FUNCTION_TRACER */
 
@@ -215,9 +232,14 @@ END(mcount)
 ENTRY(ftrace_graph_caller)
 	MCOUNT_SAVE_FRAME
 
+#ifdef CC_USING_FENTRY
+	leaq SS+16(%rsp), %rdi
+	movq $0, %rdx	/* No framepointers needed */
+#else
 	leaq 8(%rbp), %rdi
-	movq RIP(%rsp), %rsi
 	movq (%rbp), %rdx
+#endif
+	movq RIP(%rsp), %rsi
 	subq $MCOUNT_INSN_SIZE, %rsi
 
 	call	prepare_ftrace_return
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 6020f6f5927c..1330dd102950 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -13,9 +13,13 @@
 #include <asm/ftrace.h>
 
 #ifdef CONFIG_FUNCTION_TRACER
-/* mcount is defined in assembly */
+/* mcount and __fentry__ are defined in assembly */
+#ifdef CC_USING_FENTRY
+EXPORT_SYMBOL(__fentry__);
+#else
 EXPORT_SYMBOL(mcount);
 #endif
+#endif
 
 EXPORT_SYMBOL(__get_user_1);
 EXPORT_SYMBOL(__get_user_2);
-- 
cgit v1.2.3


From 51faaf2b0d5c7f44d82964f0c70b1c4e44d4e633 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 22 Aug 2012 13:00:10 -0400
Subject: Revert "xen/x86: Workaround 64-bit hypervisor and 32-bit initial
 domain." and "xen/x86: Use memblock_reserve for sensitive areas."

This reverts commit 806c312e50f122c47913145cf884f53dd09d9199 and
commit 59b294403e9814e7c1154043567f0d71bac7a511.

And also documents setup.c and why we want to do it that way, which
is that we tried to make the the memblock_reserve more selective so
that it would be clear what region is reserved. Sadly we ran
in the problem wherein on a 64-bit hypervisor with a 32-bit
initial domain, the pt_base has the cr3 value which is not
neccessarily where the pagetable starts! As Jan put it: "
Actually, the adjustment turns out to be correct: The page
tables for a 32-on-64 dom0 get allocated in the order "first L1",
"first L2", "first L3", so the offset to the page table base is
indeed 2. When reading xen/include/public/xen.h's comment
very strictly, this is not a violation (since there nothing is said
that the first thing in the page table space is pointed to by
pt_base; I admit that this seems to be implied though, namely
do I think that it is implied that the page table space is the
range [pt_base, pt_base + nt_pt_frames), whereas that
range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
which - without a priori knowledge - the kernel would have
difficulty to figure out)." - so lets just fall back to the
easy way and reserve the whole region.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c | 60 ------------------------------------------------
 arch/x86/xen/p2m.c       |  5 ----
 arch/x86/xen/setup.c     | 27 ++++++++++++++++++++++
 3 files changed, 27 insertions(+), 65 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 511f92d79e4a..ff962d4b821e 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -998,66 +998,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 
 	return ret;
 }
-/*
- * If the MFN is not in the m2p (provided to us by the hypervisor) this
- * function won't do anything. In practice this means that the XenBus
- * MFN won't be available for the initial domain. */
-static unsigned long __init xen_reserve_mfn(unsigned long mfn)
-{
-	unsigned long pfn, end_pfn = 0;
-
-	if (!mfn)
-		return end_pfn;
-
-	pfn = mfn_to_pfn(mfn);
-	if (phys_to_machine_mapping_valid(pfn)) {
-		end_pfn = PFN_PHYS(pfn) + PAGE_SIZE;
-		memblock_reserve(PFN_PHYS(pfn), end_pfn);
-	}
-	return end_pfn;
-}
-static void __init xen_reserve_internals(void)
-{
-	unsigned long size;
-	unsigned long last_phys = 0;
-
-	if (!xen_pv_domain())
-		return;
-
-	/* xen_start_info does not exist in the M2P, hence can't use
-	 * xen_reserve_mfn. */
-	memblock_reserve(__pa(xen_start_info), PAGE_SIZE);
-	last_phys = __pa(xen_start_info) + PAGE_SIZE;
-
-	last_phys = max(xen_reserve_mfn(PFN_DOWN(xen_start_info->shared_info)), last_phys);
-	last_phys = max(xen_reserve_mfn(xen_start_info->store_mfn), last_phys);
 
-	if (!xen_initial_domain())
-		last_phys = max(xen_reserve_mfn(xen_start_info->console.domU.mfn), last_phys);
-
-	if (xen_feature(XENFEAT_auto_translated_physmap))
-		return;
-
-	/*
-	 * ALIGN up to compensate for the p2m_page pointing to an array that
-	 * can partially filled (look in xen_build_dynamic_phys_to_machine).
-	 */
-
-	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
-
-	/* We could use xen_reserve_mfn here, but would end up looping quite
-	 * a lot (and call memblock_reserve for each PAGE), so lets just use
-	 * the easy way and reserve it wholesale. */
-	memblock_reserve(__pa(xen_start_info->mfn_list), size);
-	last_phys = max(__pa(xen_start_info->mfn_list) + size, last_phys);
-	/* The pagetables are reserved in mmu.c */
-
-	/* Under 64-bit hypervisor with a 32-bit domain, the hypervisor
-	 * offsets the pt_base by two pages. Hence the reservation that is done
-	 * in mmu.c misses two pages. We correct it here if we detect this. */
-	if (last_phys < __pa(xen_start_info->pt_base))
-		memblock_reserve(last_phys, __pa(xen_start_info->pt_base) - last_phys);
-}
 void xen_setup_shared_info(void)
 {
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
@@ -1421,7 +1362,6 @@ asmlinkage void __init xen_start_kernel(void)
 	xen_raw_console_write("mapping kernel into physical memory\n");
 	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
 
-	xen_reserve_internals();
 	/* Allocate and initialize top and mid mfn levels for p2m structure */
 	xen_build_mfn_list_list();
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 6a2bfa43c8a1..e4adbfbdfada 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -388,11 +388,6 @@ void __init xen_build_dynamic_phys_to_machine(void)
 	}
 
 	m2p_override_init();
-
-	/* NOTE: We cannot call memblock_reserve here for the mfn_list as there
-	 * isn't enough pieces to make it work (for one - we are still using the
-	 * Xen provided pagetable). Do it later in xen_reserve_internals.
-	 */
 }
 
 unsigned long get_phys_to_machine(unsigned long pfn)
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index 9efca750405d..740517be4da5 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -424,6 +424,33 @@ char * __init xen_memory_setup(void)
 	e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
 			E820_RESERVED);
 
+	/*
+	 * Reserve Xen bits:
+	 *  - mfn_list
+	 *  - xen_start_info
+	 * See comment above "struct start_info" in <xen/interface/xen.h>
+	 * We tried to make the the memblock_reserve more selective so
+	 * that it would be clear what region is reserved. Sadly we ran
+	 * in the problem wherein on a 64-bit hypervisor with a 32-bit
+	 * initial domain, the pt_base has the cr3 value which is not
+	 * neccessarily where the pagetable starts! As Jan put it: "
+	 * Actually, the adjustment turns out to be correct: The page
+	 * tables for a 32-on-64 dom0 get allocated in the order "first L1",
+	 * "first L2", "first L3", so the offset to the page table base is
+	 * indeed 2. When reading xen/include/public/xen.h's comment
+	 * very strictly, this is not a violation (since there nothing is said
+	 * that the first thing in the page table space is pointed to by
+	 * pt_base; I admit that this seems to be implied though, namely
+	 * do I think that it is implied that the page table space is the
+	 * range [pt_base, pt_base + nt_pt_frames), whereas that
+	 * range here indeed is [pt_base - 2, pt_base - 2 + nt_pt_frames),
+	 * which - without a priori knowledge - the kernel would have
+	 * difficulty to figure out)." - so lets just fall back to the
+	 * easy way and reserve the whole region.
+	 */
+	memblock_reserve(__pa(xen_start_info->mfn_list),
+			 xen_start_info->pt_base - xen_start_info->mfn_list);
+
 	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
 
 	return "Xen";
-- 
cgit v1.2.3


From 3699aad047e16a5775b1d051425f422a9384270d Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 28 Jun 2012 22:47:35 -0400
Subject: xen/mmu: The xen_setup_kernel_pagetable doesn't need to return
 anything.

We don't need to return the new PGD - as we do not use it.

Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c |  5 +----
 arch/x86/xen/mmu.c       | 10 ++--------
 arch/x86/xen/xen-ops.h   |  2 +-
 3 files changed, 4 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index ff962d4b821e..d87a038a0484 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1258,7 +1258,6 @@ asmlinkage void __init xen_start_kernel(void)
 {
 	struct physdev_set_iopl set_iopl;
 	int rc;
-	pgd_t *pgd;
 
 	if (!xen_start_info)
 		return;
@@ -1350,8 +1349,6 @@ asmlinkage void __init xen_start_kernel(void)
 	acpi_numa = -1;
 #endif
 
-	pgd = (pgd_t *)xen_start_info->pt_base;
-
 	/* Don't do the full vcpu_info placement stuff until we have a
 	   possible map and a non-dummy shared_info. */
 	per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0];
@@ -1360,7 +1357,7 @@ asmlinkage void __init xen_start_kernel(void)
 	early_boot_irqs_disabled = true;
 
 	xen_raw_console_write("mapping kernel into physical memory\n");
-	pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages);
+	xen_setup_kernel_pagetable((pgd_t *)xen_start_info->pt_base, xen_start_info->nr_pages);
 
 	/* Allocate and initialize top and mid mfn levels for p2m structure */
 	xen_build_mfn_list_list();
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3a73785631ce..4ac21a4c6da4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1719,8 +1719,7 @@ static void convert_pfn_mfn(void *v)
  * of the physical mapping once some sort of allocator has been set
  * up.
  */
-pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
-					 unsigned long max_pfn)
+void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pud_t *l3;
 	pmd_t *l2;
@@ -1781,8 +1780,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 
 	memblock_reserve(__pa(xen_start_info->pt_base),
 			 xen_start_info->nr_pt_frames * PAGE_SIZE);
-
-	return pgd;
 }
 #else	/* !CONFIG_X86_64 */
 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
@@ -1825,8 +1822,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 	pv_mmu_ops.write_cr3 = &xen_write_cr3;
 }
 
-pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
-					 unsigned long max_pfn)
+void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pmd_t *kernel_pmd;
 
@@ -1858,8 +1854,6 @@ pgd_t * __init xen_setup_kernel_pagetable(pgd_t *pgd,
 
 	memblock_reserve(__pa(xen_start_info->pt_base),
 			 xen_start_info->nr_pt_frames * PAGE_SIZE);
-
-	return initial_page_table;
 }
 #endif	/* CONFIG_X86_64 */
 
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 202d4c150154..2230f57a6ebe 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -27,7 +27,7 @@ void xen_setup_mfn_list_list(void);
 void xen_setup_shared_info(void);
 void xen_build_mfn_list_list(void);
 void xen_setup_machphys_mapping(void);
-pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
+void xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn);
 void xen_reserve_top(void);
 extern unsigned long xen_max_p2m_pfn;
 
-- 
cgit v1.2.3


From 4fac153a7a260e40e10008a0d7a272719684e4cd Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 12 Jul 2012 13:55:25 -0400
Subject: xen/mmu: Provide comments describing the _ka and _va aliasing issue

Which is that the level2_kernel_pgt (__ka virtual addresses)
and level2_ident_pgt (__va virtual address) contain the same
PMD entries. So if you modify a PTE in __ka, it will be reflected
in __va (and vice-versa).

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 4ac21a4c6da4..6ba610098dd9 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1734,19 +1734,36 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	init_level4_pgt[0] = __pgd(0);
 
 	/* Pre-constructed entries are in pfn, so convert to mfn */
+	/* L4[272] -> level3_ident_pgt
+	 * L4[511] -> level3_kernel_pgt */
 	convert_pfn_mfn(init_level4_pgt);
+
+	/* L3_i[0] -> level2_ident_pgt */
 	convert_pfn_mfn(level3_ident_pgt);
+	/* L3_k[510] -> level2_kernel_pgt
+	 * L3_i[511] -> level2_fixmap_pgt */
 	convert_pfn_mfn(level3_kernel_pgt);
 
+	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
 
+	/* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
+	 * Both L4[272][0] and L4[511][511] have entries that point to the same
+	 * L2 (PMD) tables. Meaning that if you modify it in __va space
+	 * it will be also modified in the __ka space! (But if you just
+	 * modify the PMD table to point to other PTE's or none, then you
+	 * are OK - which is what cleanup_highmap does) */
 	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+	/* Graft it onto L4[511][511] */
 	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
 
+	/* Get [511][510] and graft that in level2_fixmap_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
 	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+	/* Note that we don't do anything with level1_fixmap_pgt which
+	 * we don't need. */
 
 	/* Set up identity map */
 	xen_map_identity_early(level2_ident_pgt, max_pfn);
-- 
cgit v1.2.3


From ae895ed7839f918bbc8d5425b8973b25a534f4eb Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 26 Jul 2012 11:57:04 -0400
Subject: xen/mmu: use copy_page instead of memcpy.

After all, this is what it is there for.

Acked-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 6ba610098dd9..7247e5a62f27 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1754,14 +1754,14 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	 * it will be also modified in the __ka space! (But if you just
 	 * modify the PMD table to point to other PTE's or none, then you
 	 * are OK - which is what cleanup_highmap does) */
-	memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+	copy_page(level2_ident_pgt, l2);
 	/* Graft it onto L4[511][511] */
-	memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+	copy_page(level2_kernel_pgt, l2);
 
 	/* Get [511][510] and graft that in level2_fixmap_pgt */
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud);
-	memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD);
+	copy_page(level2_fixmap_pgt, l2);
 	/* Note that we don't do anything with level1_fixmap_pgt which
 	 * we don't need. */
 
@@ -1821,8 +1821,7 @@ static void __init xen_write_cr3_init(unsigned long cr3)
 	 */
 	swapper_kernel_pmd =
 		extend_brk(sizeof(pmd_t) * PTRS_PER_PMD, PAGE_SIZE);
-	memcpy(swapper_kernel_pmd, initial_kernel_pmd,
-	       sizeof(pmd_t) * PTRS_PER_PMD);
+	copy_page(swapper_kernel_pmd, initial_kernel_pmd);
 	swapper_pg_dir[KERNEL_PGD_BOUNDARY] =
 		__pgd(__pa(swapper_kernel_pmd) | _PAGE_PRESENT);
 	set_page_prot(swapper_kernel_pmd, PAGE_KERNEL_RO);
@@ -1851,11 +1850,11 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 				  512*1024);
 
 	kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd);
-	memcpy(initial_kernel_pmd, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD);
+	copy_page(initial_kernel_pmd, kernel_pmd);
 
 	xen_map_identity_early(initial_kernel_pmd, max_pfn);
 
-	memcpy(initial_page_table, pgd, sizeof(pgd_t) * PTRS_PER_PGD);
+	copy_page(initial_page_table, pgd);
 	initial_page_table[KERNEL_PGD_BOUNDARY] =
 		__pgd(__pa(initial_kernel_pmd) | _PAGE_PRESENT);
 
-- 
cgit v1.2.3


From caaf9ecf16feffa4f1a5a0d617bc78906a114514 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 12 Jul 2012 13:59:36 -0400
Subject: xen/mmu: For 64-bit do not call xen_map_identity_early

B/c we do not need it. During the startup the Xen provides
us with all the initial memory mapped that we need to function.

The initial memory mapped is up to the bootstack, which means
we can reference using __ka up to 4.f):

(from xen/interface/xen.h):

 4. This the order of bootstrap elements in the initial virtual region:
   a. relocated kernel image
   b. initial ram disk              [mod_start, mod_len]
   c. list of allocated page frames [mfn_list, nr_pages]
   d. start_info_t structure        [register ESI (x86)]
   e. bootstrap page tables         [pt_base, CR3 (x86)]
   f. bootstrap stack               [register ESP (x86)]

(initial ram disk may be ommitted).

[v1: More comments in git commit]
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 7247e5a62f27..a59070b09055 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -84,6 +84,7 @@
  */
 DEFINE_SPINLOCK(xen_reservation_lock);
 
+#ifdef CONFIG_X86_32
 /*
  * Identity map, in addition to plain kernel map.  This needs to be
  * large enough to allocate page table pages to allocate the rest.
@@ -91,7 +92,7 @@ DEFINE_SPINLOCK(xen_reservation_lock);
  */
 #define LEVEL1_IDENT_ENTRIES	(PTRS_PER_PTE * 4)
 static RESERVE_BRK_ARRAY(pte_t, level1_ident_pgt, LEVEL1_IDENT_ENTRIES);
-
+#endif
 #ifdef CONFIG_X86_64
 /* l3 pud for userspace vsyscall mapping */
 static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;
@@ -1628,7 +1629,7 @@ static void set_page_prot(void *addr, pgprot_t prot)
 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0))
 		BUG();
 }
-
+#ifdef CONFIG_X86_32
 static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 {
 	unsigned pmdidx, pteidx;
@@ -1679,7 +1680,7 @@ static void __init xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn)
 
 	set_page_prot(pmd, PAGE_KERNEL_RO);
 }
-
+#endif
 void __init xen_setup_machphys_mapping(void)
 {
 	struct xen_machphys_mapping mapping;
@@ -1765,14 +1766,12 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* Note that we don't do anything with level1_fixmap_pgt which
 	 * we don't need. */
 
-	/* Set up identity map */
-	xen_map_identity_early(level2_ident_pgt, max_pfn);
-
 	/* Make pagetable pieces RO */
 	set_page_prot(init_level4_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
+	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
 
-- 
cgit v1.2.3


From 488f046df922af992c1a718eff276529c0510885 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 26 Jul 2012 12:00:56 -0400
Subject: xen/mmu: Recycle the Xen provided L4, L3, and L2 pages

As we are not using them. We end up only using the L1 pagetables
and grafting those to our page-tables.

[v1: Per Stefano's suggestion squashed two commits]
[v2: Per Stefano's suggestion simplified loop]
[v3: Fix smatch warnings]
[v4: Add more comments]
Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 42 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index a59070b09055..b44e6a88ea74 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1708,7 +1708,20 @@ static void convert_pfn_mfn(void *v)
 	for (i = 0; i < PTRS_PER_PTE; i++)
 		pte[i] = xen_make_pte(pte[i].pte);
 }
-
+static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
+				 unsigned long addr)
+{
+	if (*pt_base == PFN_DOWN(__pa(addr))) {
+		set_page_prot((void *)addr, PAGE_KERNEL);
+		clear_page((void *)addr);
+		(*pt_base)++;
+	}
+	if (*pt_end == PFN_DOWN(__pa(addr))) {
+		set_page_prot((void *)addr, PAGE_KERNEL);
+		clear_page((void *)addr);
+		(*pt_end)--;
+	}
+}
 /*
  * Set up the initial kernel pagetable.
  *
@@ -1724,6 +1737,9 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 {
 	pud_t *l3;
 	pmd_t *l2;
+	unsigned long addr[3];
+	unsigned long pt_base, pt_end;
+	unsigned i;
 
 	/* max_pfn_mapped is the last pfn mapped in the initial memory
 	 * mappings. Considering that on Xen after the kernel mappings we
@@ -1731,6 +1747,9 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	 * set max_pfn_mapped to the last real pfn mapped. */
 	max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
 
+	pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
+	pt_end = pt_base + xen_start_info->nr_pt_frames;
+
 	/* Zap identity mapping */
 	init_level4_pgt[0] = __pgd(0);
 
@@ -1749,6 +1768,9 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
 
+	addr[0] = (unsigned long)pgd;
+	addr[1] = (unsigned long)l3;
+	addr[2] = (unsigned long)l2;
 	/* Graft it onto L4[272][0]. Note that we creating an aliasing problem:
 	 * Both L4[272][0] and L4[511][511] have entries that point to the same
 	 * L2 (PMD) tables. Meaning that if you modify it in __va space
@@ -1782,20 +1804,26 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 	/* Unpin Xen-provided one */
 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
 
-	/* Switch over */
-	pgd = init_level4_pgt;
-
 	/*
 	 * At this stage there can be no user pgd, and no page
 	 * structure to attach it to, so make sure we just set kernel
 	 * pgd.
 	 */
 	xen_mc_batch();
-	__xen_write_cr3(true, __pa(pgd));
+	__xen_write_cr3(true, __pa(init_level4_pgt));
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 
-	memblock_reserve(__pa(xen_start_info->pt_base),
-			 xen_start_info->nr_pt_frames * PAGE_SIZE);
+	/* We can't that easily rip out L3 and L2, as the Xen pagetables are
+	 * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ...  for
+	 * the initial domain. For guests using the toolstack, they are in:
+	 * [L4], [L3], [L2], [L1], [L1], order .. So for dom0 we can only
+	 * rip out the [L4] (pgd), but for guests we shave off three pages.
+	 */
+	for (i = 0; i < ARRAY_SIZE(addr); i++)
+		check_pt_base(&pt_base, &pt_end, addr[i]);
+
+	/* Our (by three pages) smaller Xen pagetable that we are using */
+	memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
 }
 #else	/* !CONFIG_X86_64 */
 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
-- 
cgit v1.2.3


From 357a3cfb147ee8e97c6f9cdc51e9a33aa56f7d99 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 19 Jul 2012 13:52:29 -0400
Subject: xen/p2m: Add logic to revector a P2M tree to use __va leafs.

During bootup Xen supplies us with a P2M array. It sticks
it right after the ramdisk, as can be seen with a 128GB PV guest:

(certain parts removed for clarity):
xc_dom_build_image: called
xc_dom_alloc_segment:   kernel       : 0xffffffff81000000 -> 0xffffffff81e43000  (pfn 0x1000 + 0xe43 pages)
xc_dom_pfn_to_ptr: domU mapping: pfn 0x1000+0xe43 at 0x7f097d8bf000
xc_dom_alloc_segment:   ramdisk      : 0xffffffff81e43000 -> 0xffffffff925c7000  (pfn 0x1e43 + 0x10784 pages)
xc_dom_pfn_to_ptr: domU mapping: pfn 0x1e43+0x10784 at 0x7f0952dd2000
xc_dom_alloc_segment:   phys2mach    : 0xffffffff925c7000 -> 0xffffffffa25c7000  (pfn 0x125c7 + 0x10000 pages)
xc_dom_pfn_to_ptr: domU mapping: pfn 0x125c7+0x10000 at 0x7f0942dd2000
xc_dom_alloc_page   :   start info   : 0xffffffffa25c7000 (pfn 0x225c7)
xc_dom_alloc_page   :   xenstore     : 0xffffffffa25c8000 (pfn 0x225c8)
xc_dom_alloc_page   :   console      : 0xffffffffa25c9000 (pfn 0x225c9)
nr_page_tables: 0x0000ffffffffffff/48: 0xffff000000000000 -> 0xffffffffffffffff, 1 table(s)
nr_page_tables: 0x0000007fffffffff/39: 0xffffff8000000000 -> 0xffffffffffffffff, 1 table(s)
nr_page_tables: 0x000000003fffffff/30: 0xffffffff80000000 -> 0xffffffffbfffffff, 1 table(s)
nr_page_tables: 0x00000000001fffff/21: 0xffffffff80000000 -> 0xffffffffa27fffff, 276 table(s)
xc_dom_alloc_segment:   page tables  : 0xffffffffa25ca000 -> 0xffffffffa26e1000  (pfn 0x225ca + 0x117 pages)
xc_dom_pfn_to_ptr: domU mapping: pfn 0x225ca+0x117 at 0x7f097d7a8000
xc_dom_alloc_page   :   boot stack   : 0xffffffffa26e1000 (pfn 0x226e1)
xc_dom_build_image  : virt_alloc_end : 0xffffffffa26e2000
xc_dom_build_image  : virt_pgtab_end : 0xffffffffa2800000

So the physical memory and virtual (using __START_KERNEL_map addresses)
layout looks as so:

  phys                             __ka
/------------\                   /-------------------\
| 0          | empty             | 0xffffffff80000000|
| ..         |                   | ..                |
| 16MB       | <= kernel starts  | 0xffffffff81000000|
| ..         |                   |                   |
| 30MB       | <= kernel ends => | 0xffffffff81e43000|
| ..         |  & ramdisk starts | ..                |
| 293MB      | <= ramdisk ends=> | 0xffffffff925c7000|
| ..         |  & P2M starts     | ..                |
| ..         |                   | ..                |
| 549MB      | <= P2M ends    => | 0xffffffffa25c7000|
| ..         | start_info        | 0xffffffffa25c7000|
| ..         | xenstore          | 0xffffffffa25c8000|
| ..         | cosole            | 0xffffffffa25c9000|
| 549MB      | <= page tables => | 0xffffffffa25ca000|
| ..         |                   |                   |
| 550MB      | <= PGT end     => | 0xffffffffa26e1000|
| ..         | boot stack        |                   |
\------------/                   \-------------------/

As can be seen, the ramdisk, P2M and pagetables are taking
a bit of __ka addresses space. Which is a problem since the
MODULES_VADDR starts at 0xffffffffa0000000 - and P2M sits
right in there! This results during bootup with the inability to
load modules, with this error:

------------[ cut here ]------------
WARNING: at /home/konrad/ssd/linux/mm/vmalloc.c:106 vmap_page_range_noflush+0x2d9/0x370()
Call Trace:
 [<ffffffff810719fa>] warn_slowpath_common+0x7a/0xb0
 [<ffffffff81030279>] ? __raw_callee_save_xen_pmd_val+0x11/0x1e
 [<ffffffff81071a45>] warn_slowpath_null+0x15/0x20
 [<ffffffff81130b89>] vmap_page_range_noflush+0x2d9/0x370
 [<ffffffff81130c4d>] map_vm_area+0x2d/0x50
 [<ffffffff811326d0>] __vmalloc_node_range+0x160/0x250
 [<ffffffff810c5369>] ? module_alloc_update_bounds+0x19/0x80
 [<ffffffff810c6186>] ? load_module+0x66/0x19c0
 [<ffffffff8105cadc>] module_alloc+0x5c/0x60
 [<ffffffff810c5369>] ? module_alloc_update_bounds+0x19/0x80
 [<ffffffff810c5369>] module_alloc_update_bounds+0x19/0x80
 [<ffffffff810c70c3>] load_module+0xfa3/0x19c0
 [<ffffffff812491f6>] ? security_file_permission+0x86/0x90
 [<ffffffff810c7b3a>] sys_init_module+0x5a/0x220
 [<ffffffff815ce339>] system_call_fastpath+0x16/0x1b
---[ end trace fd8f7704fdea0291 ]---
vmalloc: allocation failure, allocated 16384 of 20480 bytes
modprobe: page allocation failure: order:0, mode:0xd2

Since the __va and __ka are 1:1 up to MODULES_VADDR and
cleanup_highmap rids __ka of the ramdisk mapping, what
we want to do is similar - get rid of the P2M in the __ka
address space. There are two ways of fixing this:

 1) All P2M lookups instead of using the __ka address would
    use the __va address. This means we can safely erase from
    __ka space the PMD pointers that point to the PFNs for
    P2M array and be OK.
 2). Allocate a new array, copy the existing P2M into it,
    revector the P2M tree to use that, and return the old
    P2M to the memory allocate. This has the advantage that
    it sets the stage for using XEN_ELF_NOTE_INIT_P2M
    feature. That feature allows us to set the exact virtual
    address space we want for the P2M - and allows us to
    boot as initial domain on large machines.

So we pick option 2).

This patch only lays the groundwork in the P2M code. The patch
that modifies the MMU is called "xen/mmu: Copy and revector the P2M tree."

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/p2m.c     | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/xen/xen-ops.h |  1 +
 2 files changed, 71 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index e4adbfbdfada..996ee2bf7bdb 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -389,7 +389,77 @@ void __init xen_build_dynamic_phys_to_machine(void)
 
 	m2p_override_init();
 }
+#ifdef CONFIG_X86_64
+#include <linux/bootmem.h>
+unsigned long __init xen_revector_p2m_tree(void)
+{
+	unsigned long va_start;
+	unsigned long va_end;
+	unsigned long pfn;
+	unsigned long *mfn_list = NULL;
+	unsigned long size;
+
+	va_start = xen_start_info->mfn_list;
+	/*We copy in increments of P2M_PER_PAGE * sizeof(unsigned long),
+	 * so make sure it is rounded up to that */
+	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+	va_end = va_start + size;
+
+	/* If we were revectored already, don't do it again. */
+	if (va_start <= __START_KERNEL_map && va_start >= __PAGE_OFFSET)
+		return 0;
+
+	mfn_list = alloc_bootmem_align(size, PAGE_SIZE);
+	if (!mfn_list) {
+		pr_warn("Could not allocate space for a new P2M tree!\n");
+		return xen_start_info->mfn_list;
+	}
+	/* Fill it out with INVALID_P2M_ENTRY value */
+	memset(mfn_list, 0xFF, size);
+
+	for (pfn = 0; pfn < ALIGN(MAX_DOMAIN_PAGES, P2M_PER_PAGE); pfn += P2M_PER_PAGE) {
+		unsigned topidx = p2m_top_index(pfn);
+		unsigned mididx;
+		unsigned long *mid_p;
+
+		if (!p2m_top[topidx])
+			continue;
+
+		if (p2m_top[topidx] == p2m_mid_missing)
+			continue;
+
+		mididx = p2m_mid_index(pfn);
+		mid_p = p2m_top[topidx][mididx];
+		if (!mid_p)
+			continue;
+		if ((mid_p == p2m_missing) || (mid_p == p2m_identity))
+			continue;
+
+		if ((unsigned long)mid_p == INVALID_P2M_ENTRY)
+			continue;
+
+		/* The old va. Rebase it on mfn_list */
+		if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
+			unsigned long *new;
+
+			new = &mfn_list[pfn];
+
+			copy_page(new, mid_p);
+			p2m_top[topidx][mididx] = &mfn_list[pfn];
+			p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn]);
 
+		}
+		/* This should be the leafs allocated for identity from _brk. */
+	}
+	return (unsigned long)mfn_list;
+
+}
+#else
+unsigned long __init xen_revector_p2m_tree(void)
+{
+	return 0;
+}
+#endif
 unsigned long get_phys_to_machine(unsigned long pfn)
 {
 	unsigned topidx, mididx, idx;
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 2230f57a6ebe..bb5a8105ea86 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -45,6 +45,7 @@ void xen_hvm_init_shared_info(void);
 void xen_unplug_emulated_devices(void);
 
 void __init xen_build_dynamic_phys_to_machine(void);
+unsigned long __init xen_revector_p2m_tree(void);
 
 void xen_init_irq_ops(void);
 void xen_setup_timer(int cpu);
-- 
cgit v1.2.3


From 7f9140626c757b773624b97865cb53c2a8348a69 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 26 Jul 2012 12:47:40 -0400
Subject: xen/mmu: Copy and revector the P2M tree.

Please first read the description in "xen/p2m: Add logic to revector a
P2M tree to use __va leafs" patch.

The 'xen_revector_p2m_tree()' function allocates a new P2M tree
copies the contents of the old one in it, and returns the new one.

At this stage, the __ka address space (which is what the old
P2M tree was using) is partially disassembled. The cleanup_highmap
has removed the PMD entries from 0-16MB and anything past _brk_end
up to the max_pfn_mapped (which is the end of the ramdisk).

We have revectored the P2M tree (and the one for save/restore as well)
to use new shiny __va address to new MFNs. The xen_start_info
has been taken care of already in 'xen_setup_kernel_pagetable()' and
xen_start_info->shared_info in 'xen_setup_shared_info()', so
we are free to roam and delete PMD entries - which is exactly what
we are going to do. We rip out the __ka for the old P2M array.

[v1: Fix smatch warnings]
[v2: memset was doing 0 instead of 0xff]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b44e6a88ea74..a640949f78d4 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1183,9 +1183,64 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
 
 static void xen_post_allocator_init(void);
 
+#ifdef CONFIG_X86_64
+static void __init xen_cleanhighmap(unsigned long vaddr,
+				    unsigned long vaddr_end)
+{
+	unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
+	pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);
+
+	/* NOTE: The loop is more greedy than the cleanup_highmap variant.
+	 * We include the PMD passed in on _both_ boundaries. */
+	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PAGE_SIZE));
+			pmd++, vaddr += PMD_SIZE) {
+		if (pmd_none(*pmd))
+			continue;
+		if (vaddr < (unsigned long) _text || vaddr > kernel_end)
+			set_pmd(pmd, __pmd(0));
+	}
+	/* In case we did something silly, we should crash in this function
+	 * instead of somewhere later and be confusing. */
+	xen_mc_flush();
+}
+#endif
 static void __init xen_pagetable_setup_done(pgd_t *base)
 {
+#ifdef CONFIG_X86_64
+	unsigned long size;
+	unsigned long addr;
+#endif
+
 	xen_setup_shared_info();
+#ifdef CONFIG_X86_64
+	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
+		unsigned long new_mfn_list;
+
+		size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+
+		/* On 32-bit, we get zero so this never gets executed. */
+		new_mfn_list = xen_revector_p2m_tree();
+		if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) {
+			/* using __ka address and sticking INVALID_P2M_ENTRY! */
+			memset((void *)xen_start_info->mfn_list, 0xff, size);
+
+			/* We should be in __ka space. */
+			BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
+			addr = xen_start_info->mfn_list;
+			size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
+			/* We roundup to the PMD, which means that if anybody at this stage is
+			 * using the __ka address of xen_start_info or xen_start_info->shared_info
+			 * they are in going to crash. Fortunatly we have already revectored
+			 * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */
+			size = roundup(size, PMD_SIZE);
+			xen_cleanhighmap(addr, addr + size);
+
+			memblock_free(__pa(xen_start_info->mfn_list), size);
+			/* And revector! Bye bye old array */
+			xen_start_info->mfn_list = new_mfn_list;
+		}
+	}
+#endif
 	xen_post_allocator_init();
 }
 
@@ -1824,6 +1879,8 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
 
 	/* Our (by three pages) smaller Xen pagetable that we are using */
 	memblock_reserve(PFN_PHYS(pt_base), (pt_end - pt_base) * PAGE_SIZE);
+	/* Revector the xen_start_info */
+	xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
 }
 #else	/* !CONFIG_X86_64 */
 static RESERVE_BRK_ARRAY(pmd_t, initial_kernel_pmd, PTRS_PER_PMD);
-- 
cgit v1.2.3


From 3aca7fbc8ede0dd194317b2e3144815128ffb232 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 14 Aug 2012 14:34:00 -0400
Subject: xen/mmu: Remove from __ka space PMD entries for pagetables.

Please first read the description in "xen/mmu: Copy and revector the
P2M tree."

At this stage, the __ka address space (which is what the old
P2M tree was using) is partially disassembled. The cleanup_highmap
has removed the PMD entries from 0-16MB and anything past _brk_end
up to the max_pfn_mapped (which is the end of the ramdisk).

The xen_remove_p2m_tree and code around has ripped out the __ka for
the old P2M array.

Here we continue on doing it to where the Xen page-tables were.
It is safe to do it, as the page-tables are addressed using __va.
For good measure we delete anything that is within MODULES_VADDR
and up to the end of the PMD.

At this point the __ka only contains PMD entries for the start
of the kernel up to __brk.

[v1: Per Stefano's suggestion wrapped the MODULES_VADDR in debug]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index a640949f78d4..3f8e963b76c0 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1240,6 +1240,25 @@ static void __init xen_pagetable_setup_done(pgd_t *base)
 			xen_start_info->mfn_list = new_mfn_list;
 		}
 	}
+	/* At this stage, cleanup_highmap has already cleaned __ka space
+	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
+	 * the ramdisk). We continue on, erasing PMD entries that point to page
+	 * tables - do note that they are accessible at this stage via __va.
+	 * For good measure we also round up to the PMD - which means that if
+	 * anybody is using __ka address to the initial boot-stack - and try
+	 * to use it - they are going to crash. The xen_start_info has been
+	 * taken care of already in xen_setup_kernel_pagetable. */
+	addr = xen_start_info->pt_base;
+	size = roundup(xen_start_info->nr_pt_frames * PAGE_SIZE, PMD_SIZE);
+
+	xen_cleanhighmap(addr, addr + size);
+	xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
+#ifdef DEBUG
+	/* This is superflous and is not neccessary, but you know what
+	 * lets do it. The MODULES_VADDR -> MODULES_END should be clear of
+	 * anything at this stage. */
+	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
+#endif
 #endif
 	xen_post_allocator_init();
 }
-- 
cgit v1.2.3


From 785f62314984ea3af9dd830b020289ba2509ae69 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 14 Aug 2012 16:37:31 -0400
Subject: xen/mmu: Release just the MFN list, not MFN list and part of
 pagetables.

We call memblock_reserve for [start of mfn list] -> [PMD aligned end
of mfn list] instead of <start of mfn list> -> <page aligned end of mfn list].

This has the disastrous effect that if at bootup the end of mfn_list is
not PMD aligned we end up returning to memblock parts of the region
past the mfn_list array. And those parts are the PTE tables with
the disastrous effect of seeing this at bootup:

Write protecting the kernel read-only data: 10240k
Freeing unused kernel memory: 1860k freed
Freeing unused kernel memory: 200k freed
(XEN) mm.c:2429:d0 Bad type (saw 1400000000000002 != exp 7000000000000000) for mfn 116a80 (pfn 14e26)
...
(XEN) mm.c:908:d0 Error getting mfn 116a83 (pfn 14e2a) from L1 entry 8000000116a83067 for l1e_owner=0, pg_owner=0
(XEN) mm.c:908:d0 Error getting mfn 4040 (pfn 5555555555555555) from L1 entry 0000000004040601 for l1e_owner=0, pg_owner=0
.. and so on.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 3f8e963b76c0..5b2cb54425ce 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1227,7 +1227,6 @@ static void __init xen_pagetable_setup_done(pgd_t *base)
 			/* We should be in __ka space. */
 			BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map);
 			addr = xen_start_info->mfn_list;
-			size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
 			/* We roundup to the PMD, which means that if anybody at this stage is
 			 * using the __ka address of xen_start_info or xen_start_info->shared_info
 			 * they are in going to crash. Fortunatly we have already revectored
@@ -1235,6 +1234,7 @@ static void __init xen_pagetable_setup_done(pgd_t *base)
 			size = roundup(size, PMD_SIZE);
 			xen_cleanhighmap(addr, addr + size);
 
+			size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
 			memblock_free(__pa(xen_start_info->mfn_list), size);
 			/* And revector! Bye bye old array */
 			xen_start_info->mfn_list = new_mfn_list;
-- 
cgit v1.2.3


From 3fc509fc0c590900568ef516a37101d88f3476f5 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 16 Aug 2012 16:38:55 -0400
Subject: xen/p2m: When revectoring deal with holes in the P2M array.

When we free the PFNs and then subsequently populate them back
during bootup:

Freeing 20000-20200 pfn range: 512 pages freed
1-1 mapping on 20000->20200
Freeing 40000-40200 pfn range: 512 pages freed
1-1 mapping on 40000->40200
Freeing bad80-badf4 pfn range: 116 pages freed
1-1 mapping on bad80->badf4
Freeing badf6-bae7f pfn range: 137 pages freed
1-1 mapping on badf6->bae7f
Freeing bb000-100000 pfn range: 282624 pages freed
1-1 mapping on bb000->100000
Released 283999 pages of unused memory
Set 283999 page(s) to 1-1 mapping
Populating 1acb8a-1f20e9 pfn range: 283999 pages added

We end up having the P2M array (that is the one that was
grafted on the P2M tree) filled with IDENTITY_FRAME or
INVALID_P2M_ENTRY) entries. The patch titled

"xen/p2m: Reuse existing P2M leafs if they are filled with 1:1 PFNs or INVALID."
recycles said slots and replaces the P2M tree leaf's with
 &mfn_list[xx] with p2m_identity or p2m_missing.

And re-uses the P2M array sections for other P2M tree leaf's.
For the above mentioned bootup excerpt, the PFNs at
0x20000->0x20200 are going to be IDENTITY based:

P2M[0][256][0] -> P2M[0][257][0] get turned in IDENTITY_FRAME.

We can re-use that and replace P2M[0][256] to point to p2m_identity.
The "old" page (the grafted P2M array provided by Xen) that was at
P2M[0][256] gets put somewhere else. Specifically at P2M[6][358],
b/c when we populate back:

Populating 1acb8a-1f20e9 pfn range: 283999 pages added

we fill P2M[6][358][0] (and P2M[6][358], P2M[6][359], ...) with
the new MFNs.

That is all OK, except when we revector we assume that the PFN
count would be the same in the grafted P2M array and in the
newly allocated. Since that is no longer the case, as we have
holes in the P2M that point to p2m_missing or p2m_identity we
have to take that into account.

[v2: Check for overflow]
[v3: Move within the __va check]
[v4: Fix the computation]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/p2m.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 996ee2bf7bdb..c3e92912c3fb 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -396,6 +396,7 @@ unsigned long __init xen_revector_p2m_tree(void)
 	unsigned long va_start;
 	unsigned long va_end;
 	unsigned long pfn;
+	unsigned long pfn_free = 0;
 	unsigned long *mfn_list = NULL;
 	unsigned long size;
 
@@ -442,11 +443,18 @@ unsigned long __init xen_revector_p2m_tree(void)
 		if (mid_p >= (unsigned long *)va_start && mid_p <= (unsigned long *)va_end) {
 			unsigned long *new;
 
-			new = &mfn_list[pfn];
+			if (pfn_free  > (size / sizeof(unsigned long))) {
+				WARN(1, "Only allocated for %ld pages, but we want %ld!\n",
+				     size / sizeof(unsigned long), pfn_free);
+				return 0;
+			}
+			new = &mfn_list[pfn_free];
 
 			copy_page(new, mid_p);
-			p2m_top[topidx][mididx] = &mfn_list[pfn];
-			p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn]);
+			p2m_top[topidx][mididx] = &mfn_list[pfn_free];
+			p2m_top_mfn_p[topidx][mididx] = virt_to_mfn(&mfn_list[pfn_free]);
+
+			pfn_free += P2M_PER_PAGE;
 
 		}
 		/* This should be the leafs allocated for identity from _brk. */
-- 
cgit v1.2.3


From 328731876451a837f56e66ffa11de053ed5daf73 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 17 Aug 2012 09:35:31 -0400
Subject: xen/mmu: If the revector fails, don't attempt to revector anything
 else.

If the P2M revectoring would fail, we would try to continue on by
cleaning the PMD for L1 (PTE) page-tables. The xen_cleanhighmap
is greedy and erases the PMD on both boundaries. Since the P2M
array can share the PMD, we would wipe out part of the __ka
that is still used in the P2M tree to point to P2M leafs.

This fixes it by bypassing the revectoring and continuing on.
If the revector fails, a nice WARN is printed so we can still
troubleshoot this.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5b2cb54425ce..cb9db72b33f8 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1238,7 +1238,8 @@ static void __init xen_pagetable_setup_done(pgd_t *base)
 			memblock_free(__pa(xen_start_info->mfn_list), size);
 			/* And revector! Bye bye old array */
 			xen_start_info->mfn_list = new_mfn_list;
-		}
+		} else
+			goto skip;
 	}
 	/* At this stage, cleanup_highmap has already cleaned __ka space
 	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
@@ -1259,6 +1260,7 @@ static void __init xen_pagetable_setup_done(pgd_t *base)
 	 * anything at this stage. */
 	xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1);
 #endif
+skip:
 #endif
 	xen_post_allocator_init();
 }
-- 
cgit v1.2.3


From e3e45c01ae690e65f2650e5288b9af802e95a136 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Fri, 24 Aug 2012 15:34:34 +0200
Subject: perf/x86: Fix microcode revision check for SNB-PEBS

The following patch makes the microcode update code path
actually invoke the perf_check_microcode() function and
thus potentially renabling SNB PEBS.

By default, CONFIG_MICROCODE_OLD_INTERFACE is
forced to Y in arch/x86/Kconfig. There is no
way to disable this. That means that the code
path used in arch/x86/kernel/microcode_core.c
did not include the call to perf_check_microcode().

Thus, even though the microcode was updated to a
version that fixes the SNB PEBS problem, perf_event
would still return EOPNOTSUPP when enabling precise
sampling.

This patch simply adds a call to perf_check_microcode()
in the call path used when OLD_INTERFACE=y.

Signed-off-by: Stephane Eranian <eranian@google.com>
Acked-by: Borislav Petkov <borislav.petkov@amd.com>
Cc: peterz@infradead.org
Cc: andi@firstfloor.org
Link: http://lkml.kernel.org/r/20120824133434.GA8014@quad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/microcode_core.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c
index 4873e62db6a1..9e5bcf1e2376 100644
--- a/arch/x86/kernel/microcode_core.c
+++ b/arch/x86/kernel/microcode_core.c
@@ -225,6 +225,9 @@ static ssize_t microcode_write(struct file *file, const char __user *buf,
 	if (do_microcode_update(buf, len) == 0)
 		ret = (ssize_t)len;
 
+	if (ret > 0)
+		perf_check_microcode();
+
 	mutex_unlock(&microcode_mutex);
 	put_online_cpus();
 
-- 
cgit v1.2.3


From dd856efafe6097a5c9104725c2bca74430423db8 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Mon, 27 Aug 2012 23:46:17 +0300
Subject: KVM: x86 emulator: access GPRs on demand

Instead of populating the entire register file, read in registers
as they are accessed, and write back only the modified ones.  This
saves a VMREAD and VMWRITE on Intel (for rsp, since it is not usually
used during emulation), and a two 128-byte copies for the registers.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |  20 ++-
 arch/x86/kvm/emulate.c             | 299 +++++++++++++++++++++++--------------
 arch/x86/kvm/x86.c                 |  45 +++---
 3 files changed, 220 insertions(+), 144 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index c764f43b71c5..282aee5d6ac1 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -85,6 +85,19 @@ struct x86_instruction_info {
 #define X86EMUL_INTERCEPTED     6 /* Intercepted by nested VMCB/VMCS */
 
 struct x86_emulate_ops {
+	/*
+	 * read_gpr: read a general purpose register (rax - r15)
+	 *
+	 * @reg: gpr number.
+	 */
+	ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg);
+	/*
+	 * write_gpr: write a general purpose register (rax - r15)
+	 *
+	 * @reg: gpr number.
+	 * @val: value to write.
+	 */
+	void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val);
 	/*
 	 * read_std: Read bytes of standard (non-emulated/special) memory.
 	 *           Used for descriptor reading.
@@ -281,8 +294,10 @@ struct x86_emulate_ctxt {
 	bool rip_relative;
 	unsigned long _eip;
 	struct operand memop;
+	u32 regs_valid;  /* bitmaps of registers in _regs[] that can be read */
+	u32 regs_dirty;  /* bitmaps of registers in _regs[] that have been written */
 	/* Fields above regs are cleared together. */
-	unsigned long regs[NR_VCPU_REGS];
+	unsigned long _regs[NR_VCPU_REGS];
 	struct operand *memopp;
 	struct fetch_cache fetch;
 	struct read_cache io_read;
@@ -394,4 +409,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 			 u16 tss_selector, int idt_index, int reason,
 			 bool has_error_code, u32 error_code);
 int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq);
+void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt);
+void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt);
+
 #endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e8fb6c5c6c0a..5e27ba532613 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -202,6 +202,42 @@ struct gprefix {
 #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a
 #define EFLG_RESERVED_ONE_MASK 2
 
+static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	if (!(ctxt->regs_valid & (1 << nr))) {
+		ctxt->regs_valid |= 1 << nr;
+		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
+	}
+	return ctxt->_regs[nr];
+}
+
+static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	ctxt->regs_valid |= 1 << nr;
+	ctxt->regs_dirty |= 1 << nr;
+	return &ctxt->_regs[nr];
+}
+
+static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	reg_read(ctxt, nr);
+	return reg_write(ctxt, nr);
+}
+
+static void writeback_registers(struct x86_emulate_ctxt *ctxt)
+{
+	unsigned reg;
+
+	for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16)
+		ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]);
+}
+
+static void invalidate_registers(struct x86_emulate_ctxt *ctxt)
+{
+	ctxt->regs_dirty = 0;
+	ctxt->regs_valid = 0;
+}
+
 /*
  * Instruction emulation:
  * Most instructions are emulated directly via a fragment of inline assembly
@@ -374,8 +410,8 @@ struct gprefix {
 #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex)			\
 	do {								\
 		unsigned long _tmp;					\
-		ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX];		\
-		ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX];		\
+		ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX);		\
+		ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX);		\
 									\
 		__asm__ __volatile__ (					\
 			_PRE_EFLAGS("0", "5", "1")			\
@@ -494,7 +530,7 @@ register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, in
 
 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
 {
-	masked_increment(&ctxt->regs[VCPU_REGS_RSP], stack_mask(ctxt), inc);
+	masked_increment(reg_rmw(ctxt, VCPU_REGS_RSP), stack_mask(ctxt), inc);
 }
 
 static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
@@ -786,14 +822,15 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt,
  * pointer into the block that addresses the relevant register.
  * @highbyte_regs specifies whether to decode AH,CH,DH,BH.
  */
-static void *decode_register(u8 modrm_reg, unsigned long *regs,
+static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg,
 			     int highbyte_regs)
 {
 	void *p;
 
-	p = &regs[modrm_reg];
 	if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8)
-		p = (unsigned char *)&regs[modrm_reg & 3] + 1;
+		p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1;
+	else
+		p = reg_rmw(ctxt, modrm_reg);
 	return p;
 }
 
@@ -982,10 +1019,10 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt,
 
 	op->type = OP_REG;
 	if (ctxt->d & ByteOp) {
-		op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs);
+		op->addr.reg = decode_register(ctxt, reg, highbyte_regs);
 		op->bytes = 1;
 	} else {
-		op->addr.reg = decode_register(reg, ctxt->regs, 0);
+		op->addr.reg = decode_register(ctxt, reg, 0);
 		op->bytes = ctxt->op_bytes;
 	}
 	fetch_register_operand(op);
@@ -1020,8 +1057,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	if (ctxt->modrm_mod == 3) {
 		op->type = OP_REG;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		op->addr.reg = decode_register(ctxt->modrm_rm,
-					       ctxt->regs, ctxt->d & ByteOp);
+		op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp);
 		if (ctxt->d & Sse) {
 			op->type = OP_XMM;
 			op->bytes = 16;
@@ -1042,10 +1078,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 	op->type = OP_MEM;
 
 	if (ctxt->ad_bytes == 2) {
-		unsigned bx = ctxt->regs[VCPU_REGS_RBX];
-		unsigned bp = ctxt->regs[VCPU_REGS_RBP];
-		unsigned si = ctxt->regs[VCPU_REGS_RSI];
-		unsigned di = ctxt->regs[VCPU_REGS_RDI];
+		unsigned bx = reg_read(ctxt, VCPU_REGS_RBX);
+		unsigned bp = reg_read(ctxt, VCPU_REGS_RBP);
+		unsigned si = reg_read(ctxt, VCPU_REGS_RSI);
+		unsigned di = reg_read(ctxt, VCPU_REGS_RDI);
 
 		/* 16-bit ModR/M decode. */
 		switch (ctxt->modrm_mod) {
@@ -1102,17 +1138,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt,
 			if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0)
 				modrm_ea += insn_fetch(s32, ctxt);
 			else {
-				modrm_ea += ctxt->regs[base_reg];
+				modrm_ea += reg_read(ctxt, base_reg);
 				adjust_modrm_seg(ctxt, base_reg);
 			}
 			if (index_reg != 4)
-				modrm_ea += ctxt->regs[index_reg] << scale;
+				modrm_ea += reg_read(ctxt, index_reg) << scale;
 		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
 			if (ctxt->mode == X86EMUL_MODE_PROT64)
 				ctxt->rip_relative = 1;
 		} else {
 			base_reg = ctxt->modrm_rm;
-			modrm_ea += ctxt->regs[base_reg];
+			modrm_ea += reg_read(ctxt, base_reg);
 			adjust_modrm_seg(ctxt, base_reg);
 		}
 		switch (ctxt->modrm_mod) {
@@ -1250,10 +1286,10 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 	if (rc->pos == rc->end) { /* refill pio read ahead */
 		unsigned int in_page, n;
 		unsigned int count = ctxt->rep_prefix ?
-			address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) : 1;
+			address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1;
 		in_page = (ctxt->eflags & EFLG_DF) ?
-			offset_in_page(ctxt->regs[VCPU_REGS_RDI]) :
-			PAGE_SIZE - offset_in_page(ctxt->regs[VCPU_REGS_RDI]);
+			offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) :
+			PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI));
 		n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size,
 			count);
 		if (n == 0)
@@ -1533,7 +1569,7 @@ static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes)
 	struct segmented_address addr;
 
 	rsp_increment(ctxt, -bytes);
-	addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt);
+	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
 	addr.seg = VCPU_SREG_SS;
 
 	return segmented_write(ctxt, addr, data, bytes);
@@ -1552,7 +1588,7 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
 	int rc;
 	struct segmented_address addr;
 
-	addr.ea = ctxt->regs[VCPU_REGS_RSP] & stack_mask(ctxt);
+	addr.ea = reg_read(ctxt, VCPU_REGS_RSP) & stack_mask(ctxt);
 	addr.seg = VCPU_SREG_SS;
 	rc = segmented_read(ctxt, addr, dest, len);
 	if (rc != X86EMUL_CONTINUE)
@@ -1620,26 +1656,28 @@ static int em_enter(struct x86_emulate_ctxt *ctxt)
 	int rc;
 	unsigned frame_size = ctxt->src.val;
 	unsigned nesting_level = ctxt->src2.val & 31;
+	ulong rbp;
 
 	if (nesting_level)
 		return X86EMUL_UNHANDLEABLE;
 
-	rc = push(ctxt, &ctxt->regs[VCPU_REGS_RBP], stack_size(ctxt));
+	rbp = reg_read(ctxt, VCPU_REGS_RBP);
+	rc = push(ctxt, &rbp, stack_size(ctxt));
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	assign_masked(&ctxt->regs[VCPU_REGS_RBP], ctxt->regs[VCPU_REGS_RSP],
+	assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP),
 		      stack_mask(ctxt));
-	assign_masked(&ctxt->regs[VCPU_REGS_RSP],
-		      ctxt->regs[VCPU_REGS_RSP] - frame_size,
+	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP),
+		      reg_read(ctxt, VCPU_REGS_RSP) - frame_size,
 		      stack_mask(ctxt));
 	return X86EMUL_CONTINUE;
 }
 
 static int em_leave(struct x86_emulate_ctxt *ctxt)
 {
-	assign_masked(&ctxt->regs[VCPU_REGS_RSP], ctxt->regs[VCPU_REGS_RBP],
+	assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP),
 		      stack_mask(ctxt));
-	return emulate_pop(ctxt, &ctxt->regs[VCPU_REGS_RBP], ctxt->op_bytes);
+	return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes);
 }
 
 static int em_push_sreg(struct x86_emulate_ctxt *ctxt)
@@ -1667,13 +1705,13 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
 
 static int em_pusha(struct x86_emulate_ctxt *ctxt)
 {
-	unsigned long old_esp = ctxt->regs[VCPU_REGS_RSP];
+	unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP);
 	int rc = X86EMUL_CONTINUE;
 	int reg = VCPU_REGS_RAX;
 
 	while (reg <= VCPU_REGS_RDI) {
 		(reg == VCPU_REGS_RSP) ?
-		(ctxt->src.val = old_esp) : (ctxt->src.val = ctxt->regs[reg]);
+		(ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg));
 
 		rc = em_push(ctxt);
 		if (rc != X86EMUL_CONTINUE)
@@ -1702,7 +1740,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 			--reg;
 		}
 
-		rc = emulate_pop(ctxt, &ctxt->regs[reg], ctxt->op_bytes);
+		rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes);
 		if (rc != X86EMUL_CONTINUE)
 			break;
 		--reg;
@@ -1710,7 +1748,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 	return rc;
 }
 
-int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
+static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
 {
 	struct x86_emulate_ops *ops = ctxt->ops;
 	int rc;
@@ -1759,11 +1797,22 @@ int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
 	return rc;
 }
 
+int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
+{
+	int rc;
+
+	invalidate_registers(ctxt);
+	rc = __emulate_int_real(ctxt, irq);
+	if (rc == X86EMUL_CONTINUE)
+		writeback_registers(ctxt);
+	return rc;
+}
+
 static int emulate_int(struct x86_emulate_ctxt *ctxt, int irq)
 {
 	switch(ctxt->mode) {
 	case X86EMUL_MODE_REAL:
-		return emulate_int_real(ctxt, irq);
+		return __emulate_int_real(ctxt, irq);
 	case X86EMUL_MODE_VM86:
 	case X86EMUL_MODE_PROT16:
 	case X86EMUL_MODE_PROT32:
@@ -1970,14 +2019,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
 {
 	u64 old = ctxt->dst.orig_val64;
 
-	if (((u32) (old >> 0) != (u32) ctxt->regs[VCPU_REGS_RAX]) ||
-	    ((u32) (old >> 32) != (u32) ctxt->regs[VCPU_REGS_RDX])) {
-		ctxt->regs[VCPU_REGS_RAX] = (u32) (old >> 0);
-		ctxt->regs[VCPU_REGS_RDX] = (u32) (old >> 32);
+	if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) ||
+	    ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) {
+		*reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0);
+		*reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32);
 		ctxt->eflags &= ~EFLG_ZF;
 	} else {
-		ctxt->dst.val64 = ((u64)ctxt->regs[VCPU_REGS_RCX] << 32) |
-			(u32) ctxt->regs[VCPU_REGS_RBX];
+		ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) |
+			(u32) reg_read(ctxt, VCPU_REGS_RBX);
 
 		ctxt->eflags |= EFLG_ZF;
 	}
@@ -2013,7 +2062,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 {
 	/* Save real source value, then compare EAX against destination. */
 	ctxt->src.orig_val = ctxt->src.val;
-	ctxt->src.val = ctxt->regs[VCPU_REGS_RAX];
+	ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX);
 	emulate_2op_SrcV(ctxt, "cmp");
 
 	if (ctxt->eflags & EFLG_ZF) {
@@ -2022,7 +2071,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 	} else {
 		/* Failure: write the value we saw to EAX. */
 		ctxt->dst.type = OP_REG;
-		ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX];
+		ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
 	}
 	return X86EMUL_CONTINUE;
 }
@@ -2159,10 +2208,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->regs[VCPU_REGS_RCX] = ctxt->_eip;
+	*reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip;
 	if (efer & EFER_LMA) {
 #ifdef CONFIG_X86_64
-		ctxt->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF;
+		*reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF;
 
 		ops->get_msr(ctxt,
 			     ctxt->mode == X86EMUL_MODE_PROT64 ?
@@ -2241,7 +2290,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 	ctxt->_eip = msr_data;
 
 	ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
-	ctxt->regs[VCPU_REGS_RSP] = msr_data;
+	*reg_write(ctxt, VCPU_REGS_RSP) = msr_data;
 
 	return X86EMUL_CONTINUE;
 }
@@ -2291,8 +2340,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
 	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
 
-	ctxt->_eip = ctxt->regs[VCPU_REGS_RDX];
-	ctxt->regs[VCPU_REGS_RSP] = ctxt->regs[VCPU_REGS_RCX];
+	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
+	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
 
 	return X86EMUL_CONTINUE;
 }
@@ -2361,14 +2410,14 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt,
 {
 	tss->ip = ctxt->_eip;
 	tss->flag = ctxt->eflags;
-	tss->ax = ctxt->regs[VCPU_REGS_RAX];
-	tss->cx = ctxt->regs[VCPU_REGS_RCX];
-	tss->dx = ctxt->regs[VCPU_REGS_RDX];
-	tss->bx = ctxt->regs[VCPU_REGS_RBX];
-	tss->sp = ctxt->regs[VCPU_REGS_RSP];
-	tss->bp = ctxt->regs[VCPU_REGS_RBP];
-	tss->si = ctxt->regs[VCPU_REGS_RSI];
-	tss->di = ctxt->regs[VCPU_REGS_RDI];
+	tss->ax = reg_read(ctxt, VCPU_REGS_RAX);
+	tss->cx = reg_read(ctxt, VCPU_REGS_RCX);
+	tss->dx = reg_read(ctxt, VCPU_REGS_RDX);
+	tss->bx = reg_read(ctxt, VCPU_REGS_RBX);
+	tss->sp = reg_read(ctxt, VCPU_REGS_RSP);
+	tss->bp = reg_read(ctxt, VCPU_REGS_RBP);
+	tss->si = reg_read(ctxt, VCPU_REGS_RSI);
+	tss->di = reg_read(ctxt, VCPU_REGS_RDI);
 
 	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
 	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
@@ -2384,14 +2433,14 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 
 	ctxt->_eip = tss->ip;
 	ctxt->eflags = tss->flag | 2;
-	ctxt->regs[VCPU_REGS_RAX] = tss->ax;
-	ctxt->regs[VCPU_REGS_RCX] = tss->cx;
-	ctxt->regs[VCPU_REGS_RDX] = tss->dx;
-	ctxt->regs[VCPU_REGS_RBX] = tss->bx;
-	ctxt->regs[VCPU_REGS_RSP] = tss->sp;
-	ctxt->regs[VCPU_REGS_RBP] = tss->bp;
-	ctxt->regs[VCPU_REGS_RSI] = tss->si;
-	ctxt->regs[VCPU_REGS_RDI] = tss->di;
+	*reg_write(ctxt, VCPU_REGS_RAX) = tss->ax;
+	*reg_write(ctxt, VCPU_REGS_RCX) = tss->cx;
+	*reg_write(ctxt, VCPU_REGS_RDX) = tss->dx;
+	*reg_write(ctxt, VCPU_REGS_RBX) = tss->bx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = tss->sp;
+	*reg_write(ctxt, VCPU_REGS_RBP) = tss->bp;
+	*reg_write(ctxt, VCPU_REGS_RSI) = tss->si;
+	*reg_write(ctxt, VCPU_REGS_RDI) = tss->di;
 
 	/*
 	 * SDM says that segment selectors are loaded before segment
@@ -2476,14 +2525,14 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt,
 	tss->cr3 = ctxt->ops->get_cr(ctxt, 3);
 	tss->eip = ctxt->_eip;
 	tss->eflags = ctxt->eflags;
-	tss->eax = ctxt->regs[VCPU_REGS_RAX];
-	tss->ecx = ctxt->regs[VCPU_REGS_RCX];
-	tss->edx = ctxt->regs[VCPU_REGS_RDX];
-	tss->ebx = ctxt->regs[VCPU_REGS_RBX];
-	tss->esp = ctxt->regs[VCPU_REGS_RSP];
-	tss->ebp = ctxt->regs[VCPU_REGS_RBP];
-	tss->esi = ctxt->regs[VCPU_REGS_RSI];
-	tss->edi = ctxt->regs[VCPU_REGS_RDI];
+	tss->eax = reg_read(ctxt, VCPU_REGS_RAX);
+	tss->ecx = reg_read(ctxt, VCPU_REGS_RCX);
+	tss->edx = reg_read(ctxt, VCPU_REGS_RDX);
+	tss->ebx = reg_read(ctxt, VCPU_REGS_RBX);
+	tss->esp = reg_read(ctxt, VCPU_REGS_RSP);
+	tss->ebp = reg_read(ctxt, VCPU_REGS_RBP);
+	tss->esi = reg_read(ctxt, VCPU_REGS_RSI);
+	tss->edi = reg_read(ctxt, VCPU_REGS_RDI);
 
 	tss->es = get_segment_selector(ctxt, VCPU_SREG_ES);
 	tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS);
@@ -2505,14 +2554,14 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	ctxt->eflags = tss->eflags | 2;
 
 	/* General purpose registers */
-	ctxt->regs[VCPU_REGS_RAX] = tss->eax;
-	ctxt->regs[VCPU_REGS_RCX] = tss->ecx;
-	ctxt->regs[VCPU_REGS_RDX] = tss->edx;
-	ctxt->regs[VCPU_REGS_RBX] = tss->ebx;
-	ctxt->regs[VCPU_REGS_RSP] = tss->esp;
-	ctxt->regs[VCPU_REGS_RBP] = tss->ebp;
-	ctxt->regs[VCPU_REGS_RSI] = tss->esi;
-	ctxt->regs[VCPU_REGS_RDI] = tss->edi;
+	*reg_write(ctxt, VCPU_REGS_RAX) = tss->eax;
+	*reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx;
+	*reg_write(ctxt, VCPU_REGS_RDX) = tss->edx;
+	*reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx;
+	*reg_write(ctxt, VCPU_REGS_RSP) = tss->esp;
+	*reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp;
+	*reg_write(ctxt, VCPU_REGS_RSI) = tss->esi;
+	*reg_write(ctxt, VCPU_REGS_RDI) = tss->edi;
 
 	/*
 	 * SDM says that segment selectors are loaded before segment
@@ -2727,14 +2776,17 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 {
 	int rc;
 
+	invalidate_registers(ctxt);
 	ctxt->_eip = ctxt->eip;
 	ctxt->dst.type = OP_NONE;
 
 	rc = emulator_do_task_switch(ctxt, tss_selector, idt_index, reason,
 				     has_error_code, error_code);
 
-	if (rc == X86EMUL_CONTINUE)
+	if (rc == X86EMUL_CONTINUE) {
 		ctxt->eip = ctxt->_eip;
+		writeback_registers(ctxt);
+	}
 
 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 }
@@ -2744,8 +2796,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
 {
 	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
 
-	register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes);
-	op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]);
+	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
+	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
 	op->addr.mem.seg = seg;
 }
 
@@ -2921,7 +2973,7 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt)
 {
 	ctxt->dst.type = OP_REG;
 	ctxt->dst.bytes = ctxt->src.bytes;
-	ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX];
+	ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
 	ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1);
 
 	return X86EMUL_CONTINUE;
@@ -2932,8 +2984,8 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt)
 	u64 tsc = 0;
 
 	ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc);
-	ctxt->regs[VCPU_REGS_RAX] = (u32)tsc;
-	ctxt->regs[VCPU_REGS_RDX] = tsc >> 32;
+	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc;
+	*reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32;
 	return X86EMUL_CONTINUE;
 }
 
@@ -2941,10 +2993,10 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt)
 {
 	u64 pmc;
 
-	if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc))
+	if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc))
 		return emulate_gp(ctxt, 0);
-	ctxt->regs[VCPU_REGS_RAX] = (u32)pmc;
-	ctxt->regs[VCPU_REGS_RDX] = pmc >> 32;
+	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc;
+	*reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32;
 	return X86EMUL_CONTINUE;
 }
 
@@ -2986,9 +3038,9 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt)
 {
 	u64 msr_data;
 
-	msr_data = (u32)ctxt->regs[VCPU_REGS_RAX]
-		| ((u64)ctxt->regs[VCPU_REGS_RDX] << 32);
-	if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data))
+	msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX)
+		| ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32);
+	if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data))
 		return emulate_gp(ctxt, 0);
 
 	return X86EMUL_CONTINUE;
@@ -2998,11 +3050,11 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt)
 {
 	u64 msr_data;
 
-	if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data))
+	if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data))
 		return emulate_gp(ctxt, 0);
 
-	ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data;
-	ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32;
+	*reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data;
+	*reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32;
 	return X86EMUL_CONTINUE;
 }
 
@@ -3182,8 +3234,8 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
 
 static int em_loop(struct x86_emulate_ctxt *ctxt)
 {
-	register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
-	if ((address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) != 0) &&
+	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
+	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
 		jmp_rel(ctxt, ctxt->src.val);
 
@@ -3192,7 +3244,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt)
 
 static int em_jcxz(struct x86_emulate_ctxt *ctxt)
 {
-	if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0)
+	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
 		jmp_rel(ctxt, ctxt->src.val);
 
 	return X86EMUL_CONTINUE;
@@ -3280,20 +3332,20 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt)
 {
 	u32 eax, ebx, ecx, edx;
 
-	eax = ctxt->regs[VCPU_REGS_RAX];
-	ecx = ctxt->regs[VCPU_REGS_RCX];
+	eax = reg_read(ctxt, VCPU_REGS_RAX);
+	ecx = reg_read(ctxt, VCPU_REGS_RCX);
 	ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx);
-	ctxt->regs[VCPU_REGS_RAX] = eax;
-	ctxt->regs[VCPU_REGS_RBX] = ebx;
-	ctxt->regs[VCPU_REGS_RCX] = ecx;
-	ctxt->regs[VCPU_REGS_RDX] = edx;
+	*reg_write(ctxt, VCPU_REGS_RAX) = eax;
+	*reg_write(ctxt, VCPU_REGS_RBX) = ebx;
+	*reg_write(ctxt, VCPU_REGS_RCX) = ecx;
+	*reg_write(ctxt, VCPU_REGS_RDX) = edx;
 	return X86EMUL_CONTINUE;
 }
 
 static int em_lahf(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->regs[VCPU_REGS_RAX] &= ~0xff00UL;
-	ctxt->regs[VCPU_REGS_RAX] |= (ctxt->eflags & 0xff) << 8;
+	*reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL;
+	*reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8;
 	return X86EMUL_CONTINUE;
 }
 
@@ -3450,7 +3502,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt)
 
 static int check_svme_pa(struct x86_emulate_ctxt *ctxt)
 {
-	u64 rax = ctxt->regs[VCPU_REGS_RAX];
+	u64 rax = reg_read(ctxt, VCPU_REGS_RAX);
 
 	/* Valid physical address? */
 	if (rax & 0xffff000000000000ULL)
@@ -3472,7 +3524,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt)
 static int check_rdpmc(struct x86_emulate_ctxt *ctxt)
 {
 	u64 cr4 = ctxt->ops->get_cr(ctxt, 4);
-	u64 rcx = ctxt->regs[VCPU_REGS_RCX];
+	u64 rcx = reg_read(ctxt, VCPU_REGS_RCX);
 
 	if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) ||
 	    (rcx > 3))
@@ -3930,7 +3982,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 	case OpAcc:
 		op->type = OP_REG;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
-		op->addr.reg = &ctxt->regs[VCPU_REGS_RAX];
+		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX);
 		fetch_register_operand(op);
 		op->orig_val = op->val;
 		break;
@@ -3938,19 +3990,19 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]);
+			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
 		op->addr.mem.seg = VCPU_SREG_ES;
 		op->val = 0;
 		break;
 	case OpDX:
 		op->type = OP_REG;
 		op->bytes = 2;
-		op->addr.reg = &ctxt->regs[VCPU_REGS_RDX];
+		op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX);
 		fetch_register_operand(op);
 		break;
 	case OpCL:
 		op->bytes = 1;
-		op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff;
+		op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff;
 		break;
 	case OpImmByte:
 		rc = decode_imm(ctxt, op, 1, true);
@@ -3981,7 +4033,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]);
+			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
 		op->addr.mem.seg = seg_override(ctxt);
 		op->val = 0;
 		break;
@@ -4287,6 +4339,7 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 		read_mmx_reg(ctxt, &op->mm_val, op->addr.mm);
 }
 
+
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 {
 	struct x86_emulate_ops *ops = ctxt->ops;
@@ -4371,7 +4424,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
 		/* All REP prefixes have the same first termination condition */
-		if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) {
+		if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) {
 			ctxt->eip = ctxt->_eip;
 			goto done;
 		}
@@ -4444,7 +4497,7 @@ special_insn:
 		ctxt->dst.val = ctxt->src.addr.mem.ea;
 		break;
 	case 0x90 ... 0x97: /* nop / xchg reg, rax */
-		if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX])
+		if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX))
 			break;
 		rc = em_xchg(ctxt);
 		break;
@@ -4472,7 +4525,7 @@ special_insn:
 		rc = em_grp2(ctxt);
 		break;
 	case 0xd2 ... 0xd3:	/* Grp2 */
-		ctxt->src.val = ctxt->regs[VCPU_REGS_RCX];
+		ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX);
 		rc = em_grp2(ctxt);
 		break;
 	case 0xe9: /* jmp rel */
@@ -4527,14 +4580,14 @@ writeback:
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
 		struct read_cache *r = &ctxt->io_read;
-		register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1);
+		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
 
 		if (!string_insn_completed(ctxt)) {
 			/*
 			 * Re-enter guest when pio read ahead buffer is empty
 			 * or, if it is not used, after each 1024 iteration.
 			 */
-			if ((r->end != 0 || ctxt->regs[VCPU_REGS_RCX] & 0x3ff) &&
+			if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) &&
 			    (r->end == 0 || r->end != r->pos)) {
 				/*
 				 * Reset read cache. Usually happens before
@@ -4542,6 +4595,7 @@ writeback:
 				 * we have to do it here.
 				 */
 				ctxt->mem_read.end = 0;
+				writeback_registers(ctxt);
 				return EMULATION_RESTART;
 			}
 			goto done; /* skip rip writeback */
@@ -4556,6 +4610,9 @@ done:
 	if (rc == X86EMUL_INTERCEPTED)
 		return EMULATION_INTERCEPTED;
 
+	if (rc == X86EMUL_CONTINUE)
+		writeback_registers(ctxt);
+
 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 
 twobyte_insn:
@@ -4628,3 +4685,13 @@ twobyte_insn:
 cannot_emulate:
 	return EMULATION_FAILED;
 }
+
+void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt)
+{
+	invalidate_registers(ctxt);
+}
+
+void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt)
+{
+	writeback_registers(ctxt);
+}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 42bbf4187d20..e00050ce7a6a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4313,7 +4313,19 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt,
 	kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx);
 }
 
+static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg)
+{
+	return kvm_register_read(emul_to_vcpu(ctxt), reg);
+}
+
+static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val)
+{
+	kvm_register_write(emul_to_vcpu(ctxt), reg, val);
+}
+
 static struct x86_emulate_ops emulate_ops = {
+	.read_gpr            = emulator_read_gpr,
+	.write_gpr           = emulator_write_gpr,
 	.read_std            = kvm_read_guest_virt_system,
 	.write_std           = kvm_write_guest_virt_system,
 	.fetch               = kvm_fetch_guest_virt,
@@ -4348,14 +4360,6 @@ static struct x86_emulate_ops emulate_ops = {
 	.get_cpuid           = emulator_get_cpuid,
 };
 
-static void cache_all_regs(struct kvm_vcpu *vcpu)
-{
-	kvm_register_read(vcpu, VCPU_REGS_RAX);
-	kvm_register_read(vcpu, VCPU_REGS_RSP);
-	kvm_register_read(vcpu, VCPU_REGS_RIP);
-	vcpu->arch.regs_dirty = ~0;
-}
-
 static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
 {
 	u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask);
@@ -4382,12 +4386,10 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu)
 		kvm_queue_exception(vcpu, ctxt->exception.vector);
 }
 
-static void init_decode_cache(struct x86_emulate_ctxt *ctxt,
-			      const unsigned long *regs)
+static void init_decode_cache(struct x86_emulate_ctxt *ctxt)
 {
 	memset(&ctxt->twobyte, 0,
-	       (void *)&ctxt->regs - (void *)&ctxt->twobyte);
-	memcpy(ctxt->regs, regs, sizeof(ctxt->regs));
+	       (void *)&ctxt->_regs - (void *)&ctxt->twobyte);
 
 	ctxt->fetch.start = 0;
 	ctxt->fetch.end = 0;
@@ -4402,14 +4404,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int cs_db, cs_l;
 
-	/*
-	 * TODO: fix emulate.c to use guest_read/write_register
-	 * instead of direct ->regs accesses, can save hundred cycles
-	 * on Intel for instructions that don't read/change RSP, for
-	 * for example.
-	 */
-	cache_all_regs(vcpu);
-
 	kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
 
 	ctxt->eflags = kvm_get_rflags(vcpu);
@@ -4421,7 +4415,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 							  X86EMUL_MODE_PROT16;
 	ctxt->guest_mode = is_guest_mode(vcpu);
 
-	init_decode_cache(ctxt, vcpu->arch.regs);
+	init_decode_cache(ctxt);
 	vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
 }
 
@@ -4441,7 +4435,6 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip)
 		return EMULATE_FAIL;
 
 	ctxt->eip = ctxt->_eip;
-	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
 	kvm_rip_write(vcpu, ctxt->eip);
 	kvm_set_rflags(vcpu, ctxt->eflags);
 
@@ -4599,7 +4592,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 	   changes registers values  during IO operation */
 	if (vcpu->arch.emulate_regs_need_sync_from_vcpu) {
 		vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
-		memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs);
+		emulator_invalidate_register_cache(ctxt);
 	}
 
 restart:
@@ -4637,7 +4630,6 @@ restart:
 		toggle_interruptibility(vcpu, ctxt->interruptibility);
 		kvm_set_rflags(vcpu, ctxt->eflags);
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
-		memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 		kvm_rip_write(vcpu, ctxt->eip);
 	} else
@@ -5591,8 +5583,7 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 		 * that usually, but some bad designed PV devices (vmware
 		 * backdoor interface) need this to work
 		 */
-		struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
-		memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
+		emulator_writeback_register_cache(&vcpu->arch.emulate_ctxt);
 		vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
 	}
 	regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX);
@@ -5723,6 +5714,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 {
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int ret;
+	unsigned reg;
 
 	init_emulate_ctxt(vcpu);
 
@@ -5732,7 +5724,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 	if (ret)
 		return EMULATE_FAIL;
 
-	memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs);
 	kvm_rip_write(vcpu, ctxt->eip);
 	kvm_set_rflags(vcpu, ctxt->eflags);
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
-- 
cgit v1.2.3


From baa7e81e325bbb6ddebd7680ac1068859244b61d Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:06:58 +0300
Subject: KVM: VMX: Separate saving pre-realmode state from setting segments

Commit b246dd5df139 ("KVM: VMX: Fix KVM_SET_SREGS with big real mode
segments") moved fix_rmode_seg() to vmx_set_segment(), so that it is
applied not just on transitions to real mode, but also on KVM_SET_SREGS
(migration).  However fix_rmode_seg() not only munges the vmcs segments,
it also sets up the save area for us to restore when returning to
protected mode or to return in vmx_get_segment().

Move saving the segment into a new function, save_rmode_seg(), and
call it just during the transition.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 13e0296cea46..4e49caf9224d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2768,7 +2768,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)
 	return kvm->arch.tss_addr;
 }
 
-static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
+static void save_rmode_seg(int seg, struct kvm_save_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
@@ -2776,6 +2776,12 @@ static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
 	save->base = vmcs_readl(sf->base);
 	save->limit = vmcs_read32(sf->limit);
 	save->ar = vmcs_read32(sf->ar_bytes);
+}
+
+static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
+{
+	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+
 	vmcs_write16(sf->selector, save->base >> 4);
 	vmcs_write32(sf->base, save->base & 0xffff0);
 	vmcs_write32(sf->limit, 0xffff);
@@ -2798,6 +2804,12 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	vmx->emulation_required = 1;
 	vmx->rmode.vm86_active = 1;
 
+	save_rmode_seg(VCPU_SREG_TR, &vmx->rmode.tr);
+	save_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
+	save_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
+	save_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+	save_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
+
 	/*
 	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
 	 * vcpu. Call it here with phys address pointing 16M below 4G.
@@ -2812,14 +2824,8 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 
 	vmx_segment_cache_clear(vmx);
 
-	vmx->rmode.tr.selector = vmcs_read16(GUEST_TR_SELECTOR);
-	vmx->rmode.tr.base = vmcs_readl(GUEST_TR_BASE);
 	vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm));
-
-	vmx->rmode.tr.limit = vmcs_read32(GUEST_TR_LIMIT);
 	vmcs_write32(GUEST_TR_LIMIT, RMODE_TSS_SIZE - 1);
-
-	vmx->rmode.tr.ar = vmcs_read32(GUEST_TR_AR_BYTES);
 	vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
-- 
cgit v1.2.3


From 72fbefec26841699fee9ad0b050624aeb43d5bae Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:06:59 +0300
Subject: KVM: VMX: Fix incorrect lookup of segment S flag in
 fix_pmode_dataseg()

fix_pmode_dataseg() looks up S in ->base instead of ->ar_bytes.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4e49caf9224d..1d93079432b3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2693,11 +2693,11 @@ static __exit void hardware_unsetup(void)
 	free_kvm_area();
 }
 
-static void fix_pmode_dataseg(int seg, struct kvm_save_segment *save)
+static void fix_pmode_dataseg(int seg, struct kvm_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
-	if (vmcs_readl(sf->base) == save->base && (save->base & AR_S_MASK)) {
+	if (vmcs_readl(sf->base) == save->base && (save->ar_bytes & AR_S_MASK)) {
 		vmcs_write16(sf->selector, save->selector);
 		vmcs_writel(sf->base, save->base);
 		vmcs_write32(sf->limit, save->limit);
-- 
cgit v1.2.3


From f5f7b2fe3bf849b58c8144729aba78b8e29e1e4c Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:00 +0300
Subject: KVM: VMX: Use kvm_segment to save protected-mode segments when
 entering realmode

Instead of using struct kvm_save_segment, use struct kvm_segment, which is what
the other APIs use.  This leads to some simplification.

We replace save_rmode_seg() with a call to vmx_save_segment().  Since this depends
on rmode.vm86_active, we move the call to before setting the flag.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 85 +++++++++++++++---------------------------------------
 1 file changed, 24 insertions(+), 61 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1d93079432b3..7e95ff68b9da 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -405,16 +405,16 @@ struct vcpu_vmx {
 	struct {
 		int vm86_active;
 		ulong save_rflags;
+		struct kvm_segment segs[8];
+	} rmode;
+	struct {
+		u32 bitmask; /* 4 bits per segment (1 bit per field) */
 		struct kvm_save_segment {
 			u16 selector;
 			unsigned long base;
 			u32 limit;
 			u32 ar;
-		} tr, es, ds, fs, gs;
-	} rmode;
-	struct {
-		u32 bitmask; /* 4 bits per segment (1 bit per field) */
-		struct kvm_save_segment seg[8];
+		} seg[8];
 	} segment_cache;
 	int vpid;
 	bool emulation_required;
@@ -2693,15 +2693,12 @@ static __exit void hardware_unsetup(void)
 	free_kvm_area();
 }
 
-static void fix_pmode_dataseg(int seg, struct kvm_segment *save)
+static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
-	if (vmcs_readl(sf->base) == save->base && (save->ar_bytes & AR_S_MASK)) {
-		vmcs_write16(sf->selector, save->selector);
-		vmcs_writel(sf->base, save->base);
-		vmcs_write32(sf->limit, save->limit);
-		vmcs_write32(sf->ar_bytes, save->ar);
+	if (vmcs_readl(sf->base) == save->base && save->s) {
+		vmx_set_segment(vcpu, save, seg);
 	} else {
 		u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK)
 			<< AR_DPL_SHIFT;
@@ -2719,10 +2716,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
 	vmx_segment_cache_clear(vmx);
 
-	vmcs_write16(GUEST_TR_SELECTOR, vmx->rmode.tr.selector);
-	vmcs_writel(GUEST_TR_BASE, vmx->rmode.tr.base);
-	vmcs_write32(GUEST_TR_LIMIT, vmx->rmode.tr.limit);
-	vmcs_write32(GUEST_TR_AR_BYTES, vmx->rmode.tr.ar);
+	vmx_set_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
 
 	flags = vmcs_readl(GUEST_RFLAGS);
 	flags &= RMODE_GUEST_OWNED_EFLAGS_BITS;
@@ -2737,10 +2731,10 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 	if (emulate_invalid_guest_state)
 		return;
 
-	fix_pmode_dataseg(VCPU_SREG_ES, &vmx->rmode.es);
-	fix_pmode_dataseg(VCPU_SREG_DS, &vmx->rmode.ds);
-	fix_pmode_dataseg(VCPU_SREG_GS, &vmx->rmode.gs);
-	fix_pmode_dataseg(VCPU_SREG_FS, &vmx->rmode.fs);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_ES, &vmx->rmode.segs[VCPU_SREG_ES]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_DS, &vmx->rmode.segs[VCPU_SREG_DS]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_FS, &vmx->rmode.segs[VCPU_SREG_FS]);
+	fix_pmode_dataseg(vcpu, VCPU_SREG_GS, &vmx->rmode.segs[VCPU_SREG_GS]);
 
 	vmx_segment_cache_clear(vmx);
 
@@ -2768,17 +2762,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)
 	return kvm->arch.tss_addr;
 }
 
-static void save_rmode_seg(int seg, struct kvm_save_segment *save)
-{
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
-
-	save->selector = vmcs_read16(sf->selector);
-	save->base = vmcs_readl(sf->base);
-	save->limit = vmcs_read32(sf->limit);
-	save->ar = vmcs_read32(sf->ar_bytes);
-}
-
-static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
+static void fix_rmode_seg(int seg, struct kvm_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
@@ -2801,14 +2785,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
 	if (enable_unrestricted_guest)
 		return;
 
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_FS], VCPU_SREG_FS);
+	vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_GS], VCPU_SREG_GS);
+
 	vmx->emulation_required = 1;
 	vmx->rmode.vm86_active = 1;
 
-	save_rmode_seg(VCPU_SREG_TR, &vmx->rmode.tr);
-	save_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
-	save_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
-	save_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
-	save_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
 
 	/*
 	 * Very old userspace does not call KVM_SET_TSS_ADDR before entering
@@ -3118,7 +3103,6 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_save_segment *save;
 	u32 ar;
 
 	if (vmx->rmode.vm86_active
@@ -3126,27 +3110,15 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 		|| seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
 		|| seg == VCPU_SREG_GS)
 	    && !emulate_invalid_guest_state) {
-		switch (seg) {
-		case VCPU_SREG_TR: save = &vmx->rmode.tr; break;
-		case VCPU_SREG_ES: save = &vmx->rmode.es; break;
-		case VCPU_SREG_DS: save = &vmx->rmode.ds; break;
-		case VCPU_SREG_FS: save = &vmx->rmode.fs; break;
-		case VCPU_SREG_GS: save = &vmx->rmode.gs; break;
-		default: BUG();
-		}
-		var->selector = save->selector;
-		var->base = save->base;
-		var->limit = save->limit;
-		ar = save->ar;
+		*var = vmx->rmode.segs[seg];
 		if (seg == VCPU_SREG_TR
 		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
-			goto use_saved_rmode_seg;
+			return;
 	}
 	var->base = vmx_read_guest_seg_base(vmx, seg);
 	var->limit = vmx_read_guest_seg_limit(vmx, seg);
 	var->selector = vmx_read_guest_seg_selector(vmx, seg);
 	ar = vmx_read_guest_seg_ar(vmx, seg);
-use_saved_rmode_seg:
 	if ((ar & AR_UNUSABLE_MASK) && !emulate_invalid_guest_state)
 		ar = 0;
 	var->type = ar & 15;
@@ -3235,10 +3207,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 
 	if (vmx->rmode.vm86_active && seg == VCPU_SREG_TR) {
 		vmcs_write16(sf->selector, var->selector);
-		vmx->rmode.tr.selector = var->selector;
-		vmx->rmode.tr.base = var->base;
-		vmx->rmode.tr.limit = var->limit;
-		vmx->rmode.tr.ar = vmx_segment_access_rights(var);
+		vmx->rmode.segs[VCPU_SREG_TR] = *var;
 		return;
 	}
 	vmcs_writel(sf->base, var->base);
@@ -3289,16 +3258,10 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 				     vmcs_readl(GUEST_CS_BASE) >> 4);
 			break;
 		case VCPU_SREG_ES:
-			fix_rmode_seg(VCPU_SREG_ES, &vmx->rmode.es);
-			break;
 		case VCPU_SREG_DS:
-			fix_rmode_seg(VCPU_SREG_DS, &vmx->rmode.ds);
-			break;
 		case VCPU_SREG_GS:
-			fix_rmode_seg(VCPU_SREG_GS, &vmx->rmode.gs);
-			break;
 		case VCPU_SREG_FS:
-			fix_rmode_seg(VCPU_SREG_FS, &vmx->rmode.fs);
+			fix_rmode_seg(seg, &vmx->rmode.segs[seg]);
 			break;
 		case VCPU_SREG_SS:
 			vmcs_write16(GUEST_SS_SELECTOR,
-- 
cgit v1.2.3


From c865c43de66dc973865bda337022f03b6e16c8df Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:01 +0300
Subject: KVM: VMX: Retain limit and attributes when entering protected mode

Real processors don't change segment limits and attributes while in
real mode.  Mimic that behaviour.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7e95ff68b9da..88eeb405560f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2696,14 +2696,14 @@ static __exit void hardware_unsetup(void)
 static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
 {
 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	struct kvm_segment tmp = *save;
 
-	if (vmcs_readl(sf->base) == save->base && save->s) {
-		vmx_set_segment(vcpu, save, seg);
-	} else {
-		u32 dpl = (vmcs_read16(sf->selector) & SELECTOR_RPL_MASK)
-			<< AR_DPL_SHIFT;
-		vmcs_write32(sf->ar_bytes, 0x93 | dpl);
+	if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
+		tmp.base = vmcs_readl(sf->base);
+		tmp.selector = vmcs_read16(sf->selector);
+		tmp.s = 1;
 	}
+	vmx_set_segment(vcpu, &tmp, seg);
 }
 
 static void enter_pmode(struct kvm_vcpu *vcpu)
-- 
cgit v1.2.3


From 495e116684cebc5ae625916aba37fc07f345707b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:02 +0300
Subject: KVM: VMX: Allow real mode emulation using vm86 with dpl=0

Real mode is always entered from protected mode with dpl=0.  Since
the dpl doesn't affect execution, and we already override it to 3
in the vmcs (as vmx requires), we can allow execution in that state.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 88eeb405560f..4811d91759a1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3317,7 +3317,7 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
 		return false;
 	if (var.limit != 0xffff)
 		return false;
-	if (ar != 0xf3)
+	if ((ar | (3 << AR_DPL_SHIFT)) != 0xf3)
 		return false;
 
 	return true;
-- 
cgit v1.2.3


From e2a610d7fc3e285af8061ff071761752255d95f6 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:03 +0300
Subject: KVM: VMX: Allow vm86 virtualization of big real mode

Usually, big real mode uses large (4GB) segments.  Currently we don't
virtualize this; if any segment has a limit other than 0xffff, we emulate.
But if we set the vmx-visible limit to 0xffff, we can use vm86 to virtualize
real mode; if an access overruns the segment limit, the guest will #GP, which
we will trap and forward to the emulator.  This results in significantly
faster execution, and less risk of hitting an unemulated instruction.

If the limit is less than 0xffff, we retain the existing behaviour.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 4811d91759a1..fd21eb454663 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3315,7 +3315,7 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
 
 	if (var.base != (var.selector << 4))
 		return false;
-	if (var.limit != 0xffff)
+	if (var.limit < 0xffff)
 		return false;
 	if ((ar | (3 << AR_DPL_SHIFT)) != 0xf3)
 		return false;
-- 
cgit v1.2.3


From 03ebebeb1ff5d1d6209fd8df4ffc9204df82bd55 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:04 +0300
Subject: KVM: x86 emulator: Leave segment limit and attributs alone in real
 mode

When loading a segment in real mode, only the base and selector must
be modified.  The limit needs to be left alone, otherwise big real mode
users will hit a #GP due to limit checking (currently this is suppressed
because we don't check limits in real mode).

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/emulate.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5e27ba532613..f8b27cd2a6cb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1388,19 +1388,15 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	bool null_selector = !(selector & ~0x3); /* 0000-0003 are null */
 	ulong desc_addr;
 	int ret;
+	u16 dummy;
 
 	memset(&seg_desc, 0, sizeof seg_desc);
 
 	if ((seg <= VCPU_SREG_GS && ctxt->mode == X86EMUL_MODE_VM86)
 	    || ctxt->mode == X86EMUL_MODE_REAL) {
 		/* set real mode segment descriptor */
+		ctxt->ops->get_segment(ctxt, &dummy, &seg_desc, NULL, seg);
 		set_desc_base(&seg_desc, selector << 4);
-		set_desc_limit(&seg_desc, 0xffff);
-		seg_desc.type = 3;
-		seg_desc.p = 1;
-		seg_desc.s = 1;
-		if (ctxt->mode == X86EMUL_MODE_VM86)
-			seg_desc.dpl = 3;
 		goto load;
 	}
 
-- 
cgit v1.2.3


From a5625189f6810ef79ced53989c794acfa10d3370 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:05 +0300
Subject: KVM: x86 emulator: Check segment limits in real mode too

Segment limits are verified in real mode, not just protected mode.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/emulate.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index f8b27cd2a6cb..5b1c701cd6d0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -668,8 +668,6 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 
 	la = seg_base(ctxt, addr.seg) + addr.ea;
 	switch (ctxt->mode) {
-	case X86EMUL_MODE_REAL:
-		break;
 	case X86EMUL_MODE_PROT64:
 		if (((signed long)la << 16) >> 16 != la)
 			return emulate_gp(ctxt, 0);
@@ -699,7 +697,10 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 				goto bad;
 		}
 		cpl = ctxt->ops->cpl(ctxt);
-		rpl = sel & 3;
+		if (ctxt->mode == X86EMUL_MODE_REAL)
+			rpl = 0;
+		else
+			rpl = sel & 3;
 		cpl = max(cpl, rpl);
 		if (!(desc.type & 8)) {
 			/* data segment */
-- 
cgit v1.2.3


From 0afbe2f8781a812c7e501ec129eff45b21f792af Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:06 +0300
Subject: KVM: x86 emulator: Fix #GP error code during linearization

We want the segment selector, nor segment number.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/emulate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5b1c701cd6d0..1451cffd97eb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -725,9 +725,9 @@ static int __linearize(struct x86_emulate_ctxt *ctxt,
 	return X86EMUL_CONTINUE;
 bad:
 	if (addr.seg == VCPU_SREG_SS)
-		return emulate_ss(ctxt, addr.seg);
+		return emulate_ss(ctxt, sel);
 	else
-		return emulate_gp(ctxt, addr.seg);
+		return emulate_gp(ctxt, sel);
 }
 
 static int linearize(struct x86_emulate_ctxt *ctxt,
-- 
cgit v1.2.3


From 726364202853f843a97a8ba4a7c3cd91e3aa84b7 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:07 +0300
Subject: KVM: VMX: Return real real-mode segment data even if
 emulate_invalid_guest_state=1

emulate_invalid_guest_state=1 doesn't mean we don't munge the segments in the
vmcs; we do.  So we need to return the real ones (maintained by vmx_set_segment).

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fd21eb454663..0d6872621ab5 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3108,8 +3108,7 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 	if (vmx->rmode.vm86_active
 	    && (seg == VCPU_SREG_TR || seg == VCPU_SREG_ES
 		|| seg == VCPU_SREG_DS || seg == VCPU_SREG_FS
-		|| seg == VCPU_SREG_GS)
-	    && !emulate_invalid_guest_state) {
+		|| seg == VCPU_SREG_GS)) {
 		*var = vmx->rmode.segs[seg];
 		if (seg == VCPU_SREG_TR
 		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
-- 
cgit v1.2.3


From 1390a28b274e2e45f89bac67c435cbcbc5cc0790 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:08 +0300
Subject: KVM: VMX: Preserve segment limit and access rights in real mode

While this is undocumented, real processors do not reload the segment
limit and access rights when loading a segment register in real mode.
Real programs rely on it so we need to comply with this behaviour.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0d6872621ab5..6e6421aeca03 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3113,6 +3113,9 @@ static void vmx_get_segment(struct kvm_vcpu *vcpu,
 		if (seg == VCPU_SREG_TR
 		    || var->selector == vmx_read_guest_seg_selector(vmx, seg))
 			return;
+		var->base = vmx_read_guest_seg_base(vmx, seg);
+		var->selector = vmx_read_guest_seg_selector(vmx, seg);
+		return;
 	}
 	var->base = vmx_read_guest_seg_base(vmx, seg);
 	var->limit = vmx_read_guest_seg_limit(vmx, seg);
-- 
cgit v1.2.3


From ce5668034752e52e995c8dc625337380099a088a Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:09 +0300
Subject: KVM: VMX: Save all segment data in real mode

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6e6421aeca03..62b2d0cf2a7f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3216,6 +3216,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 	vmcs_write32(sf->limit, var->limit);
 	vmcs_write16(sf->selector, var->selector);
 	if (vmx->rmode.vm86_active && var->s) {
+		vmx->rmode.segs[seg] = *var;
 		/*
 		 * Hack real-mode segments into vm86 compatibility.
 		 */
-- 
cgit v1.2.3


From a81aba14dc0ea499f4c218b5db0303b2ea8151d3 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Tue, 21 Aug 2012 17:07:10 +0300
Subject: KVM: VMX: Ignore segment G and D bits when considering whether we can
 virtualize

We will enter the guest with G and D cleared; as real hardware ignores D in
real mode, and G is taken care of by the limit test, we allow more code to
run in vm86 mode.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 62b2d0cf2a7f..248c2b490e9b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3320,7 +3320,7 @@ static bool rmode_segment_valid(struct kvm_vcpu *vcpu, int seg)
 		return false;
 	if (var.limit < 0xffff)
 		return false;
-	if ((ar | (3 << AR_DPL_SHIFT)) != 0xf3)
+	if (((ar | (3 << AR_DPL_SHIFT)) & ~(AR_G_MASK | AR_DB_MASK)) != 0xf3)
 		return false;
 
 	return true;
-- 
cgit v1.2.3


From 9a7819774e4236e8736a074b7e85276967911924 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 30 Aug 2012 17:45:54 -0300
Subject: KVM: x86: remove unused variable from kvm_task_switch()

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e00050ce7a6a..20f2266dfb64 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5714,7 +5714,6 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 {
 	struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt;
 	int ret;
-	unsigned reg;
 
 	init_emulate_ctxt(vcpu);
 
-- 
cgit v1.2.3


From e09a8417823992700c063f9b8f85119f2d9016ed Mon Sep 17 00:00:00 2001
From: Jon Mason <jdmason@kudzu.us>
Date: Tue, 10 Jul 2012 15:31:30 -0700
Subject: hpet: Remove unused PCI Vendor ID #define

HPET_ID_VENDOR_8086 is defined but never used.  It would be a redefine
of PCI_VENDOR_ID_INTEL if it was ever used.

Signed-off-by: Jon Mason <jdmason@kudzu.us>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 arch/x86/include/asm/hpet.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/hpet.h b/arch/x86/include/asm/hpet.h
index 2c392d663dce..434e2106cc87 100644
--- a/arch/x86/include/asm/hpet.h
+++ b/arch/x86/include/asm/hpet.h
@@ -35,8 +35,6 @@
 #define	HPET_ID_NUMBER_SHIFT	8
 #define HPET_ID_VENDOR_SHIFT	16
 
-#define HPET_ID_VENDOR_8086	0x8086
-
 #define HPET_CFG_ENABLE		0x001
 #define HPET_CFG_LEGACY		0x002
 #define	HPET_LEGACY_8254	2
-- 
cgit v1.2.3


From ec798660cf72c981ad8eed272487a0fe2b3222f2 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 3 Sep 2012 14:47:25 +0300
Subject: KVM: cleanup pic reset

kvm_pic_reset() is not used anywhere. Move reset logic from
pic_ioport_write() there.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/i8259.c | 52 +++++++++++-----------------------------------------
 1 file changed, 11 insertions(+), 41 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 90c84f947d45..848206df0967 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -275,23 +275,20 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 {
 	int irq, i;
 	struct kvm_vcpu *vcpu;
-	u8 irr = s->irr, isr = s->imr;
+	u8 edge_irr = s->irr & ~s->elcr;
 	bool found = false;
 
 	s->last_irr = 0;
-	s->irr = 0;
+	s->irr &= s->elcr;
 	s->imr = 0;
-	s->isr = 0;
 	s->priority_add = 0;
-	s->irq_base = 0;
-	s->read_reg_select = 0;
-	s->poll = 0;
 	s->special_mask = 0;
-	s->init_state = 0;
-	s->auto_eoi = 0;
-	s->rotate_on_auto_eoi = 0;
-	s->special_fully_nested_mode = 0;
-	s->init4 = 0;
+	s->read_reg_select = 0;
+	if (!s->init4) {
+		s->special_fully_nested_mode = 0;
+		s->auto_eoi = 0;
+	}
+	s->init_state = 1;
 
 	kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm)
 		if (kvm_apic_accept_pic_intr(vcpu)) {
@@ -304,7 +301,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s)
 		return;
 
 	for (irq = 0; irq < PIC_NUM_PINS/2; irq++)
-		if (irr & (1 << irq) || isr & (1 << irq))
+		if (edge_irr & (1 << irq))
 			pic_clear_isr(s, irq);
 }
 
@@ -316,40 +313,13 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 	addr &= 1;
 	if (addr == 0) {
 		if (val & 0x10) {
-			u8 edge_irr = s->irr & ~s->elcr;
-			int i;
-			bool found;
-			struct kvm_vcpu *vcpu;
-
 			s->init4 = val & 1;
-			s->last_irr = 0;
-			s->irr &= s->elcr;
-			s->imr = 0;
-			s->priority_add = 0;
-			s->special_mask = 0;
-			s->read_reg_select = 0;
-			if (!s->init4) {
-				s->special_fully_nested_mode = 0;
-				s->auto_eoi = 0;
-			}
-			s->init_state = 1;
 			if (val & 0x02)
 				pr_pic_unimpl("single mode not supported");
 			if (val & 0x08)
 				pr_pic_unimpl(
-					"level sensitive irq not supported");
-
-			kvm_for_each_vcpu(i, vcpu, s->pics_state->kvm)
-				if (kvm_apic_accept_pic_intr(vcpu)) {
-					found = true;
-					break;
-				}
-
-
-			if (found)
-				for (irq = 0; irq < PIC_NUM_PINS/2; irq++)
-					if (edge_irr & (1 << irq))
-						pic_clear_isr(s, irq);
+						"level sensitive irq not supported");
+			kvm_pic_reset(s);
 		} else if (val & 0x08) {
 			if (val & 0x04)
 				s->poll = 1;
-- 
cgit v1.2.3


From 749c59fd15b2c18dd6c15c353a899fb6ac49b865 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@jamieiles.com>
Date: Thu, 30 Aug 2012 11:32:13 +0100
Subject: KVM: PIC: fix use of uninitialised variable.

Commit aea218f3cbbc (KVM: PIC: call ack notifiers for irqs that are
dropped form irr) used an uninitialised variable to track whether an
appropriate apic had been found.  This could result in calling the ack
notifier incorrectly.

Cc: Gleb Natapov <gleb@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Signed-off-by: Jamie Iles <jamie@jamieiles.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/i8259.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index e498b18f010c..9fc9aa7ac703 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -318,7 +318,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 val)
 		if (val & 0x10) {
 			u8 edge_irr = s->irr & ~s->elcr;
 			int i;
-			bool found;
+			bool found = false;
 			struct kvm_vcpu *vcpu;
 
 			s->init4 = val & 1;
-- 
cgit v1.2.3


From 3ec18cd8b8f8395d0df604c62ab3bc2cf3a966b4 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Mon, 20 Aug 2012 11:24:21 +0200
Subject: perf/x86: Enable Intel Cedarview Atom suppport

This patch enables perf_events support for Intel Cedarview
Atom (model 54) processors. Support includes PEBS and LBR.
Tested on my Atom N2600 netbook.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120820092421.GA11284@quad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c     | 1 +
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 7f2739e03e79..0d3d63afa76a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2008,6 +2008,7 @@ __init int intel_pmu_init(void)
 		break;
 
 	case 28: /* Atom */
+	case 54: /* Cedariew */
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 520b4265fcd2..da02e9cc3754 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -686,7 +686,8 @@ void intel_pmu_lbr_init_atom(void)
 	 * to have an operational LBR which can freeze
 	 * on PMU interrupt
 	 */
-	if (boot_cpu_data.x86_mask < 10) {
+	if (boot_cpu_data.x86_model == 28
+	    && boot_cpu_data.x86_mask < 10) {
 		pr_cont("LBR disabled due to erratum");
 		return;
 	}
-- 
cgit v1.2.3


From 406eae5d70c1a849f4a050f708a459c2349e9dda Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Sun, 2 Sep 2012 22:20:56 -0700
Subject: x86/Kconfig: Update defconfigs to current results of "make
 savedefconfig"

The x86 defconfigs have become somewhat out of date compared to
the current result of "make savedefconfig".  Update them to the
current output, as a prelude to further defconfig changes, to
avoid unrelated noise in those further changes.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/80c8a5fbeaf6cdb72fb78a016013427efee52668.1346649518.git.josh@joshtriplett.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/configs/i386_defconfig   | 12 ++++--------
 arch/x86/configs/x86_64_defconfig | 12 ++++--------
 2 files changed, 8 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 119db67dcb03..19034087c1b7 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
 CONFIG_CGROUP_FREEZER=y
@@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y
 CONFIG_SUN_PARTITION=y
 CONFIG_KARMA_PARTITION=y
 CONFIG_EFI_PARTITION=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
 CONFIG_SMP=y
 CONFIG_X86_GENERIC=y
 CONFIG_HPET_TIMER=y
@@ -231,8 +231,6 @@ CONFIG_SND_HRTIMER=y
 CONFIG_SND_HDA_INTEL=y
 CONFIG_SND_HDA_HWDEP=y
 CONFIG_HIDRAW=y
-CONFIG_HID_PID=y
-CONFIG_USB_HIDDEV=y
 CONFIG_HID_GYRATION=y
 CONFIG_LOGITECH_FF=y
 CONFIG_HID_NTRIG=y
@@ -243,11 +241,11 @@ CONFIG_HID_SAMSUNG=y
 CONFIG_HID_SONY=y
 CONFIG_HID_SUNPLUS=y
 CONFIG_HID_TOPSEED=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
 CONFIG_USB_DEBUG=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_DEVICE_CLASS is not set
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
@@ -280,7 +278,6 @@ CONFIG_PROC_KCORE=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
 CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
@@ -299,7 +296,6 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 CONFIG_EARLY_PRINTK_DBGP=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 76eb2903809f..c2c044846c70 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -8,6 +8,8 @@ CONFIG_TASK_DELAY_ACCT=y
 CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
+CONFIG_NO_HZ=y
+CONFIG_HIGH_RES_TIMERS=y
 CONFIG_LOG_BUF_SHIFT=18
 CONFIG_CGROUPS=y
 CONFIG_CGROUP_FREEZER=y
@@ -34,8 +36,6 @@ CONFIG_SGI_PARTITION=y
 CONFIG_SUN_PARTITION=y
 CONFIG_KARMA_PARTITION=y
 CONFIG_EFI_PARTITION=y
-CONFIG_NO_HZ=y
-CONFIG_HIGH_RES_TIMERS=y
 CONFIG_SMP=y
 CONFIG_CALGARY_IOMMU=y
 CONFIG_NR_CPUS=64
@@ -227,8 +227,6 @@ CONFIG_SND_HRTIMER=y
 CONFIG_SND_HDA_INTEL=y
 CONFIG_SND_HDA_HWDEP=y
 CONFIG_HIDRAW=y
-CONFIG_HID_PID=y
-CONFIG_USB_HIDDEV=y
 CONFIG_HID_GYRATION=y
 CONFIG_LOGITECH_FF=y
 CONFIG_HID_NTRIG=y
@@ -239,11 +237,11 @@ CONFIG_HID_SAMSUNG=y
 CONFIG_HID_SONY=y
 CONFIG_HID_SUNPLUS=y
 CONFIG_HID_TOPSEED=y
+CONFIG_HID_PID=y
+CONFIG_USB_HIDDEV=y
 CONFIG_USB=y
 CONFIG_USB_DEBUG=y
 CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
-CONFIG_USB_DEVICEFS=y
-# CONFIG_USB_DEVICE_CLASS is not set
 CONFIG_USB_MON=y
 CONFIG_USB_EHCI_HCD=y
 # CONFIG_USB_EHCI_TT_NEWSCHED is not set
@@ -280,7 +278,6 @@ CONFIG_PROC_KCORE=y
 CONFIG_TMPFS_POSIX_ACL=y
 CONFIG_HUGETLBFS=y
 CONFIG_NFS_FS=y
-CONFIG_NFS_V3=y
 CONFIG_NFS_V3_ACL=y
 CONFIG_NFS_V4=y
 CONFIG_ROOT_NFS=y
@@ -298,7 +295,6 @@ CONFIG_DEBUG_KERNEL=y
 CONFIG_SCHEDSTATS=y
 CONFIG_TIMER_STATS=y
 CONFIG_DEBUG_STACK_USAGE=y
-CONFIG_SYSCTL_SYSCALL_CHECK=y
 CONFIG_BLK_DEV_IO_TRACE=y
 CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 CONFIG_EARLY_PRINTK_DBGP=y
-- 
cgit v1.2.3


From 3fe2cb8f9e9486980ff03d7e020781cfdb028ffa Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Sun, 2 Sep 2012 22:21:05 -0700
Subject: x86/Kconfig: Switch to ext4 in defconfigs

The current x86 and x86-64 defconfigs do not enable ext4, which
most current distributions default to.  Switch the defconfigs to
ext4, so they will boot on current systems without additional
configuration.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/bd8a359506b7e1287c680823de16d67608ec52fe.1346649518.git.josh@joshtriplett.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/configs/i386_defconfig   | 7 +++----
 arch/x86/configs/x86_64_defconfig | 7 +++----
 2 files changed, 6 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 19034087c1b7..2701b8a87752 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -260,10 +260,9 @@ CONFIG_RTC_CLASS=y
 CONFIG_DMADEVICES=y
 CONFIG_EEEPC_LAPTOP=y
 CONFIG_EFI_VARS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index c2c044846c70..c17614efb98f 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -260,10 +260,9 @@ CONFIG_AMD_IOMMU_STATS=y
 CONFIG_INTEL_IOMMU=y
 # CONFIG_INTEL_IOMMU_DEFAULT_ON is not set
 CONFIG_EFI_VARS=y
-CONFIG_EXT3_FS=y
-# CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
-CONFIG_EXT3_FS_POSIX_ACL=y
-CONFIG_EXT3_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
 CONFIG_QUOTA=y
 CONFIG_QUOTA_NETLINK_INTERFACE=y
 # CONFIG_PRINT_QUOTA_WARNING is not set
-- 
cgit v1.2.3


From b92f885f31db782608c8b73942111238020b1f4a Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Sun, 2 Sep 2012 22:21:13 -0700
Subject: x86/Kconfig: Disable CONFIG_CRC_T10DIF in defconfigs

CONFIG_CRC_T10DIF explicitly states that it exists only for use
by out-of-tree modules; anything in-kernel that needs it selects
it.  Thus, compile it out by default.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/3aaff7a0af1320427952d411a21b8ded29747a1f.1346649518.git.josh@joshtriplett.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/configs/i386_defconfig   | 1 -
 arch/x86/configs/x86_64_defconfig | 1 -
 2 files changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index 2701b8a87752..d833bb6d0161 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -311,4 +311,3 @@ CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_DISABLE=y
 CONFIG_CRYPTO_AES_586=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index c17614efb98f..7ddcd99b2401 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -309,4 +309,3 @@ CONFIG_SECURITY_SELINUX=y
 CONFIG_SECURITY_SELINUX_BOOTPARAM=y
 CONFIG_SECURITY_SELINUX_DISABLE=y
 # CONFIG_CRYPTO_ANSI_CPRNG is not set
-CONFIG_CRC_T10DIF=y
-- 
cgit v1.2.3


From be2f328d8616c81b86e4013974608776c317bbeb Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Sun, 2 Sep 2012 22:21:21 -0700
Subject: x86/Kconfig: Turn off CONFIG_BLK_DEV_RAM

The vast majority of systems either use initramfs or mount a
root filesystem directly from the kernel.  Distros have
defaulted to initramfs for years.  Only highly specialized
systems would use an actual filesystem-image initrd at this
point, and such systems don't rely on defconfig anyway.  Drop
initrd support (and specifically RAM block device support) from
the defconfigs.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/2521e983a63595cd7a331236d929577660f89c72.1346649518.git.josh@joshtriplett.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/configs/i386_defconfig   | 2 --
 arch/x86/configs/x86_64_defconfig | 2 --
 2 files changed, 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index d833bb6d0161..a6533a292229 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEBUG_DEVRES=y
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=16384
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_BLK_DEV_SR_VENDOR=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 7ddcd99b2401..18f3cc47ab8f 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -144,8 +144,6 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_DEBUG_DEVRES=y
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_LOOP=y
-CONFIG_BLK_DEV_RAM=y
-CONFIG_BLK_DEV_RAM_SIZE=16384
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_BLK_DEV_SR_VENDOR=y
-- 
cgit v1.2.3


From c206b9dcb1fc41b104db43e98f9b83a8c0c559ae Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Sun, 2 Sep 2012 22:21:29 -0700
Subject: x86/Kconfig: Turn off DEBUG_NX_TEST module in defconfigs

The x86 defconfigs include exactly one module: test_nx.ko, a
special-purpose module which just exists to do evil things like
executing code off the stack to see if the kernel has enabled NX
support.  Anyone who actually uses that module can easily enable
it themselves, but the vast majority of kernel builds don't need
it; disable it by default.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Cc: Randy Dunlap <rdunlap@xenotime.net>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Link: http://lkml.kernel.org/r/e72faf875e1172fb1cbec5e6d3cd4122df508a97.1346649518.git.josh@joshtriplett.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/configs/i386_defconfig   | 1 -
 arch/x86/configs/x86_64_defconfig | 1 -
 2 files changed, 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index a6533a292229..5598547281a7 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -298,7 +298,6 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_RODATA_TEST is not set
-CONFIG_DEBUG_NX_TEST=m
 CONFIG_DEBUG_BOOT_PARAMS=y
 CONFIG_OPTIMIZE_INLINING=y
 CONFIG_KEYS_DEBUG_PROC_KEYS=y
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 18f3cc47ab8f..671524d0f6c0 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -297,7 +297,6 @@ CONFIG_PROVIDE_OHCI1394_DMA_INIT=y
 CONFIG_EARLY_PRINTK_DBGP=y
 CONFIG_DEBUG_STACKOVERFLOW=y
 # CONFIG_DEBUG_RODATA_TEST is not set
-CONFIG_DEBUG_NX_TEST=m
 CONFIG_DEBUG_BOOT_PARAMS=y
 CONFIG_OPTIMIZE_INLINING=y
 CONFIG_KEYS_DEBUG_PROC_KEYS=y
-- 
cgit v1.2.3


From 4454d32749465ffa77d82bc1fdd196d6dedc544b Mon Sep 17 00:00:00 2001
From: Joe Millenbach <jmillenbach@gmail.com>
Date: Sun, 2 Sep 2012 17:38:20 -0700
Subject: x86/kconfig: Remove outdated reference to Intel CPUs in
 CONFIG_SWIOTLB

Deleted the no longer valid example of which x86 CPUs lack a
hardware IOMMU, and moved the "If unsure..." statement to a new
line to follow the style of surrounding options.

Signed-off-by: Joe Millenbach <jmillenbach@gmail.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Cc: team-fjord@googlegroups.com
Cc: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Link: http://lkml.kernel.org/r/1346632700-29113-1-git-send-email-jmillenbach@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Kconfig | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4abd..50a1d1f9b6d3 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -746,10 +746,10 @@ config SWIOTLB
 	def_bool y if X86_64
 	---help---
 	  Support for software bounce buffers used on x86-64 systems
-	  which don't have a hardware IOMMU (e.g. the current generation
-	  of Intel's x86-64 CPUs). Using this PCI devices which can only
-	  access 32-bits of memory can be used on systems with more than
-	  3 GB of memory. If unsure, say Y.
+	  which don't have a hardware IOMMU. Using this PCI devices
+	  which can only access 32-bits of memory can be used on systems
+	  with more than 3 GB of memory.
+	  If unsure, say Y.
 
 config IOMMU_HELPER
 	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
-- 
cgit v1.2.3


From 326d07cb30fed2387efccd4bf3bd8e4f28719e9e Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:13 +0200
Subject: KVM: x86: minor size optimization

Some fields can be constified and/or made static to reduce code and data
size.

Numbers for a 32 bit build:

        text    data     bss     dec     hex filename
before: 3351      80       0    3431     d67 cpuid.o
 after: 3391       0       0    3391     d3f cpuid.o

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/cpuid.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b496da684bd6..ec79e773342e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -397,8 +397,8 @@ static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 		break;
 	}
 	case KVM_CPUID_SIGNATURE: {
-		char signature[12] = "KVMKVMKVM\0\0";
-		u32 *sigptr = (u32 *)signature;
+		static const char signature[12] = "KVMKVMKVM\0\0";
+		const u32 *sigptr = (const u32 *)signature;
 		entry->eax = KVM_CPUID_FEATURES;
 		entry->ebx = sigptr[0];
 		entry->ecx = sigptr[1];
@@ -484,10 +484,10 @@ struct kvm_cpuid_param {
 	u32 func;
 	u32 idx;
 	bool has_leaf_count;
-	bool (*qualifier)(struct kvm_cpuid_param *param);
+	bool (*qualifier)(const struct kvm_cpuid_param *param);
 };
 
-static bool is_centaur_cpu(struct kvm_cpuid_param *param)
+static bool is_centaur_cpu(const struct kvm_cpuid_param *param)
 {
 	return boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR;
 }
@@ -498,7 +498,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 	struct kvm_cpuid_entry2 *cpuid_entries;
 	int limit, nent = 0, r = -E2BIG, i;
 	u32 func;
-	static struct kvm_cpuid_param param[] = {
+	static const struct kvm_cpuid_param param[] = {
 		{ .func = 0, .has_leaf_count = true },
 		{ .func = 0x80000000, .has_leaf_count = true },
 		{ .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
@@ -517,7 +517,7 @@ int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid,
 
 	r = 0;
 	for (i = 0; i < ARRAY_SIZE(param); i++) {
-		struct kvm_cpuid_param *ent = &param[i];
+		const struct kvm_cpuid_param *ent = &param[i];
 
 		if (ent->qualifier && !ent->qualifier(ent))
 			continue;
-- 
cgit v1.2.3


From 89a87c67791b840d815a2028b88cefe6906ed42c Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:14 +0200
Subject: KVM: x86 emulator: use aligned variants of SSE register ops

As the the compiler ensures that the memory operand is always aligned
to a 16 byte memory location, use the aligned variant of MOVDQ for
read_sse_reg() and write_sse_reg().

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c | 64 +++++++++++++++++++++++++-------------------------
 1 file changed, 32 insertions(+), 32 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 1451cffd97eb..5a0fee1a19c9 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -909,23 +909,23 @@ static void read_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data, int reg)
 {
 	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
-	case 0: asm("movdqu %%xmm0, %0" : "=m"(*data)); break;
-	case 1: asm("movdqu %%xmm1, %0" : "=m"(*data)); break;
-	case 2: asm("movdqu %%xmm2, %0" : "=m"(*data)); break;
-	case 3: asm("movdqu %%xmm3, %0" : "=m"(*data)); break;
-	case 4: asm("movdqu %%xmm4, %0" : "=m"(*data)); break;
-	case 5: asm("movdqu %%xmm5, %0" : "=m"(*data)); break;
-	case 6: asm("movdqu %%xmm6, %0" : "=m"(*data)); break;
-	case 7: asm("movdqu %%xmm7, %0" : "=m"(*data)); break;
+	case 0: asm("movdqa %%xmm0, %0" : "=m"(*data)); break;
+	case 1: asm("movdqa %%xmm1, %0" : "=m"(*data)); break;
+	case 2: asm("movdqa %%xmm2, %0" : "=m"(*data)); break;
+	case 3: asm("movdqa %%xmm3, %0" : "=m"(*data)); break;
+	case 4: asm("movdqa %%xmm4, %0" : "=m"(*data)); break;
+	case 5: asm("movdqa %%xmm5, %0" : "=m"(*data)); break;
+	case 6: asm("movdqa %%xmm6, %0" : "=m"(*data)); break;
+	case 7: asm("movdqa %%xmm7, %0" : "=m"(*data)); break;
 #ifdef CONFIG_X86_64
-	case 8: asm("movdqu %%xmm8, %0" : "=m"(*data)); break;
-	case 9: asm("movdqu %%xmm9, %0" : "=m"(*data)); break;
-	case 10: asm("movdqu %%xmm10, %0" : "=m"(*data)); break;
-	case 11: asm("movdqu %%xmm11, %0" : "=m"(*data)); break;
-	case 12: asm("movdqu %%xmm12, %0" : "=m"(*data)); break;
-	case 13: asm("movdqu %%xmm13, %0" : "=m"(*data)); break;
-	case 14: asm("movdqu %%xmm14, %0" : "=m"(*data)); break;
-	case 15: asm("movdqu %%xmm15, %0" : "=m"(*data)); break;
+	case 8: asm("movdqa %%xmm8, %0" : "=m"(*data)); break;
+	case 9: asm("movdqa %%xmm9, %0" : "=m"(*data)); break;
+	case 10: asm("movdqa %%xmm10, %0" : "=m"(*data)); break;
+	case 11: asm("movdqa %%xmm11, %0" : "=m"(*data)); break;
+	case 12: asm("movdqa %%xmm12, %0" : "=m"(*data)); break;
+	case 13: asm("movdqa %%xmm13, %0" : "=m"(*data)); break;
+	case 14: asm("movdqa %%xmm14, %0" : "=m"(*data)); break;
+	case 15: asm("movdqa %%xmm15, %0" : "=m"(*data)); break;
 #endif
 	default: BUG();
 	}
@@ -937,23 +937,23 @@ static void write_sse_reg(struct x86_emulate_ctxt *ctxt, sse128_t *data,
 {
 	ctxt->ops->get_fpu(ctxt);
 	switch (reg) {
-	case 0: asm("movdqu %0, %%xmm0" : : "m"(*data)); break;
-	case 1: asm("movdqu %0, %%xmm1" : : "m"(*data)); break;
-	case 2: asm("movdqu %0, %%xmm2" : : "m"(*data)); break;
-	case 3: asm("movdqu %0, %%xmm3" : : "m"(*data)); break;
-	case 4: asm("movdqu %0, %%xmm4" : : "m"(*data)); break;
-	case 5: asm("movdqu %0, %%xmm5" : : "m"(*data)); break;
-	case 6: asm("movdqu %0, %%xmm6" : : "m"(*data)); break;
-	case 7: asm("movdqu %0, %%xmm7" : : "m"(*data)); break;
+	case 0: asm("movdqa %0, %%xmm0" : : "m"(*data)); break;
+	case 1: asm("movdqa %0, %%xmm1" : : "m"(*data)); break;
+	case 2: asm("movdqa %0, %%xmm2" : : "m"(*data)); break;
+	case 3: asm("movdqa %0, %%xmm3" : : "m"(*data)); break;
+	case 4: asm("movdqa %0, %%xmm4" : : "m"(*data)); break;
+	case 5: asm("movdqa %0, %%xmm5" : : "m"(*data)); break;
+	case 6: asm("movdqa %0, %%xmm6" : : "m"(*data)); break;
+	case 7: asm("movdqa %0, %%xmm7" : : "m"(*data)); break;
 #ifdef CONFIG_X86_64
-	case 8: asm("movdqu %0, %%xmm8" : : "m"(*data)); break;
-	case 9: asm("movdqu %0, %%xmm9" : : "m"(*data)); break;
-	case 10: asm("movdqu %0, %%xmm10" : : "m"(*data)); break;
-	case 11: asm("movdqu %0, %%xmm11" : : "m"(*data)); break;
-	case 12: asm("movdqu %0, %%xmm12" : : "m"(*data)); break;
-	case 13: asm("movdqu %0, %%xmm13" : : "m"(*data)); break;
-	case 14: asm("movdqu %0, %%xmm14" : : "m"(*data)); break;
-	case 15: asm("movdqu %0, %%xmm15" : : "m"(*data)); break;
+	case 8: asm("movdqa %0, %%xmm8" : : "m"(*data)); break;
+	case 9: asm("movdqa %0, %%xmm9" : : "m"(*data)); break;
+	case 10: asm("movdqa %0, %%xmm10" : : "m"(*data)); break;
+	case 11: asm("movdqa %0, %%xmm11" : : "m"(*data)); break;
+	case 12: asm("movdqa %0, %%xmm12" : : "m"(*data)); break;
+	case 13: asm("movdqa %0, %%xmm13" : : "m"(*data)); break;
+	case 14: asm("movdqa %0, %%xmm14" : : "m"(*data)); break;
+	case 15: asm("movdqa %0, %%xmm15" : : "m"(*data)); break;
 #endif
 	default: BUG();
 	}
-- 
cgit v1.2.3


From fd0a0d82083747301f6c8084b4141bb490625016 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:15 +0200
Subject: KVM: x86 emulator: mark opcode tables const

The opcode tables never change at runtime, therefor mark them const.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5a0fee1a19c9..fd06f9d65847 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -161,9 +161,9 @@ struct opcode {
 	u64 intercept : 8;
 	union {
 		int (*execute)(struct x86_emulate_ctxt *ctxt);
-		struct opcode *group;
-		struct group_dual *gdual;
-		struct gprefix *gprefix;
+		const struct opcode *group;
+		const struct group_dual *gdual;
+		const struct gprefix *gprefix;
 	} u;
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
 };
@@ -3574,13 +3574,13 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt)
 		I2bv(((_f) | DstReg | SrcMem | ModRM) & ~Lock, _e),	\
 		I2bv(((_f) & ~Lock) | DstAcc | SrcImm, _e)
 
-static struct opcode group7_rm1[] = {
+static const struct opcode group7_rm1[] = {
 	DI(SrcNone | Priv, monitor),
 	DI(SrcNone | Priv, mwait),
 	N, N, N, N, N, N,
 };
 
-static struct opcode group7_rm3[] = {
+static const struct opcode group7_rm3[] = {
 	DIP(SrcNone | Prot | Priv,		vmrun,		check_svme_pa),
 	II(SrcNone  | Prot | VendorSpecific,	em_vmmcall,	vmmcall),
 	DIP(SrcNone | Prot | Priv,		vmload,		check_svme_pa),
@@ -3591,13 +3591,13 @@ static struct opcode group7_rm3[] = {
 	DIP(SrcNone | Prot | Priv,		invlpga,	check_svme),
 };
 
-static struct opcode group7_rm7[] = {
+static const struct opcode group7_rm7[] = {
 	N,
 	DIP(SrcNone, rdtscp, check_rdtsc),
 	N, N, N, N, N, N,
 };
 
-static struct opcode group1[] = {
+static const struct opcode group1[] = {
 	I(Lock, em_add),
 	I(Lock | PageTable, em_or),
 	I(Lock, em_adc),
@@ -3608,11 +3608,11 @@ static struct opcode group1[] = {
 	I(0, em_cmp),
 };
 
-static struct opcode group1A[] = {
+static const struct opcode group1A[] = {
 	I(DstMem | SrcNone | Mov | Stack, em_pop), N, N, N, N, N, N, N,
 };
 
-static struct opcode group3[] = {
+static const struct opcode group3[] = {
 	I(DstMem | SrcImm, em_test),
 	I(DstMem | SrcImm, em_test),
 	I(DstMem | SrcNone | Lock, em_not),
@@ -3623,13 +3623,13 @@ static struct opcode group3[] = {
 	I(SrcMem, em_idiv_ex),
 };
 
-static struct opcode group4[] = {
+static const struct opcode group4[] = {
 	I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
 	I(ByteOp | DstMem | SrcNone | Lock, em_grp45),
 	N, N, N, N, N, N,
 };
 
-static struct opcode group5[] = {
+static const struct opcode group5[] = {
 	I(DstMem | SrcNone | Lock,		em_grp45),
 	I(DstMem | SrcNone | Lock,		em_grp45),
 	I(SrcMem | Stack,			em_grp45),
@@ -3639,7 +3639,7 @@ static struct opcode group5[] = {
 	I(SrcMem | Stack,			em_grp45), N,
 };
 
-static struct opcode group6[] = {
+static const struct opcode group6[] = {
 	DI(Prot,	sldt),
 	DI(Prot,	str),
 	II(Prot | Priv | SrcMem16, em_lldt, lldt),
@@ -3647,7 +3647,7 @@ static struct opcode group6[] = {
 	N, N, N, N,
 };
 
-static struct group_dual group7 = { {
+static const struct group_dual group7 = { {
 	II(Mov | DstMem | Priv,			em_sgdt, sgdt),
 	II(Mov | DstMem | Priv,			em_sidt, sidt),
 	II(SrcMem | Priv,			em_lgdt, lgdt),
@@ -3664,7 +3664,7 @@ static struct group_dual group7 = { {
 	EXT(0, group7_rm7),
 } };
 
-static struct opcode group8[] = {
+static const struct opcode group8[] = {
 	N, N, N, N,
 	I(DstMem | SrcImmByte,				em_bt),
 	I(DstMem | SrcImmByte | Lock | PageTable,	em_bts),
@@ -3672,26 +3672,26 @@ static struct opcode group8[] = {
 	I(DstMem | SrcImmByte | Lock | PageTable,	em_btc),
 };
 
-static struct group_dual group9 = { {
+static const struct group_dual group9 = { {
 	N, I(DstMem64 | Lock | PageTable, em_cmpxchg8b), N, N, N, N, N, N,
 }, {
 	N, N, N, N, N, N, N, N,
 } };
 
-static struct opcode group11[] = {
+static const struct opcode group11[] = {
 	I(DstMem | SrcImm | Mov | PageTable, em_mov),
 	X7(D(Undefined)),
 };
 
-static struct gprefix pfx_0f_6f_0f_7f = {
+static const struct gprefix pfx_0f_6f_0f_7f = {
 	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
 
-static struct gprefix pfx_vmovntpx = {
+static const struct gprefix pfx_vmovntpx = {
 	I(0, em_mov), N, N, N,
 };
 
-static struct opcode opcode_table[256] = {
+static const struct opcode opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	I6ALU(Lock, em_add),
 	I(ImplicitOps | Stack | No64 | Src2ES, em_push_sreg),
@@ -3808,7 +3808,7 @@ static struct opcode opcode_table[256] = {
 	D(ImplicitOps), D(ImplicitOps), G(0, group4), G(0, group5),
 };
 
-static struct opcode twobyte_table[256] = {
+static const struct opcode twobyte_table[256] = {
 	/* 0x00 - 0x0F */
 	G(0, group6), GD(0, &group7), N, N,
 	N, I(ImplicitOps | VendorSpecific, em_syscall),
-- 
cgit v1.2.3


From 0225fb509d51fcf777eb0aa31c304c582e3248fd Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:16 +0200
Subject: KVM: x86 emulator: constify emulate_ops

We never change emulate_ops[] at runtime so it should be r/o.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |  2 +-
 arch/x86/kvm/emulate.c             | 22 +++++++++++-----------
 arch/x86/kvm/x86.c                 |  2 +-
 3 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 282aee5d6ac1..b5bb73aecc06 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -250,7 +250,7 @@ struct read_cache {
 };
 
 struct x86_emulate_ctxt {
-	struct x86_emulate_ops *ops;
+	const struct x86_emulate_ops *ops;
 
 	/* Register state before/after emulation. */
 	unsigned long eflags;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index fd06f9d65847..663e95881bdb 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1325,7 +1325,7 @@ static int read_interrupt_descriptor(struct x86_emulate_ctxt *ctxt,
 static void get_descriptor_table_ptr(struct x86_emulate_ctxt *ctxt,
 				     u16 selector, struct desc_ptr *dt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 
 	if (selector & 1 << 2) {
 		struct desc_struct desc;
@@ -1747,7 +1747,7 @@ static int em_popa(struct x86_emulate_ctxt *ctxt)
 
 static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	int rc;
 	struct desc_ptr dt;
 	gva_t cs_addr;
@@ -2129,7 +2129,7 @@ static bool vendor_intel(struct x86_emulate_ctxt *ctxt)
 
 static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	u32 eax, ebx, ecx, edx;
 
 	/*
@@ -2173,7 +2173,7 @@ static bool em_syscall_is_enabled(struct x86_emulate_ctxt *ctxt)
 
 static int em_syscall(struct x86_emulate_ctxt *ctxt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
 	u64 msr_data;
 	u16 cs_sel, ss_sel;
@@ -2231,7 +2231,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
 
 static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
 	u64 msr_data;
 	u16 cs_sel, ss_sel;
@@ -2294,7 +2294,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 
 static int em_sysexit(struct x86_emulate_ctxt *ctxt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct cs, ss;
 	u64 msr_data;
 	int usermode;
@@ -2357,7 +2357,7 @@ static bool emulator_bad_iopl(struct x86_emulate_ctxt *ctxt)
 static bool emulator_io_port_access_allowed(struct x86_emulate_ctxt *ctxt,
 					    u16 port, u16 len)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct tr_seg;
 	u32 base3;
 	int r;
@@ -2476,7 +2476,7 @@ static int task_switch_16(struct x86_emulate_ctxt *ctxt,
 			  u16 tss_selector, u16 old_tss_sel,
 			  ulong old_tss_base, struct desc_struct *new_desc)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct tss_segment_16 tss_seg;
 	int ret;
 	u32 new_tss_base = get_desc_base(new_desc);
@@ -2623,7 +2623,7 @@ static int task_switch_32(struct x86_emulate_ctxt *ctxt,
 			  u16 tss_selector, u16 old_tss_sel,
 			  ulong old_tss_base, struct desc_struct *new_desc)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct tss_segment_32 tss_seg;
 	int ret;
 	u32 new_tss_base = get_desc_base(new_desc);
@@ -2667,7 +2667,7 @@ static int emulator_do_task_switch(struct x86_emulate_ctxt *ctxt,
 				   u16 tss_selector, int idt_index, int reason,
 				   bool has_error_code, u32 error_code)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	struct desc_struct curr_tss_desc, next_tss_desc;
 	int ret;
 	u16 old_tss_sel = get_segment_selector(ctxt, VCPU_SREG_TR);
@@ -4339,7 +4339,7 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt,
 
 int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 {
-	struct x86_emulate_ops *ops = ctxt->ops;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 	int rc = X86EMUL_CONTINUE;
 	int saved_dst_type = ctxt->dst.type;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 20f2266dfb64..0dc066f0428d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4323,7 +4323,7 @@ static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulon
 	kvm_register_write(emul_to_vcpu(ctxt), reg, val);
 }
 
-static struct x86_emulate_ops emulate_ops = {
+static const struct x86_emulate_ops emulate_ops = {
 	.read_gpr            = emulator_read_gpr,
 	.write_gpr           = emulator_write_gpr,
 	.read_std            = kvm_read_guest_virt_system,
-- 
cgit v1.2.3


From 0fbe9b0b19fb92eee2cf23c23d63d6b3312681e5 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:17 +0200
Subject: KVM: x86: constify read_write_emulator_ops

We never change those, make them r/o.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0dc066f0428d..317241619e2d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3773,14 +3773,14 @@ static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
 	return X86EMUL_CONTINUE;
 }
 
-static struct read_write_emulator_ops read_emultor = {
+static const struct read_write_emulator_ops read_emultor = {
 	.read_write_prepare = read_prepare,
 	.read_write_emulate = read_emulate,
 	.read_write_mmio = vcpu_mmio_read,
 	.read_write_exit_mmio = read_exit_mmio,
 };
 
-static struct read_write_emulator_ops write_emultor = {
+static const struct read_write_emulator_ops write_emultor = {
 	.read_write_emulate = write_emulate,
 	.read_write_mmio = write_mmio,
 	.read_write_exit_mmio = write_exit_mmio,
@@ -3791,7 +3791,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
 				       unsigned int bytes,
 				       struct x86_exception *exception,
 				       struct kvm_vcpu *vcpu,
-				       struct read_write_emulator_ops *ops)
+				       const struct read_write_emulator_ops *ops)
 {
 	gpa_t gpa;
 	int handled, ret;
@@ -3840,7 +3840,7 @@ mmio:
 int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
 			void *val, unsigned int bytes,
 			struct x86_exception *exception,
-			struct read_write_emulator_ops *ops)
+			const struct read_write_emulator_ops *ops)
 {
 	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
 	gpa_t gpa;
-- 
cgit v1.2.3


From f1d248315afc55771c3991b934014daa154d05f1 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:18 +0200
Subject: KVM: x86: more constification

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/lapic.c | 2 +-
 arch/x86/kvm/x86.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 18d149d80209..07ad628dadb7 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -198,7 +198,7 @@ static inline int apic_x2apic_mode(struct kvm_lapic *apic)
 	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
 }
 
-static unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
+static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTPC */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 317241619e2d..666da13c34fc 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -817,7 +817,7 @@ static u32 msrs_to_save[] = {
 
 static unsigned num_msrs_to_save;
 
-static u32 emulated_msrs[] = {
+static const u32 emulated_msrs[] = {
 	MSR_IA32_TSCDEADLINE,
 	MSR_IA32_MISC_ENABLE,
 	MSR_IA32_MCG_STATUS,
-- 
cgit v1.2.3


From 772e031899fdf3c7636d66aae9b0b57d1aaebb93 Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:19 +0200
Subject: KVM: VMX: constify lookup tables

We use vmcs_field_to_offset_table[], kvm_vmx_segment_fields[] and
kvm_vmx_exit_handlers[] as lookup tables only -- make them r/o.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 248c2b490e9b..d62b4139a292 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -450,7 +450,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 #define FIELD64(number, name)	[number] = VMCS12_OFFSET(name), \
 				[number##_HIGH] = VMCS12_OFFSET(name)+4
 
-static unsigned short vmcs_field_to_offset_table[] = {
+static const unsigned short vmcs_field_to_offset_table[] = {
 	FIELD(VIRTUAL_PROCESSOR_ID, virtual_processor_id),
 	FIELD(GUEST_ES_SELECTOR, guest_es_selector),
 	FIELD(GUEST_CS_SELECTOR, guest_cs_selector),
@@ -666,7 +666,7 @@ static struct vmx_capability {
 		.ar_bytes = GUEST_##seg##_AR_BYTES,	   	\
 	}
 
-static struct kvm_vmx_segment_field {
+static const struct kvm_vmx_segment_field {
 	unsigned selector;
 	unsigned base;
 	unsigned limit;
@@ -2695,7 +2695,7 @@ static __exit void hardware_unsetup(void)
 
 static void fix_pmode_dataseg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	struct kvm_segment tmp = *save;
 
 	if (!(vmcs_readl(sf->base) == tmp.base && tmp.s)) {
@@ -2764,7 +2764,7 @@ static gva_t rmode_tss_base(struct kvm *kvm)
 
 static void fix_rmode_seg(int seg, struct kvm_segment *save)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 
 	vmcs_write16(sf->selector, save->base >> 4);
 	vmcs_write32(sf->base, save->base & 0xffff0);
@@ -3202,7 +3202,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	u32 ar;
 
 	vmx_segment_cache_clear(vmx);
@@ -3572,7 +3572,7 @@ out:
 
 static void seg_setup(int seg)
 {
-	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+	const struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
 	unsigned int ar;
 
 	vmcs_write16(sf->selector, 0);
@@ -5655,7 +5655,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
  * to be done to userspace and return 0.
  */
-static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
+static int (*const kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
 	[EXIT_REASON_EXCEPTION_NMI]           = handle_exception,
 	[EXIT_REASON_EXTERNAL_INTERRUPT]      = handle_external_interrupt,
 	[EXIT_REASON_TRIPLE_FAULT]            = handle_triple_fault,
-- 
cgit v1.2.3


From 09941fbb712655cde9b350852be7a99a6f61a03f Mon Sep 17 00:00:00 2001
From: Mathias Krause <minipli@googlemail.com>
Date: Thu, 30 Aug 2012 01:30:20 +0200
Subject: KVM: SVM: constify lookup tables

We never modify direct_access_msrs[], msrpm_ranges[],
svm_exit_handlers[] or x86_intercept_map[] at runtime.
Mark them r/o.

Signed-off-by: Mathias Krause <minipli@googlemail.com>
Cc: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/svm.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 31be4a557447..611c72875fb9 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -163,7 +163,7 @@ static DEFINE_PER_CPU(u64, current_tsc_ratio);
 
 #define MSR_INVALID			0xffffffffU
 
-static struct svm_direct_access_msrs {
+static const struct svm_direct_access_msrs {
 	u32 index;   /* Index of the MSR */
 	bool always; /* True if intercept is always on */
 } direct_access_msrs[] = {
@@ -400,7 +400,7 @@ struct svm_init_data {
 	int r;
 };
 
-static u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
+static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
 
 #define NUM_MSR_MAPS ARRAY_SIZE(msrpm_ranges)
 #define MSRS_RANGE_SIZE 2048
@@ -3267,7 +3267,7 @@ static int pause_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
-static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = {
+static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
 	[SVM_EXIT_READ_CR0]			= cr_interception,
 	[SVM_EXIT_READ_CR3]			= cr_interception,
 	[SVM_EXIT_READ_CR4]			= cr_interception,
@@ -4068,7 +4068,7 @@ static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
 #define POST_MEM(exit) { .exit_code = (exit), \
 			.stage = X86_ICPT_POST_MEMACCESS, }
 
-static struct __x86_intercept {
+static const struct __x86_intercept {
 	u32 exit_code;
 	enum x86_intercept_stage stage;
 } x86_intercept_map[] = {
-- 
cgit v1.2.3


From 50e900417b8096939d12a46848f965e27a905e36 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 4 Sep 2012 15:45:17 -0400
Subject: xen/p2m: Fix one-off error in checking the P2M tree directory.

We would traverse the full P2M top directory (from 0->MAX_DOMAIN_PAGES
inclusive) when trying to figure out whether we can re-use some of the
P2M middle leafs.

Which meant that if the kernel was compiled with MAX_DOMAIN_PAGES=512
we would try to use the 512th entry. Fortunately for us the p2m_top_index
has a check for this:

 BUG_ON(pfn >= MAX_P2M_PFN);

which we hit and saw this:

(XEN) domain_crash_sync called from entry.S
(XEN) Domain 0 (vcpu#0) crashed on cpu#0:
(XEN) ----[ Xen-4.1.2-OVM  x86_64  debug=n  Tainted:    C ]----
(XEN) CPU:    0
(XEN) RIP:    e033:[<ffffffff819cadeb>]
(XEN) RFLAGS: 0000000000000212   EM: 1   CONTEXT: pv guest
(XEN) rax: ffffffff81db5000   rbx: ffffffff81db4000   rcx: 0000000000000000
(XEN) rdx: 0000000000480211   rsi: 0000000000000000   rdi: ffffffff81db4000
(XEN) rbp: ffffffff81793db8   rsp: ffffffff81793d38   r8:  0000000008000000
(XEN) r9:  4000000000000000   r10: 0000000000000000   r11: ffffffff81db7000
(XEN) r12: 0000000000000ff8   r13: ffffffff81df1ff8   r14: ffffffff81db6000
(XEN) r15: 0000000000000ff8   cr0: 000000008005003b   cr4: 00000000000026f0
(XEN) cr3: 0000000661795000   cr2: 0000000000000000

Fixes-Oracle-Bug: 14570662
CC: stable@vger.kernel.org # only for v3.5
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/p2m.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index d4b255463253..76ba0e97e530 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -599,7 +599,7 @@ bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_
 	if (p2m_index(set_pfn))
 		return false;
 
-	for (pfn = 0; pfn <= MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
+	for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) {
 		topidx = p2m_top_index(pfn);
 
 		if (!p2m_top[topidx])
-- 
cgit v1.2.3


From ce7184bdbd38d920fb515266fbbdc585ad2e5493 Mon Sep 17 00:00:00 2001
From: Alex Shi <alex.shi@intel.com>
Date: Fri, 24 Aug 2012 08:55:13 +0000
Subject: xen: fix logical error in tlb flushing

While TLB_FLUSH_ALL gets passed as 'end' argument to
flush_tlb_others(), the Xen code was made to check its 'start'
parameter. That may give a incorrect op.cmd to MMUEXT_INVLPG_MULTI
instead of MMUEXT_TLB_FLUSH_MULTI. Then it causes some page can not
be flushed from TLB.

This patch fixed this issue.

Reported-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Alex Shi <alex.shi@intel.com>
Acked-by: Jan Beulich <jbeulich@suse.com>
Tested-by: Yongjie Ren <yongjie.ren@intel.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index b65a76133f4f..5141d808e751 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1283,7 +1283,7 @@ static void xen_flush_tlb_others(const struct cpumask *cpus,
 	cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
 
 	args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
-	if (start != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
+	if (end != TLB_FLUSH_ALL && (end - start) <= PAGE_SIZE) {
 		args->op.cmd = MMUEXT_INVLPG_MULTI;
 		args->op.arg1.linear_addr = start;
 	}
-- 
cgit v1.2.3


From 69870a847856a1ba81f655a8633fce5f5b614730 Mon Sep 17 00:00:00 2001
From: David Vrabel <david.vrabel@citrix.com>
Date: Thu, 30 Aug 2012 13:58:11 +0100
Subject: xen/mm: return more precise error from xen_remap_domain_range()

Callers of xen_remap_domain_range() need to know if the remap failed
because frame is currently paged out.  So they can retry the remap
later on.  Return -ENOENT in this case.

This assumes that the error codes returned by Xen are a subset of
those used by the kernel.  It is unclear if this is defined as part of
the hypercall ABI.

Acked-by: Andres Lagar-Cavilla <andres@lagarcavilla.org>
Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 885a22354a96..2d9e7c9c0e7b 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2331,8 +2331,8 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 		if (err)
 			goto out;
 
-		err = -EFAULT;
-		if (HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid) < 0)
+		err = HYPERVISOR_mmu_update(mmu_update, batch, NULL, domid);
+		if (err < 0)
 			goto out;
 
 		nr -= batch;
-- 
cgit v1.2.3


From 2df72e9bc4c505d8357012f2924589f3d16f9d44 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 24 Aug 2012 15:54:57 -0300
Subject: KVM: split kvm_arch_flush_shadow

Introducing kvm_arch_flush_shadow_memslot, to invalidate the
translations of a single memory slot.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c   | 8 +++++++-
 arch/powerpc/kvm/powerpc.c | 6 +++++-
 arch/s390/kvm/kvm-s390.c   | 7 ++++++-
 arch/x86/kvm/x86.c         | 8 +++++++-
 include/linux/kvm_host.h   | 6 +++++-
 virt/kvm/kvm_main.c        | 8 ++++----
 6 files changed, 34 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index eac65380bd20..8b3a9c0e771d 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1613,11 +1613,17 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	return;
 }
 
-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
 	kvm_flush_remote_tlbs(kvm);
 }
 
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+	kvm_arch_flush_shadow_all();
+}
+
 long kvm_arch_dev_ioctl(struct file *filp,
 			unsigned int ioctl, unsigned long arg)
 {
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 879b14a61403..4d213b8b0fb5 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -334,8 +334,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	kvmppc_core_commit_memory_region(kvm, mem);
 }
 
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
 
-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
 {
 }
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index e83df7f0fedd..ecced9d18986 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -969,7 +969,12 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	return;
 }
 
-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
+{
+}
+
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
 {
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 666da13c34fc..37797a090a8f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6447,12 +6447,18 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 	spin_unlock(&kvm->mmu_lock);
 }
 
-void kvm_arch_flush_shadow(struct kvm *kvm)
+void kvm_arch_flush_shadow_all(struct kvm *kvm)
 {
 	kvm_mmu_zap_all(kvm);
 	kvm_reload_remote_mmus(kvm);
 }
 
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot)
+{
+	kvm_arch_flush_shadow_all(kvm);
+}
+
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
 	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9c0b3c3ae0a5..40791930bc15 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -458,7 +458,11 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 				int user_alloc);
 bool kvm_largepages_enabled(void);
 void kvm_disable_largepages(void);
-void kvm_arch_flush_shadow(struct kvm *kvm);
+/* flush all memory translations */
+void kvm_arch_flush_shadow_all(struct kvm *kvm);
+/* flush memory translations pointing to 'slot' */
+void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
+				   struct kvm_memory_slot *slot);
 
 int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 			    int nr_pages);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 6425906d7cec..a4bf05be5fea 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -408,7 +408,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn,
 	int idx;
 
 	idx = srcu_read_lock(&kvm->srcu);
-	kvm_arch_flush_shadow(kvm);
+	kvm_arch_flush_shadow_all(kvm);
 	srcu_read_unlock(&kvm->srcu, idx);
 }
 
@@ -582,7 +582,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 	mmu_notifier_unregister(&kvm->mmu_notifier, kvm->mm);
 #else
-	kvm_arch_flush_shadow(kvm);
+	kvm_arch_flush_shadow_all(kvm);
 #endif
 	kvm_arch_destroy_vm(kvm);
 	kvm_free_physmem(kvm);
@@ -814,7 +814,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		 * 	- gfn_to_hva (kvm_read_guest, gfn_to_pfn)
 		 * 	- kvm_is_visible_gfn (mmu_check_roots)
 		 */
-		kvm_arch_flush_shadow(kvm);
+		kvm_arch_flush_shadow_memslot(kvm, slot);
 		kfree(old_memslots);
 	}
 
@@ -854,7 +854,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	 * mmio sptes.
 	 */
 	if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT)
-		kvm_arch_flush_shadow(kvm);
+		kvm_arch_flush_shadow_all(kvm);
 
 	kvm_free_physmem_slot(&old, &new);
 	kfree(old_memslots);
-- 
cgit v1.2.3


From 3b4dc3a031110753b9ba36432dbd21f989fcee56 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Tue, 28 Aug 2012 17:43:26 -0300
Subject: KVM: move postcommit flush to x86, as mmio sptes are x86 specific

Other arches do not need this.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

v2: fix incorrect deletion of mmio sptes on gpa move (noticed by Takuya)
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c  | 8 ++++++++
 virt/kvm/kvm_main.c | 7 -------
 2 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 37797a090a8f..6f6812ec8419 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6445,6 +6445,14 @@ void kvm_arch_commit_memory_region(struct kvm *kvm,
 		kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
 	kvm_mmu_slot_remove_write_access(kvm, mem->slot);
 	spin_unlock(&kvm->mmu_lock);
+	/*
+	 * If memory slot is created, or moved, we need to clear all
+	 * mmio sptes.
+	 */
+	if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT) {
+		kvm_mmu_zap_all(kvm);
+		kvm_reload_remote_mmus(kvm);
+	}
 }
 
 void kvm_arch_flush_shadow_all(struct kvm *kvm)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f41ea1262d51..4fe02d900810 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -849,13 +849,6 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	kvm_arch_commit_memory_region(kvm, mem, old, user_alloc);
 
-	/*
-	 * If the new memory slot is created, we need to clear all
-	 * mmio sptes.
-	 */
-	if (npages && old.base_gfn != mem->guest_phys_addr >> PAGE_SHIFT)
-		kvm_arch_flush_shadow_all(kvm);
-
 	kvm_free_physmem_slot(&old, &new);
 	kfree(old_memslots);
 
-- 
cgit v1.2.3


From 716d51abff06f48425cef15d78ca6f36093f6dbf Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 3 Sep 2012 15:24:26 +0300
Subject: KVM: Provide userspace IO exit completion callback

Current code assumes that IO exit was due to instruction emulation
and handles execution back to emulator directly. This patch adds new
userspace IO exit completion callback that can be set by any other code
that caused IO exit to userspace.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/x86.c              | 93 +++++++++++++++++++++++++----------------
 2 files changed, 57 insertions(+), 37 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index fc0e752e7564..64adb6117e19 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -414,6 +414,7 @@ struct kvm_vcpu_arch {
 	struct x86_emulate_ctxt emulate_ctxt;
 	bool emulate_regs_need_sync_to_vcpu;
 	bool emulate_regs_need_sync_from_vcpu;
+	int (*complete_userspace_io)(struct kvm_vcpu *vcpu);
 
 	gpa_t time;
 	struct pvclock_vcpu_time_info hv_clock;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6f6812ec8419..f91e2c9d7cb1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4544,6 +4544,9 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 	return true;
 }
 
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
+static int complete_emulated_pio(struct kvm_vcpu *vcpu);
+
 int x86_emulate_instruction(struct kvm_vcpu *vcpu,
 			    unsigned long cr2,
 			    int emulation_type,
@@ -4614,13 +4617,16 @@ restart:
 	} else if (vcpu->arch.pio.count) {
 		if (!vcpu->arch.pio.in)
 			vcpu->arch.pio.count = 0;
-		else
+		else {
 			writeback = false;
+			vcpu->arch.complete_userspace_io = complete_emulated_pio;
+		}
 		r = EMULATE_DO_MMIO;
 	} else if (vcpu->mmio_needed) {
 		if (!vcpu->mmio_is_write)
 			writeback = false;
 		r = EMULATE_DO_MMIO;
+		vcpu->arch.complete_userspace_io = complete_emulated_mmio;
 	} else if (r == EMULATION_RESTART)
 		goto restart;
 	else
@@ -5476,6 +5482,24 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	return r;
 }
 
+static inline int complete_emulated_io(struct kvm_vcpu *vcpu)
+{
+	int r;
+	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
+	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
+	if (r != EMULATE_DONE)
+		return 0;
+	return 1;
+}
+
+static int complete_emulated_pio(struct kvm_vcpu *vcpu)
+{
+	BUG_ON(!vcpu->arch.pio.count);
+
+	return complete_emulated_io(vcpu);
+}
+
 /*
  * Implements the following, as a state machine:
  *
@@ -5492,47 +5516,37 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
  *      copy data
  *      exit
  */
-static int complete_mmio(struct kvm_vcpu *vcpu)
+static int complete_emulated_mmio(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	struct kvm_mmio_fragment *frag;
-	int r;
 
-	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
-		return 1;
+	BUG_ON(!vcpu->mmio_needed);
 
-	if (vcpu->mmio_needed) {
-		/* Complete previous fragment */
-		frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
-		if (!vcpu->mmio_is_write)
-			memcpy(frag->data, run->mmio.data, frag->len);
-		if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
-			vcpu->mmio_needed = 0;
-			if (vcpu->mmio_is_write)
-				return 1;
-			vcpu->mmio_read_completed = 1;
-			goto done;
-		}
-		/* Initiate next fragment */
-		++frag;
-		run->exit_reason = KVM_EXIT_MMIO;
-		run->mmio.phys_addr = frag->gpa;
+	/* Complete previous fragment */
+	frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
+	if (!vcpu->mmio_is_write)
+		memcpy(frag->data, run->mmio.data, frag->len);
+	if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
+		vcpu->mmio_needed = 0;
 		if (vcpu->mmio_is_write)
-			memcpy(run->mmio.data, frag->data, frag->len);
-		run->mmio.len = frag->len;
-		run->mmio.is_write = vcpu->mmio_is_write;
-		return 0;
-
-	}
-done:
-	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
-	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
-	if (r != EMULATE_DONE)
-		return 0;
-	return 1;
+			return 1;
+		vcpu->mmio_read_completed = 1;
+		return complete_emulated_io(vcpu);
+	}
+	/* Initiate next fragment */
+	++frag;
+	run->exit_reason = KVM_EXIT_MMIO;
+	run->mmio.phys_addr = frag->gpa;
+	if (vcpu->mmio_is_write)
+		memcpy(run->mmio.data, frag->data, frag->len);
+	run->mmio.len = frag->len;
+	run->mmio.is_write = vcpu->mmio_is_write;
+	vcpu->arch.complete_userspace_io = complete_emulated_mmio;
+	return 0;
 }
 
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
 	int r;
@@ -5559,9 +5573,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		}
 	}
 
-	r = complete_mmio(vcpu);
-	if (r <= 0)
-		goto out;
+	if (unlikely(vcpu->arch.complete_userspace_io)) {
+		int (*cui)(struct kvm_vcpu *) = vcpu->arch.complete_userspace_io;
+		vcpu->arch.complete_userspace_io = NULL;
+		r = cui(vcpu);
+		if (r <= 0)
+			goto out;
+	} else
+		WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed);
 
 	r = __vcpu_run(vcpu);
 
-- 
cgit v1.2.3


From 9d1b39a967871b7c69025dba7b7bdaee42871021 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 3 Sep 2012 15:24:27 +0300
Subject: KVM: emulator: make x86 emulation modes enum instead of defines

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h | 22 ++++++++++------------
 arch/x86/kvm/emulate.c             |  4 +++-
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index b5bb73aecc06..e9e5675c0dfb 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -249,6 +249,15 @@ struct read_cache {
 	unsigned long end;
 };
 
+/* Execution mode, passed to the emulator. */
+enum x86emul_mode {
+	X86EMUL_MODE_REAL,	/* Real mode.             */
+	X86EMUL_MODE_VM86,	/* Virtual 8086 mode.     */
+	X86EMUL_MODE_PROT16,	/* 16-bit protected mode. */
+	X86EMUL_MODE_PROT32,	/* 32-bit protected mode. */
+	X86EMUL_MODE_PROT64,	/* 64-bit (long) mode.    */
+};
+
 struct x86_emulate_ctxt {
 	const struct x86_emulate_ops *ops;
 
@@ -256,7 +265,7 @@ struct x86_emulate_ctxt {
 	unsigned long eflags;
 	unsigned long eip; /* eip before instruction emulation */
 	/* Emulated execution mode, represented by an X86EMUL_MODE value. */
-	int mode;
+	enum x86emul_mode mode;
 
 	/* interruptibility state, as a result of execution of STI or MOV SS */
 	int interruptibility;
@@ -308,17 +317,6 @@ struct x86_emulate_ctxt {
 #define REPE_PREFIX	0xf3
 #define REPNE_PREFIX	0xf2
 
-/* Execution mode, passed to the emulator. */
-#define X86EMUL_MODE_REAL     0	/* Real mode.             */
-#define X86EMUL_MODE_VM86     1	/* Virtual 8086 mode.     */
-#define X86EMUL_MODE_PROT16   2	/* 16-bit protected mode. */
-#define X86EMUL_MODE_PROT32   4	/* 32-bit protected mode. */
-#define X86EMUL_MODE_PROT64   8	/* 64-bit (long) mode.    */
-
-/* any protected mode   */
-#define X86EMUL_MODE_PROT     (X86EMUL_MODE_PROT16|X86EMUL_MODE_PROT32| \
-			       X86EMUL_MODE_PROT64)
-
 /* CPUID vendors */
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ebx 0x68747541
 #define X86EMUL_CPUID_VENDOR_AuthenticAMD_ecx 0x444d4163
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 663e95881bdb..5fe06a8fbebc 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2268,6 +2268,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
 		if (msr_data == 0x0)
 			return emulate_gp(ctxt, 0);
 		break;
+	default:
+		break;
 	}
 
 	ctxt->eflags &= ~(EFLG_VM | EFLG_IF | EFLG_RF);
@@ -4400,7 +4402,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
 	}
 
 	/* Instruction can only be executed in protected mode */
-	if ((ctxt->d & Prot) && !(ctxt->mode & X86EMUL_MODE_PROT)) {
+	if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
 		rc = emulate_ud(ctxt);
 		goto done;
 	}
-- 
cgit v1.2.3


From f3bd64c68a8f1245e3d037f70c6936cd7bb1196b Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 3 Sep 2012 15:24:28 +0300
Subject: KVM: emulator: string_addr_inc() cleanup

Remove unneeded segment argument. Address structure already has correct
segment which was put there during decode.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/emulate.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 5fe06a8fbebc..415f903facd3 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2790,14 +2790,13 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 	return (rc == X86EMUL_UNHANDLEABLE) ? EMULATION_FAILED : EMULATION_OK;
 }
 
-static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg,
-			    int reg, struct operand *op)
+static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
+		struct operand *op)
 {
 	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
 
 	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
 	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
-	op->addr.mem.seg = seg;
 }
 
 static int em_das(struct x86_emulate_ctxt *ctxt)
@@ -4570,12 +4569,10 @@ writeback:
 	ctxt->dst.type = saved_dst_type;
 
 	if ((ctxt->d & SrcMask) == SrcSI)
-		string_addr_inc(ctxt, seg_override(ctxt),
-				VCPU_REGS_RSI, &ctxt->src);
+		string_addr_inc(ctxt, VCPU_REGS_RSI, &ctxt->src);
 
 	if ((ctxt->d & DstMask) == DstDI)
-		string_addr_inc(ctxt, VCPU_SREG_ES, VCPU_REGS_RDI,
-				&ctxt->dst);
+		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
 		struct read_cache *r = &ctxt->io_read;
-- 
cgit v1.2.3


From b3356bf0dbb34980620f2f7def7d1b9a0d325225 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 3 Sep 2012 15:24:29 +0300
Subject: KVM: emulator: optimize "rep ins" handling

Optimize "rep ins" by allowing emulator to write back more than one
datum at a time. Introduce new operand type OP_MEM_STR which tells
writeback() that dst contains pointer to an array that should be written
back as opposite to just one data element.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_emulate.h |  4 +++-
 arch/x86/kvm/emulate.c             | 33 ++++++++++++++++++++++++++++-----
 2 files changed, 31 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index e9e5675c0dfb..15f960c06ff7 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -213,8 +213,9 @@ typedef u32 __attribute__((vector_size(16))) sse128_t;
 
 /* Type, address-of, and value of an instruction's operand. */
 struct operand {
-	enum { OP_REG, OP_MEM, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
+	enum { OP_REG, OP_MEM, OP_MEM_STR, OP_IMM, OP_XMM, OP_MM, OP_NONE } type;
 	unsigned int bytes;
+	unsigned int count;
 	union {
 		unsigned long orig_val;
 		u64 orig_val64;
@@ -234,6 +235,7 @@ struct operand {
 		char valptr[sizeof(unsigned long) + 2];
 		sse128_t vec_val;
 		u64 mm_val;
+		void *data;
 	};
 };
 
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 415f903facd3..39171cb307ea 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1301,8 +1301,15 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt,
 		rc->end = n * size;
 	}
 
-	memcpy(dest, rc->data + rc->pos, size);
-	rc->pos += size;
+	if (ctxt->rep_prefix && !(ctxt->eflags & EFLG_DF)) {
+		ctxt->dst.data = rc->data + rc->pos;
+		ctxt->dst.type = OP_MEM_STR;
+		ctxt->dst.count = (rc->end - rc->pos) / size;
+		rc->pos = rc->end;
+	} else {
+		memcpy(dest, rc->data + rc->pos, size);
+		rc->pos += size;
+	}
 	return 1;
 }
 
@@ -1546,6 +1553,14 @@ static int writeback(struct x86_emulate_ctxt *ctxt)
 		if (rc != X86EMUL_CONTINUE)
 			return rc;
 		break;
+	case OP_MEM_STR:
+		rc = segmented_write(ctxt,
+				ctxt->dst.addr.mem,
+				ctxt->dst.data,
+				ctxt->dst.bytes * ctxt->dst.count);
+		if (rc != X86EMUL_CONTINUE)
+			return rc;
+		break;
 	case OP_XMM:
 		write_sse_reg(ctxt, &ctxt->dst.vec_val, ctxt->dst.addr.xmm);
 		break;
@@ -2793,7 +2808,7 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt,
 static void string_addr_inc(struct x86_emulate_ctxt *ctxt, int reg,
 		struct operand *op)
 {
-	int df = (ctxt->eflags & EFLG_DF) ? -1 : 1;
+	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
 
 	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
 	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
@@ -3733,7 +3748,7 @@ static const struct opcode opcode_table[256] = {
 	I(DstReg | SrcMem | ModRM | Src2Imm, em_imul_3op),
 	I(SrcImmByte | Mov | Stack, em_push),
 	I(DstReg | SrcMem | ModRM | Src2ImmByte, em_imul_3op),
-	I2bvIP(DstDI | SrcDX | Mov | String, em_in, ins, check_perm_in), /* insb, insw/insd */
+	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
 	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
 	/* 0x70 - 0x7F */
 	X16(D(SrcImmByte)),
@@ -3991,6 +4006,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
 		op->addr.mem.seg = VCPU_SREG_ES;
 		op->val = 0;
+		op->count = 1;
 		break;
 	case OpDX:
 		op->type = OP_REG;
@@ -4034,6 +4050,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op,
 			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
 		op->addr.mem.seg = seg_override(ctxt);
 		op->val = 0;
+		op->count = 1;
 		break;
 	case OpImmFAddr:
 		op->type = OP_IMM;
@@ -4575,8 +4592,14 @@ writeback:
 		string_addr_inc(ctxt, VCPU_REGS_RDI, &ctxt->dst);
 
 	if (ctxt->rep_prefix && (ctxt->d & String)) {
+		unsigned int count;
 		struct read_cache *r = &ctxt->io_read;
-		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
+		if ((ctxt->d & SrcMask) == SrcSI)
+			count = ctxt->src.count;
+		else
+			count = ctxt->dst.count;
+		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX),
+				-count);
 
 		if (!string_insn_completed(ctxt)) {
 			/*
-- 
cgit v1.2.3


From a50abc3b2b469ee80bc0f9ef5b6d457ef72659a9 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 5 Sep 2012 20:00:52 +0300
Subject: KVM: use symbolic constant for nr interrupts

interrupt_bitmap is KVM_NR_INTERRUPTS bits in size,
so just use that instead of hard-coded constants
and math.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/x86.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index f91e2c9d7cb1..c4d451ed1573 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2366,7 +2366,7 @@ static int kvm_vcpu_ioctl_set_lapic(struct kvm_vcpu *vcpu,
 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
 				    struct kvm_interrupt *irq)
 {
-	if (irq->irq < 0 || irq->irq >= 256)
+	if (irq->irq < 0 || irq->irq >= KVM_NR_INTERRUPTS)
 		return -EINVAL;
 	if (irqchip_in_kernel(vcpu->kvm))
 		return -ENXIO;
@@ -5793,7 +5793,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
 	if (mmu_reset_needed)
 		kvm_mmu_reset_context(vcpu);
 
-	max_bits = (sizeof sregs->interrupt_bitmap) << 3;
+	max_bits = KVM_NR_INTERRUPTS;
 	pending_vec = find_first_bit(
 		(const unsigned long *)sregs->interrupt_bitmap, max_bits);
 	if (pending_vec < max_bits) {
-- 
cgit v1.2.3


From f94a73f8dd5644f45f9d2e3139608ca83b932d93 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Tue, 28 Aug 2012 14:24:43 +0300
Subject: crypto: twofish-avx - tune assembler code for more performance

Patch replaces 'movb' instructions with 'movzbl' to break false register
dependencies and interleaves instructions better for out-of-order scheduling.

Tested on Intel Core i5-2450M and AMD FX-8100.

tcrypt ECB results:

Intel Core i5-2450M:

size    old-vs-new      new-vs-3way     old-vs-3way
        enc     dec     enc     dec     enc     dec
256     1.12x   1.13x   1.36x   1.37x   1.21x   1.22x
1k      1.14x   1.14x   1.48x   1.49x   1.29x   1.31x
8k      1.14x   1.14x   1.50x   1.52x   1.32x   1.33x

AMD FX-8100:

size    old-vs-new      new-vs-3way     old-vs-3way
        enc     dec     enc     dec     enc     dec
256     1.10x   1.11x   1.01x   1.01x   0.92x   0.91x
1k      1.11x   1.12x   1.08x   1.07x   0.97x   0.96x
8k      1.11x   1.13x   1.10x   1.08x   0.99x   0.97x

[v2]
 - Do instruction interleaving another way to avoid adding new FPU<=>CPU
   register moves as these cause performance drop on Bulldozer.
 - Further interleaving improvements for better out-of-order scheduling.

Tested-by: Borislav Petkov <bp@alien8.de>
Cc: Johannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 227 +++++++++++++++++-----------
 1 file changed, 142 insertions(+), 85 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 35f45574390d..1585abb13dde 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -4,6 +4,8 @@
  * Copyright (C) 2012 Johannes Goetzfried
  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
  *
+ * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -47,16 +49,22 @@
 #define RC2 %xmm6
 #define RD2 %xmm7
 
-#define RX %xmm8
-#define RY %xmm9
+#define RX0 %xmm8
+#define RY0 %xmm9
+
+#define RX1 %xmm10
+#define RY1 %xmm11
 
-#define RK1 %xmm10
-#define RK2 %xmm11
+#define RK1 %xmm12
+#define RK2 %xmm13
 
-#define RID1  %rax
-#define RID1b %al
-#define RID2  %rbx
-#define RID2b %bl
+#define RT %xmm14
+#define RR %xmm15
+
+#define RID1  %rbp
+#define RID1d %ebp
+#define RID2  %rsi
+#define RID2d %esi
 
 #define RGI1   %rdx
 #define RGI1bl %dl
@@ -65,6 +73,13 @@
 #define RGI2bl %cl
 #define RGI2bh %ch
 
+#define RGI3   %rax
+#define RGI3bl %al
+#define RGI3bh %ah
+#define RGI4   %rbx
+#define RGI4bl %bl
+#define RGI4bh %bh
+
 #define RGS1  %r8
 #define RGS1d %r8d
 #define RGS2  %r9
@@ -73,89 +88,123 @@
 #define RGS3d %r10d
 
 
-#define lookup_32bit(t0, t1, t2, t3, src, dst) \
-	movb		src ## bl,        RID1b;     \
-	movb		src ## bh,        RID2b;     \
-	movl		t0(CTX, RID1, 4), dst ## d;  \
-	xorl		t1(CTX, RID2, 4), dst ## d;  \
+#define lookup_32bit(t0, t1, t2, t3, src, dst, interleave_op, il_reg) \
+	movzbl		src ## bl,        RID1d;     \
+	movzbl		src ## bh,        RID2d;     \
 	shrq $16,	src;                         \
-	movb		src ## bl,        RID1b;     \
-	movb		src ## bh,        RID2b;     \
+	movl		t0(CTX, RID1, 4), dst ## d;  \
+	movl		t1(CTX, RID2, 4), RID2d;     \
+	movzbl		src ## bl,        RID1d;     \
+	xorl		RID2d,            dst ## d;  \
+	movzbl		src ## bh,        RID2d;     \
+	interleave_op(il_reg);			     \
 	xorl		t2(CTX, RID1, 4), dst ## d;  \
 	xorl		t3(CTX, RID2, 4), dst ## d;
 
-#define G(a, x, t0, t1, t2, t3) \
-	vmovq		a,    RGI1;               \
-	vpsrldq $8,	a,    x;                  \
-	vmovq		x,    RGI2;               \
+#define dummy(d) /* do nothing */
+
+#define shr_next(reg) \
+	shrq $16,	reg;
+
+#define G(gi1, gi2, x, t0, t1, t2, t3) \
+	lookup_32bit(t0, t1, t2, t3, ##gi1, RGS1, shr_next, ##gi1);  \
+	lookup_32bit(t0, t1, t2, t3, ##gi2, RGS3, shr_next, ##gi2);  \
+	\
+	lookup_32bit(t0, t1, t2, t3, ##gi1, RGS2, dummy, none);      \
+	shlq $32,	RGS2;                                        \
+	orq		RGS1, RGS2;                                  \
+	lookup_32bit(t0, t1, t2, t3, ##gi2, RGS1, dummy, none);      \
+	shlq $32,	RGS1;                                        \
+	orq		RGS1, RGS3;
+
+#define round_head_2(a, b, x1, y1, x2, y2) \
+	vmovq		b ## 1, RGI3;           \
+	vpextrq $1,	b ## 1, RGI4;           \
 	\
-	lookup_32bit(t0, t1, t2, t3, RGI1, RGS1); \
-	shrq $16,	RGI1;                     \
-	lookup_32bit(t0, t1, t2, t3, RGI1, RGS2); \
-	shlq $32,	RGS2;                     \
-	orq		RGS1, RGS2;               \
+	G(RGI1, RGI2, x1, s0, s1, s2, s3);      \
+	vmovq		a ## 2, RGI1;           \
+	vpextrq $1,	a ## 2, RGI2;           \
+	vmovq		RGS2, x1;               \
+	vpinsrq $1,	RGS3, x1, x1;           \
 	\
-	lookup_32bit(t0, t1, t2, t3, RGI2, RGS1); \
-	shrq $16,	RGI2;                     \
-	lookup_32bit(t0, t1, t2, t3, RGI2, RGS3); \
-	shlq $32,	RGS3;                     \
-	orq		RGS1, RGS3;               \
+	G(RGI3, RGI4, y1, s1, s2, s3, s0);      \
+	vmovq		b ## 2, RGI3;           \
+	vpextrq $1,	b ## 2, RGI4;           \
+	vmovq		RGS2, y1;               \
+	vpinsrq $1,	RGS3, y1, y1;           \
 	\
-	vmovq		RGS2, x;                  \
-	vpinsrq $1,	RGS3, x, x;
+	G(RGI1, RGI2, x2, s0, s1, s2, s3);      \
+	vmovq		RGS2, x2;               \
+	vpinsrq $1,	RGS3, x2, x2;           \
+	\
+	G(RGI3, RGI4, y2, s1, s2, s3, s0);      \
+	vmovq		RGS2, y2;               \
+	vpinsrq $1,	RGS3, y2, y2;
 
-#define encround(a, b, c, d, x, y) \
-	G(a, x, s0, s1, s2, s3);           \
-	G(b, y, s1, s2, s3, s0);           \
+#define encround_tail(a, b, c, d, x, y, prerotate) \
 	vpaddd			x, y,   x; \
+	vpaddd			x, RK1, RT;\
+	prerotate(b);			   \
+	vpxor			RT, c,  c; \
 	vpaddd			y, x,   y; \
-	vpaddd			x, RK1, x; \
 	vpaddd			y, RK2, y; \
-	vpxor			x, c,   c; \
-	vpsrld $1,		c, x;      \
+	vpsrld $1,		c, RT;     \
 	vpslld $(32 - 1),	c, c;      \
-	vpor			c, x,   c; \
-	vpslld $1,		d, x;      \
-	vpsrld $(32 - 1),	d, d;      \
-	vpor			d, x,   d; \
-	vpxor			d, y,   d;
-
-#define decround(a, b, c, d, x, y) \
-	G(a, x, s0, s1, s2, s3);           \
-	G(b, y, s1, s2, s3, s0);           \
+	vpor			c, RT,  c; \
+	vpxor			d, y,   d; \
+
+#define decround_tail(a, b, c, d, x, y, prerotate) \
 	vpaddd			x, y,   x; \
+	vpaddd			x, RK1, RT;\
+	prerotate(a);			   \
+	vpxor			RT, c,  c; \
 	vpaddd			y, x,   y; \
 	vpaddd			y, RK2, y; \
 	vpxor			d, y,   d; \
 	vpsrld $1,		d, y;      \
 	vpslld $(32 - 1),	d, d;      \
 	vpor			d, y,   d; \
-	vpslld $1,		c, y;      \
-	vpsrld $(32 - 1),	c, c;      \
-	vpor			c, y,   c; \
-	vpaddd			x, RK1, x; \
-	vpxor			x, c,   c;
-
-#define encrypt_round(n, a, b, c, d) \
-	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;           \
-	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;           \
-	encround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
-	encround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
-
-#define decrypt_round(n, a, b, c, d) \
-	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;           \
-	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;           \
-	decround(a ## 1, b ## 1, c ## 1, d ## 1, RX, RY); \
-	decround(a ## 2, b ## 2, c ## 2, d ## 2, RX, RY);
+
+#define rotate_1l(x) \
+	vpslld $1,		x, RR;     \
+	vpsrld $(32 - 1),	x, x;      \
+	vpor			x, RR,  x;
+
+#define preload_rgi(c) \
+	vmovq			c, RGI1; \
+	vpextrq $1,		c, RGI2;
+
+#define encrypt_round(n, a, b, c, d, preload, prerotate) \
+	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;                  \
+	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;                  \
+	round_head_2(a, b, RX0, RY0, RX1, RY1);                  \
+	encround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \
+	preload(c ## 1);                                         \
+	encround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate);
+
+#define decrypt_round(n, a, b, c, d, preload, prerotate) \
+	vbroadcastss (k+4*(2*(n)))(CTX),   RK1;                  \
+	vbroadcastss (k+4*(2*(n)+1))(CTX), RK2;                  \
+	round_head_2(a, b, RX0, RY0, RX1, RY1);                  \
+	decround_tail(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0, prerotate); \
+	preload(c ## 1);                                         \
+	decround_tail(a ## 2, b ## 2, c ## 2, d ## 2, RX1, RY1, prerotate);
 
 #define encrypt_cycle(n) \
-	encrypt_round((2*n), RA, RB, RC, RD);       \
-	encrypt_round(((2*n) + 1), RC, RD, RA, RB);
+	encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \
+	encrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l);
+
+#define encrypt_cycle_last(n) \
+	encrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l); \
+	encrypt_round(((2*n) + 1), RC, RD, RA, RB, dummy, dummy);
 
 #define decrypt_cycle(n) \
-	decrypt_round(((2*n) + 1), RC, RD, RA, RB); \
-	decrypt_round((2*n), RA, RB, RC, RD);
+	decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \
+	decrypt_round((2*n), RA, RB, RC, RD, preload_rgi, rotate_1l);
 
+#define decrypt_cycle_last(n) \
+	decrypt_round(((2*n) + 1), RC, RD, RA, RB, preload_rgi, rotate_1l); \
+	decrypt_round((2*n), RA, RB, RC, RD, dummy, dummy);
 
 #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	vpunpckldq		x1, x0, t0; \
@@ -216,17 +265,20 @@ __twofish_enc_blk_8way:
 	 *	%rcx: bool, if true: xor output
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 	pushq %rcx;
 
 	vmovdqu w(CTX), RK1;
 
 	leaq (4*4*4)(%rdx), %rax;
-	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
-	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
+	preload_rgi(RA1);
+	rotate_1l(RD1);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
+	rotate_1l(RD2);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
 	encrypt_cycle(0);
 	encrypt_cycle(1);
@@ -235,26 +287,27 @@ __twofish_enc_blk_8way:
 	encrypt_cycle(4);
 	encrypt_cycle(5);
 	encrypt_cycle(6);
-	encrypt_cycle(7);
+	encrypt_cycle_last(7);
 
 	vmovdqu (w+4*4)(CTX), RK1;
 
 	popq %rcx;
 	popq %rbx;
+	popq %rbp;
 
-	leaq (4*4*4)(%rsi), %rax;
+	leaq (4*4*4)(%r11), %rax;
 
 	testb %cl, %cl;
 	jnz __enc_xor8;
 
-	outunpack_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
-	outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+	outunpack_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+	outunpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
 
 	ret;
 
 __enc_xor8:
-	outunpack_xor_blocks(%rsi, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
-	outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+	outunpack_xor_blocks(%r11, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+	outunpack_xor_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
 
 	ret;
 
@@ -269,16 +322,19 @@ twofish_dec_blk_8way:
 	 *	%rdx: src
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 
 	vmovdqu (w+4*4)(CTX), RK1;
 
 	leaq (4*4*4)(%rdx), %rax;
-	inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX, RY, RK2);
-	inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX, RY, RK2);
+	inpack_blocks(%rdx, RC1, RD1, RA1, RB1, RK1, RX0, RY0, RK2);
+	preload_rgi(RC1);
+	rotate_1l(RA1);
+	inpack_blocks(%rax, RC2, RD2, RA2, RB2, RK1, RX0, RY0, RK2);
+	rotate_1l(RA2);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
 	decrypt_cycle(7);
 	decrypt_cycle(6);
@@ -287,14 +343,15 @@ twofish_dec_blk_8way:
 	decrypt_cycle(3);
 	decrypt_cycle(2);
 	decrypt_cycle(1);
-	decrypt_cycle(0);
+	decrypt_cycle_last(0);
 
 	vmovdqu (w)(CTX), RK1;
 
 	popq %rbx;
+	popq %rbp;
 
-	leaq (4*4*4)(%rsi), %rax;
-	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RK1, RX, RY, RK2);
-	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX, RY, RK2);
+	leaq (4*4*4)(%r11), %rax;
+	outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RK1, RX0, RY0, RK2);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RK1, RX0, RY0, RK2);
 
 	ret;
-- 
cgit v1.2.3


From ddaea7869d29beb9e0042c96ea52c9cca2afd68a Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Tue, 28 Aug 2012 14:24:49 +0300
Subject: crypto: cast5-avx - tune assembler code for more performance

Patch replaces 'movb' instructions with 'movzbl' to break false register
dependencies, interleaves instructions better for out-of-order scheduling
and merges constant 16-bit rotation with round-key variable rotation.

tcrypt ECB results (128bit key):

Intel Core i5-2450M:

size    old-vs-new      new-vs-generic  old-vs-generic
        enc     dec     enc     dec     enc     dec
256     1.18x   1.18x   2.45x   2.47x   2.08x   2.10x
1k      1.20x   1.20x   2.73x   2.73x   2.28x   2.28x
8k      1.20x   1.19x   2.73x   2.73x   2.28x   2.29x

[v2]
 - Do instruction interleaving another way to avoid adding new FPU<=>CPU
   register moves as these cause performance drop on Bulldozer.
 - Improvements to round-key variable rotation handling.
 - Further interleaving improvements for better out-of-order scheduling.

Cc: Johannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 266 ++++++++++++++++++------------
 1 file changed, 160 insertions(+), 106 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index 94693c877e3b..a41a3aaba220 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -4,6 +4,8 @@
  * Copyright (C) 2012 Johannes Goetzfried
  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
  *
+ * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -22,7 +24,6 @@
  */
 
 .file "cast5-avx-x86_64-asm_64.S"
-.text
 
 .extern cast5_s1
 .extern cast5_s2
@@ -57,17 +58,19 @@
 #define RX %xmm8
 
 #define RKM  %xmm9
-#define RKRF %xmm10
-#define RKRR %xmm11
+#define RKR  %xmm10
+#define RKRF %xmm11
+#define RKRR %xmm12
+
+#define R32  %xmm13
+#define R1ST %xmm14
 
-#define RTMP  %xmm12
-#define RMASK %xmm13
-#define R32   %xmm14
+#define RTMP %xmm15
 
-#define RID1  %rax
-#define RID1b %al
-#define RID2  %rbx
-#define RID2b %bl
+#define RID1  %rbp
+#define RID1d %ebp
+#define RID2  %rsi
+#define RID2d %esi
 
 #define RGI1   %rdx
 #define RGI1bl %dl
@@ -76,6 +79,13 @@
 #define RGI2bl %cl
 #define RGI2bh %ch
 
+#define RGI3   %rax
+#define RGI3bl %al
+#define RGI3bh %ah
+#define RGI4   %rbx
+#define RGI4bl %bl
+#define RGI4bh %bh
+
 #define RFS1  %r8
 #define RFS1d %r8d
 #define RFS2  %r9
@@ -84,60 +94,84 @@
 #define RFS3d %r10d
 
 
-#define lookup_32bit(src, dst, op1, op2, op3) \
-	movb		src ## bl,     RID1b;    \
-	movb		src ## bh,     RID2b;    \
+#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	shrq $16,	src;                     \
 	movl		s1(, RID1, 4), dst ## d; \
 	op1		s2(, RID2, 4), dst ## d; \
-	shrq $16,	src;                     \
-	movb		src ## bl,     RID1b;    \
-	movb		src ## bh,     RID2b;    \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	interleave_op(il_reg);			 \
 	op2		s3(, RID1, 4), dst ## d; \
 	op3		s4(, RID2, 4), dst ## d;
 
-#define F(a, x, op0, op1, op2, op3) \
+#define dummy(d) /* do nothing */
+
+#define shr_next(reg) \
+	shrq $16,	reg;
+
+#define F_head(a, x, gi1, gi2, op0) \
 	op0	a,	RKM,  x;                 \
-	vpslld  RKRF,	x,    RTMP;              \
-	vpsrld  RKRR,	x,    x;                 \
+	vpslld	RKRF,	x,    RTMP;              \
+	vpsrld	RKRR,	x,    x;                 \
 	vpor	RTMP,	x,    x;                 \
 	\
-	vpshufb	RMASK,	x,    x;                 \
-	vmovq		x,    RGI1;              \
-	vpsrldq $8,	x,    x;                 \
-	vmovq		x,    RGI2;              \
-	\
-	lookup_32bit(RGI1, RFS1, op1, op2, op3); \
-	shrq $16,	RGI1;                    \
-	lookup_32bit(RGI1, RFS2, op1, op2, op3); \
-	shlq $32,	RFS2;                    \
-	orq		RFS1, RFS2;              \
+	vmovq		x,    gi1;               \
+	vpextrq $1,	x,    gi2;
+
+#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
+	lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \
+	lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \
 	\
-	lookup_32bit(RGI2, RFS1, op1, op2, op3); \
-	shrq $16,	RGI2;                    \
-	lookup_32bit(RGI2, RFS3, op1, op2, op3); \
-	shlq $32,	RFS3;                    \
-	orq		RFS1, RFS3;              \
+	lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS2;                                      \
+	orq		RFS1, RFS2;                                \
+	lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS1;                                      \
+	orq		RFS1, RFS3;                                \
 	\
-	vmovq		RFS2, x;                 \
+	vmovq		RFS2, x;                                   \
 	vpinsrq $1,	RFS3, x, x;
 
-#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl)
-#define F2(b, x) F(b, x, vpxor,  subl, addl, xorl)
-#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl)
+#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
+	F_head(b1, RX, RGI1, RGI2, op0);              \
+	F_head(b2, RX, RGI3, RGI4, op0);              \
+	\
+	F_tail(b1, RX, RGI1, RGI2, op1, op2, op3);    \
+	F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3);  \
+	\
+	vpxor		a1, RX,   a1;                 \
+	vpxor		a2, RTMP, a2;
+
+#define F1_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
+#define F2_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
+#define F3_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)
 
-#define subround(a, b, x, n, f) \
-	F ## f(b, x);  \
-	vpxor a, x, a;
+#define subround(a1, b1, a2, b2, f) \
+	F ## f ## _2(a1, b1, a2, b2);
 
 #define round(l, r, n, f) \
 	vbroadcastss 	(km+(4*n))(CTX), RKM;        \
-	vpinsrb $0,	(kr+n)(CTX),     RKRF, RKRF; \
+	vpand		R1ST,            RKR,  RKRF; \
 	vpsubq		RKRF,            R32,  RKRR; \
-	subround(l ## 1, r ## 1, RX, n, f);          \
-	subround(l ## 2, r ## 2, RX, n, f);          \
-	subround(l ## 3, r ## 3, RX, n, f);          \
-	subround(l ## 4, r ## 4, RX, n, f);
+	vpsrldq $1,	RKR,             RKR;        \
+	subround(l ## 1, r ## 1, l ## 2, r ## 2, f); \
+	subround(l ## 3, r ## 3, l ## 4, r ## 4, f);
+
+#define enc_preload_rkr() \
+	vbroadcastss	.L16_mask,                RKR;      \
+	/* add 16-bit rotation to key rotations (mod 32) */ \
+	vpxor		kr(CTX),                  RKR, RKR;
 
+#define dec_preload_rkr() \
+	vbroadcastss	.L16_mask,                RKR;      \
+	/* add 16-bit rotation to key rotations (mod 32) */ \
+	vpxor		kr(CTX),                  RKR, RKR; \
+	vpshufb		.Lbswap128_mask,          RKR, RKR;
 
 #define transpose_2x4(x0, x1, t0, t1) \
 	vpunpckldq		x1, x0, t0; \
@@ -146,37 +180,47 @@
 	vpunpcklqdq		t1, t0, x0; \
 	vpunpckhqdq		t1, t0, x1;
 
-#define inpack_blocks(in, x0, x1, t0, t1) \
+#define inpack_blocks(in, x0, x1, t0, t1, rmask) \
 	vmovdqu (0*4*4)(in),	x0; \
 	vmovdqu (1*4*4)(in),	x1; \
-	vpshufb RMASK, x0,	x0; \
-	vpshufb RMASK, x1,	x1; \
+	vpshufb rmask, 	x0,	x0; \
+	vpshufb rmask, 	x1,	x1; \
 	\
 	transpose_2x4(x0, x1, t0, t1)
 
-#define outunpack_blocks(out, x0, x1, t0, t1) \
+#define outunpack_blocks(out, x0, x1, t0, t1, rmask) \
 	transpose_2x4(x0, x1, t0, t1) \
 	\
-	vpshufb RMASK,	x0, x0;           \
-	vpshufb RMASK,	x1, x1;           \
+	vpshufb rmask,	x0, x0;           \
+	vpshufb rmask,	x1, x1;           \
 	vmovdqu 	x0, (0*4*4)(out); \
 	vmovdqu		x1, (1*4*4)(out);
 
-#define outunpack_xor_blocks(out, x0, x1, t0, t1) \
+#define outunpack_xor_blocks(out, x0, x1, t0, t1, rmask) \
 	transpose_2x4(x0, x1, t0, t1) \
 	\
-	vpshufb RMASK,	x0, x0;               \
-	vpshufb RMASK,	x1, x1;               \
+	vpshufb rmask,	x0, x0;               \
+	vpshufb rmask,	x1, x1;               \
 	vpxor		(0*4*4)(out), x0, x0; \
 	vmovdqu 	x0, (0*4*4)(out);     \
 	vpxor		(1*4*4)(out), x1, x1; \
 	vmovdqu	        x1, (1*4*4)(out);
 
+.data
+
 .align 16
 .Lbswap_mask:
 	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lbswap128_mask:
+	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.L16_mask:
+	.byte 16, 16, 16, 16
 .L32_mask:
-	.byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0
+	.byte 32, 0, 0, 0
+.Lfirst_mask:
+	.byte 0x1f, 0, 0, 0
+
+.text
 
 .align 16
 .global __cast5_enc_blk_16way
@@ -190,23 +234,24 @@ __cast5_enc_blk_16way:
 	 *	%rcx: bool, if true: xor output
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 	pushq %rcx;
 
-	vmovdqu .Lbswap_mask, RMASK;
-	vmovdqu .L32_mask, R32;
-	vpxor RKRF, RKRF, RKRF;
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
+	enc_preload_rkr();
 
-	inpack_blocks(%rdx, RL1, RR1, RTMP, RX);
-	leaq (2*4*4)(%rdx), %rax;
-	inpack_blocks(%rax, RL2, RR2, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	inpack_blocks(%rax, RL3, RR3, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	inpack_blocks(%rax, RL4, RR4, RTMP, RX);
+	leaq 1*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
 	round(RL, RR, 0, 1);
 	round(RR, RL, 1, 2);
@@ -221,8 +266,8 @@ __cast5_enc_blk_16way:
 	round(RL, RR, 10, 2);
 	round(RR, RL, 11, 3);
 
-	movb rr(CTX), %al;
-	testb %al, %al;
+	movzbl rr(CTX), %eax;
+	testl %eax, %eax;
 	jnz __skip_enc;
 
 	round(RL, RR, 12, 1);
@@ -233,28 +278,30 @@ __cast5_enc_blk_16way:
 __skip_enc:
 	popq %rcx;
 	popq %rbx;
+	popq %rbp;
+
+	vmovdqa .Lbswap_mask, RKM;
+	leaq 1*(2*4*4)(%r11), %rax;
 
 	testb %cl, %cl;
 	jnz __enc_xor16;
 
-	outunpack_blocks(%rsi, RR1, RL1, RTMP, RX);
-	leaq (2*4*4)(%rsi), %rax;
-	outunpack_blocks(%rax, RR2, RL2, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_blocks(%rax, RR3, RL3, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_blocks(%rax, RR4, RL4, RTMP, RX);
+	outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
 
 	ret;
 
 __enc_xor16:
-	outunpack_xor_blocks(%rsi, RR1, RL1, RTMP, RX);
-	leaq (2*4*4)(%rsi), %rax;
-	outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX);
+	outunpack_xor_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_xor_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_xor_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_xor_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
 
 	ret;
 
@@ -269,25 +316,26 @@ cast5_dec_blk_16way:
 	 *	%rdx: src
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 
-	vmovdqu .Lbswap_mask, RMASK;
-	vmovdqu .L32_mask, R32;
-	vpxor RKRF, RKRF, RKRF;
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
+	dec_preload_rkr();
 
-	inpack_blocks(%rdx, RL1, RR1, RTMP, RX);
-	leaq (2*4*4)(%rdx), %rax;
-	inpack_blocks(%rax, RL2, RR2, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	inpack_blocks(%rax, RL3, RR3, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	inpack_blocks(%rax, RL4, RR4, RTMP, RX);
+	leaq 1*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rdx, RL1, RR1, RTMP, RX, RKM);
+	inpack_blocks(%rax, RL2, RR2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL3, RR3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%rdx), %rax;
+	inpack_blocks(%rax, RL4, RR4, RTMP, RX, RKM);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
-	movb rr(CTX), %al;
-	testb %al, %al;
+	movzbl rr(CTX), %eax;
+	testl %eax, %eax;
 	jnz __skip_dec;
 
 	round(RL, RR, 15, 1);
@@ -295,7 +343,7 @@ cast5_dec_blk_16way:
 	round(RL, RR, 13, 2);
 	round(RR, RL, 12, 1);
 
-__skip_dec:
+__dec_tail:
 	round(RL, RR, 11, 3);
 	round(RR, RL, 10, 2);
 	round(RL, RR, 9, 1);
@@ -309,14 +357,20 @@ __skip_dec:
 	round(RL, RR, 1, 2);
 	round(RR, RL, 0, 1);
 
+	vmovdqa .Lbswap_mask, RKM;
 	popq %rbx;
+	popq %rbp;
 
-	outunpack_blocks(%rsi, RR1, RL1, RTMP, RX);
-	leaq (2*4*4)(%rsi), %rax;
-	outunpack_blocks(%rax, RR2, RL2, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_blocks(%rax, RR3, RL3, RTMP, RX);
-	leaq (2*4*4)(%rax), %rax;
-	outunpack_blocks(%rax, RR4, RL4, RTMP, RX);
+	leaq 1*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%r11, RR1, RL1, RTMP, RX, RKM);
+	outunpack_blocks(%rax, RR2, RL2, RTMP, RX, RKM);
+	leaq 2*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR3, RL3, RTMP, RX, RKM);
+	leaq 3*(2*4*4)(%r11), %rax;
+	outunpack_blocks(%rax, RR4, RL4, RTMP, RX, RKM);
 
 	ret;
+
+__skip_dec:
+	vpsrldq $4, RKR, RKR;
+	jmp __dec_tail;
-- 
cgit v1.2.3


From c09220e1bc97d83cae445cab8dcb057fabd62361 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Tue, 28 Aug 2012 14:24:54 +0300
Subject: crypto: cast6-avx - tune assembler code for more performance

Patch replaces 'movb' instructions with 'movzbl' to break false register
dependencies, interleaves instructions better for out-of-order scheduling
and merges constant 16-bit rotation with round-key variable rotation.

tcrypt ECB results:

Intel Core i5-2450M:

size    old-vs-new      new-vs-generic  old-vs-generic
        enc     dec     enc     dec     enc     dec
256     1.13x   1.19x   2.05x   2.17x   1.82x   1.82x
1k      1.18x   1.21x   2.26x   2.33x   1.93x   1.93x
8k      1.19x   1.19x   2.32x   2.33x   1.95x   1.95x

[v2]
 - Do instruction interleaving another way to avoid adding new FPU<=>CPU
   register moves as these cause performance drop on Bulldozer.
 - Improvements to round-key variable rotation handling.
 - Further interleaving improvements for better out-of-order scheduling.

Cc: Johannes Goetzfried <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 276 ++++++++++++++++++------------
 1 file changed, 162 insertions(+), 114 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
index d258ce0d2e06..218d283772f4 100644
--- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S
@@ -4,6 +4,8 @@
  * Copyright (C) 2012 Johannes Goetzfried
  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
  *
+ * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -22,7 +24,6 @@
  */
 
 .file "cast6-avx-x86_64-asm_64.S"
-.text
 
 .extern cast6_s1
 .extern cast6_s2
@@ -54,20 +55,21 @@
 #define RC2 %xmm6
 #define RD2 %xmm7
 
-#define RX %xmm8
+#define RX  %xmm8
 
 #define RKM  %xmm9
-#define RKRF %xmm10
-#define RKRR %xmm11
+#define RKR  %xmm10
+#define RKRF %xmm11
+#define RKRR %xmm12
+#define R32  %xmm13
+#define R1ST %xmm14
 
-#define RTMP  %xmm12
-#define RMASK %xmm13
-#define R32   %xmm14
+#define RTMP %xmm15
 
-#define RID1  %rax
-#define RID1b %al
-#define RID2  %rbx
-#define RID2b %bl
+#define RID1  %rbp
+#define RID1d %ebp
+#define RID2  %rsi
+#define RID2d %esi
 
 #define RGI1   %rdx
 #define RGI1bl %dl
@@ -76,6 +78,13 @@
 #define RGI2bl %cl
 #define RGI2bh %ch
 
+#define RGI3   %rax
+#define RGI3bl %al
+#define RGI3bh %ah
+#define RGI4   %rbx
+#define RGI4bl %bl
+#define RGI4bh %bh
+
 #define RFS1  %r8
 #define RFS1d %r8d
 #define RFS2  %r9
@@ -84,95 +93,106 @@
 #define RFS3d %r10d
 
 
-#define lookup_32bit(src, dst, op1, op2, op3) \
-	movb		src ## bl,     RID1b;    \
-	movb		src ## bh,     RID2b;    \
+#define lookup_32bit(src, dst, op1, op2, op3, interleave_op, il_reg) \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	shrq $16,	src;                     \
 	movl		s1(, RID1, 4), dst ## d; \
 	op1		s2(, RID2, 4), dst ## d; \
-	shrq $16,	src;                     \
-	movb		src ## bl,     RID1b;    \
-	movb		src ## bh,     RID2b;    \
+	movzbl		src ## bh,     RID1d;    \
+	movzbl		src ## bl,     RID2d;    \
+	interleave_op(il_reg);			 \
 	op2		s3(, RID1, 4), dst ## d; \
 	op3		s4(, RID2, 4), dst ## d;
 
-#define F(a, x, op0, op1, op2, op3) \
+#define dummy(d) /* do nothing */
+
+#define shr_next(reg) \
+	shrq $16,	reg;
+
+#define F_head(a, x, gi1, gi2, op0) \
 	op0	a,	RKM,  x;                 \
-	vpslld  RKRF,	x,    RTMP;              \
-	vpsrld  RKRR,	x,    x;                 \
+	vpslld	RKRF,	x,    RTMP;              \
+	vpsrld	RKRR,	x,    x;                 \
 	vpor	RTMP,	x,    x;                 \
 	\
-	vpshufb	RMASK,	x,    x;                 \
-	vmovq		x,    RGI1;              \
-	vpsrldq $8,	x,    x;                 \
-	vmovq		x,    RGI2;              \
-	\
-	lookup_32bit(RGI1, RFS1, op1, op2, op3); \
-	shrq $16,	RGI1;                    \
-	lookup_32bit(RGI1, RFS2, op1, op2, op3); \
-	shlq $32,	RFS2;                    \
-	orq		RFS1, RFS2;              \
+	vmovq		x,    gi1;               \
+	vpextrq $1,	x,    gi2;
+
+#define F_tail(a, x, gi1, gi2, op1, op2, op3) \
+	lookup_32bit(##gi1, RFS1, op1, op2, op3, shr_next, ##gi1); \
+	lookup_32bit(##gi2, RFS3, op1, op2, op3, shr_next, ##gi2); \
 	\
-	lookup_32bit(RGI2, RFS1, op1, op2, op3); \
-	shrq $16,	RGI2;                    \
-	lookup_32bit(RGI2, RFS3, op1, op2, op3); \
-	shlq $32,	RFS3;                    \
-	orq		RFS1, RFS3;              \
+	lookup_32bit(##gi1, RFS2, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS2;                                      \
+	orq		RFS1, RFS2;                                \
+	lookup_32bit(##gi2, RFS1, op1, op2, op3, dummy, none);     \
+	shlq $32,	RFS1;                                      \
+	orq		RFS1, RFS3;                                \
 	\
-	vmovq		RFS2, x;                 \
+	vmovq		RFS2, x;                                   \
 	vpinsrq $1,	RFS3, x, x;
 
-#define F1(b, x) F(b, x, vpaddd, xorl, subl, addl)
-#define F2(b, x) F(b, x, vpxor,  subl, addl, xorl)
-#define F3(b, x) F(b, x, vpsubd, addl, xorl, subl)
+#define F_2(a1, b1, a2, b2, op0, op1, op2, op3) \
+	F_head(b1, RX, RGI1, RGI2, op0);              \
+	F_head(b2, RX, RGI3, RGI4, op0);              \
+	\
+	F_tail(b1, RX, RGI1, RGI2, op1, op2, op3);    \
+	F_tail(b2, RTMP, RGI3, RGI4, op1, op2, op3);  \
+	\
+	vpxor		a1, RX,   a1;                 \
+	vpxor		a2, RTMP, a2;
+
+#define F1_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpaddd, xorl, subl, addl)
+#define F2_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpxor, subl, addl, xorl)
+#define F3_2(a1, b1, a2, b2) \
+	F_2(a1, b1, a2, b2, vpsubd, addl, xorl, subl)
 
-#define qop(in, out, x, f) \
-	F ## f(in ## 1, x);          \
-	vpxor out ## 1, x, out ## 1; \
-	F ## f(in ## 2, x);          \
-	vpxor out ## 2, x, out ## 2; \
+#define qop(in, out, f) \
+	F ## f ## _2(out ## 1, in ## 1, out ## 2, in ## 2);
+
+#define get_round_keys(nn) \
+	vbroadcastss	(km+(4*(nn)))(CTX), RKM;        \
+	vpand		R1ST,               RKR,  RKRF; \
+	vpsubq		RKRF,               R32,  RKRR; \
+	vpsrldq $1,	RKR,                RKR;
 
 #define Q(n) \
-	vbroadcastss	(km+(4*(4*n+0)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+0))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RD, RC, RX, 1);                                \
+	get_round_keys(4*n+0); \
+	qop(RD, RC, 1);        \
 	\
-	vbroadcastss	(km+(4*(4*n+1)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+1))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RC, RB, RX, 2);                                \
+	get_round_keys(4*n+1); \
+	qop(RC, RB, 2);        \
 	\
-	vbroadcastss	(km+(4*(4*n+2)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+2))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RB, RA, RX, 3);                                \
+	get_round_keys(4*n+2); \
+	qop(RB, RA, 3);        \
 	\
-	vbroadcastss	(km+(4*(4*n+3)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+3))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RA, RD, RX, 1);
+	get_round_keys(4*n+3); \
+	qop(RA, RD, 1);
 
 #define QBAR(n) \
-	vbroadcastss	(km+(4*(4*n+3)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+3))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RA, RD, RX, 1);                                \
+	get_round_keys(4*n+3); \
+	qop(RA, RD, 1);        \
 	\
-	vbroadcastss	(km+(4*(4*n+2)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+2))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RB, RA, RX, 3);                                \
+	get_round_keys(4*n+2); \
+	qop(RB, RA, 3);        \
 	\
-	vbroadcastss	(km+(4*(4*n+1)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+1))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RC, RB, RX, 2);                                \
+	get_round_keys(4*n+1); \
+	qop(RC, RB, 2);        \
 	\
-	vbroadcastss	(km+(4*(4*n+0)))(CTX), RKM;        \
-	vpinsrb $0,	(kr+(4*n+0))(CTX),     RKRF, RKRF; \
-	vpsubq		RKRF,                  R32,  RKRR; \
-	qop(RD, RC, RX, 1);
+	get_round_keys(4*n+0); \
+	qop(RD, RC, 1);
+
+#define shuffle(mask) \
+	vpshufb		mask,            RKR, RKR;
 
+#define preload_rkr(n, do_mask, mask) \
+	vbroadcastss	.L16_mask,                RKR;      \
+	/* add 16-bit rotation to key rotations (mod 32) */ \
+	vpxor		(kr+n*16)(CTX),           RKR, RKR; \
+	do_mask(mask);
 
 #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	vpunpckldq		x1, x0, t0; \
@@ -185,37 +205,37 @@
 	vpunpcklqdq		x3, t2, x2; \
 	vpunpckhqdq		x3, t2, x3;
 
-#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2) \
+#define inpack_blocks(in, x0, x1, x2, x3, t0, t1, t2, rmask) \
 	vmovdqu (0*4*4)(in),	x0; \
 	vmovdqu (1*4*4)(in),	x1; \
 	vmovdqu (2*4*4)(in),	x2; \
 	vmovdqu (3*4*4)(in),	x3; \
-	vpshufb RMASK, x0,	x0; \
-	vpshufb RMASK, x1,	x1; \
-	vpshufb RMASK, x2,	x2; \
-	vpshufb RMASK, x3,	x3; \
+	vpshufb rmask, x0,	x0; \
+	vpshufb rmask, x1,	x1; \
+	vpshufb rmask, x2,	x2; \
+	vpshufb rmask, x3,	x3; \
 	\
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 
-#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+#define outunpack_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	\
-	vpshufb RMASK,		x0, x0;       \
-	vpshufb RMASK,		x1, x1;       \
-	vpshufb RMASK,		x2, x2;       \
-	vpshufb RMASK,		x3, x3;       \
+	vpshufb rmask,		x0, x0;       \
+	vpshufb rmask,		x1, x1;       \
+	vpshufb rmask,		x2, x2;       \
+	vpshufb rmask,		x3, x3;       \
 	vmovdqu x0,		(0*4*4)(out); \
 	vmovdqu	x1,		(1*4*4)(out); \
 	vmovdqu	x2,		(2*4*4)(out); \
 	vmovdqu	x3,		(3*4*4)(out);
 
-#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2) \
+#define outunpack_xor_blocks(out, x0, x1, x2, x3, t0, t1, t2, rmask) \
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
 	\
-	vpshufb RMASK,		x0, x0;       \
-	vpshufb RMASK,		x1, x1;       \
-	vpshufb RMASK,		x2, x2;       \
-	vpshufb RMASK,		x3, x3;       \
+	vpshufb rmask,		x0, x0;       \
+	vpshufb rmask,		x1, x1;       \
+	vpshufb rmask,		x2, x2;       \
+	vpshufb rmask,		x3, x3;       \
 	vpxor (0*4*4)(out),	x0, x0;       \
 	vmovdqu	x0,		(0*4*4)(out); \
 	vpxor (1*4*4)(out),	x1, x1;       \
@@ -225,11 +245,29 @@
 	vpxor (3*4*4)(out),	x3, x3;       \
 	vmovdqu x3,		(3*4*4)(out);
 
+.data
+
 .align 16
 .Lbswap_mask:
 	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lrkr_enc_Q_Q_QBAR_QBAR:
+	.byte 0, 1, 2, 3, 4, 5, 6, 7, 11, 10, 9, 8, 15, 14, 13, 12
+.Lrkr_enc_QBAR_QBAR_QBAR_QBAR:
+	.byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lrkr_dec_Q_Q_Q_Q:
+	.byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
+.Lrkr_dec_Q_Q_QBAR_QBAR:
+	.byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0
+.Lrkr_dec_QBAR_QBAR_QBAR_QBAR:
+	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.L16_mask:
+	.byte 16, 16, 16, 16
 .L32_mask:
-	.byte 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ,0, 0, 0, 0, 0
+	.byte 32, 0, 0, 0
+.Lfirst_mask:
+	.byte 0x1f, 0, 0, 0
+
+.text
 
 .align 16
 .global __cast6_enc_blk_8way
@@ -243,28 +281,31 @@ __cast6_enc_blk_8way:
 	 *	%rcx: bool, if true: xor output
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 	pushq %rcx;
 
-	vmovdqu .Lbswap_mask, RMASK;
-	vmovdqu .L32_mask, R32;
-	vpxor RKRF, RKRF, RKRF;
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
 
 	leaq (4*4*4)(%rdx), %rax;
-	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
-	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
+	preload_rkr(0, dummy, none);
 	Q(0);
 	Q(1);
 	Q(2);
 	Q(3);
+	preload_rkr(1, shuffle, .Lrkr_enc_Q_Q_QBAR_QBAR);
 	Q(4);
 	Q(5);
 	QBAR(6);
 	QBAR(7);
+	preload_rkr(2, shuffle, .Lrkr_enc_QBAR_QBAR_QBAR_QBAR);
 	QBAR(8);
 	QBAR(9);
 	QBAR(10);
@@ -272,20 +313,22 @@ __cast6_enc_blk_8way:
 
 	popq %rcx;
 	popq %rbx;
+	popq %rbp;
 
-	leaq (4*4*4)(%rsi), %rax;
+	vmovdqa .Lbswap_mask, RKM;
+	leaq (4*4*4)(%r11), %rax;
 
 	testb %cl, %cl;
 	jnz __enc_xor8;
 
-	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
-	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+	outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
 	ret;
 
 __enc_xor8:
-	outunpack_xor_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
-	outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+	outunpack_xor_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	outunpack_xor_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
 	ret;
 
@@ -300,36 +343,41 @@ cast6_dec_blk_8way:
 	 *	%rdx: src
 	 */
 
+	pushq %rbp;
 	pushq %rbx;
 
-	vmovdqu .Lbswap_mask, RMASK;
-	vmovdqu .L32_mask, R32;
-	vpxor RKRF, RKRF, RKRF;
+	vmovdqa .Lbswap_mask, RKM;
+	vmovd .Lfirst_mask, R1ST;
+	vmovd .L32_mask, R32;
 
 	leaq (4*4*4)(%rdx), %rax;
-	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
-	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+	inpack_blocks(%rdx, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	inpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
-	xorq RID1, RID1;
-	xorq RID2, RID2;
+	movq %rsi, %r11;
 
+	preload_rkr(2, shuffle, .Lrkr_dec_Q_Q_Q_Q);
 	Q(11);
 	Q(10);
 	Q(9);
 	Q(8);
+	preload_rkr(1, shuffle, .Lrkr_dec_Q_Q_QBAR_QBAR);
 	Q(7);
 	Q(6);
 	QBAR(5);
 	QBAR(4);
+	preload_rkr(0, shuffle, .Lrkr_dec_QBAR_QBAR_QBAR_QBAR);
 	QBAR(3);
 	QBAR(2);
 	QBAR(1);
 	QBAR(0);
 
 	popq %rbx;
+	popq %rbp;
 
-	leaq (4*4*4)(%rsi), %rax;
-	outunpack_blocks(%rsi, RA1, RB1, RC1, RD1, RTMP, RX, RKM);
-	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKM);
+	vmovdqa .Lbswap_mask, RKM;
+	leaq (4*4*4)(%r11), %rax;
+	outunpack_blocks(%r11, RA1, RB1, RC1, RD1, RTMP, RX, RKRF, RKM);
+	outunpack_blocks(%rax, RA2, RB2, RC2, RD2, RTMP, RX, RKRF, RKM);
 
 	ret;
-- 
cgit v1.2.3


From 1ffb72a39a92c1a03b3c732280e924c02c509cd3 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Tue, 28 Aug 2012 16:46:59 +0300
Subject: crypto: camellia-x86_64 - fix sparse warnings (constant is so big)

Fix "constant 0xXXXXXXXXXXXXXXXX is so big it's unsigned long" sparse warnings.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/camellia_glue.c | 1376 +++++++++++++++++++--------------------
 1 file changed, 688 insertions(+), 688 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/camellia_glue.c b/arch/x86/crypto/camellia_glue.c
index 7a74d7bb326d..42ffd2bbab5b 100644
--- a/arch/x86/crypto/camellia_glue.c
+++ b/arch/x86/crypto/camellia_glue.c
@@ -92,715 +92,715 @@ static void camellia_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 
 /* camellia sboxes */
 const u64 camellia_sp10011110[256] = {
-	0x7000007070707000, 0x8200008282828200, 0x2c00002c2c2c2c00,
-	0xec0000ecececec00, 0xb30000b3b3b3b300, 0x2700002727272700,
-	0xc00000c0c0c0c000, 0xe50000e5e5e5e500, 0xe40000e4e4e4e400,
-	0x8500008585858500, 0x5700005757575700, 0x3500003535353500,
-	0xea0000eaeaeaea00, 0x0c00000c0c0c0c00, 0xae0000aeaeaeae00,
-	0x4100004141414100, 0x2300002323232300, 0xef0000efefefef00,
-	0x6b00006b6b6b6b00, 0x9300009393939300, 0x4500004545454500,
-	0x1900001919191900, 0xa50000a5a5a5a500, 0x2100002121212100,
-	0xed0000edededed00, 0x0e00000e0e0e0e00, 0x4f00004f4f4f4f00,
-	0x4e00004e4e4e4e00, 0x1d00001d1d1d1d00, 0x6500006565656500,
-	0x9200009292929200, 0xbd0000bdbdbdbd00, 0x8600008686868600,
-	0xb80000b8b8b8b800, 0xaf0000afafafaf00, 0x8f00008f8f8f8f00,
-	0x7c00007c7c7c7c00, 0xeb0000ebebebeb00, 0x1f00001f1f1f1f00,
-	0xce0000cececece00, 0x3e00003e3e3e3e00, 0x3000003030303000,
-	0xdc0000dcdcdcdc00, 0x5f00005f5f5f5f00, 0x5e00005e5e5e5e00,
-	0xc50000c5c5c5c500, 0x0b00000b0b0b0b00, 0x1a00001a1a1a1a00,
-	0xa60000a6a6a6a600, 0xe10000e1e1e1e100, 0x3900003939393900,
-	0xca0000cacacaca00, 0xd50000d5d5d5d500, 0x4700004747474700,
-	0x5d00005d5d5d5d00, 0x3d00003d3d3d3d00, 0xd90000d9d9d9d900,
-	0x0100000101010100, 0x5a00005a5a5a5a00, 0xd60000d6d6d6d600,
-	0x5100005151515100, 0x5600005656565600, 0x6c00006c6c6c6c00,
-	0x4d00004d4d4d4d00, 0x8b00008b8b8b8b00, 0x0d00000d0d0d0d00,
-	0x9a00009a9a9a9a00, 0x6600006666666600, 0xfb0000fbfbfbfb00,
-	0xcc0000cccccccc00, 0xb00000b0b0b0b000, 0x2d00002d2d2d2d00,
-	0x7400007474747400, 0x1200001212121200, 0x2b00002b2b2b2b00,
-	0x2000002020202000, 0xf00000f0f0f0f000, 0xb10000b1b1b1b100,
-	0x8400008484848400, 0x9900009999999900, 0xdf0000dfdfdfdf00,
-	0x4c00004c4c4c4c00, 0xcb0000cbcbcbcb00, 0xc20000c2c2c2c200,
-	0x3400003434343400, 0x7e00007e7e7e7e00, 0x7600007676767600,
-	0x0500000505050500, 0x6d00006d6d6d6d00, 0xb70000b7b7b7b700,
-	0xa90000a9a9a9a900, 0x3100003131313100, 0xd10000d1d1d1d100,
-	0x1700001717171700, 0x0400000404040400, 0xd70000d7d7d7d700,
-	0x1400001414141400, 0x5800005858585800, 0x3a00003a3a3a3a00,
-	0x6100006161616100, 0xde0000dededede00, 0x1b00001b1b1b1b00,
-	0x1100001111111100, 0x1c00001c1c1c1c00, 0x3200003232323200,
-	0x0f00000f0f0f0f00, 0x9c00009c9c9c9c00, 0x1600001616161600,
-	0x5300005353535300, 0x1800001818181800, 0xf20000f2f2f2f200,
-	0x2200002222222200, 0xfe0000fefefefe00, 0x4400004444444400,
-	0xcf0000cfcfcfcf00, 0xb20000b2b2b2b200, 0xc30000c3c3c3c300,
-	0xb50000b5b5b5b500, 0x7a00007a7a7a7a00, 0x9100009191919100,
-	0x2400002424242400, 0x0800000808080800, 0xe80000e8e8e8e800,
-	0xa80000a8a8a8a800, 0x6000006060606000, 0xfc0000fcfcfcfc00,
-	0x6900006969696900, 0x5000005050505000, 0xaa0000aaaaaaaa00,
-	0xd00000d0d0d0d000, 0xa00000a0a0a0a000, 0x7d00007d7d7d7d00,
-	0xa10000a1a1a1a100, 0x8900008989898900, 0x6200006262626200,
-	0x9700009797979700, 0x5400005454545400, 0x5b00005b5b5b5b00,
-	0x1e00001e1e1e1e00, 0x9500009595959500, 0xe00000e0e0e0e000,
-	0xff0000ffffffff00, 0x6400006464646400, 0xd20000d2d2d2d200,
-	0x1000001010101000, 0xc40000c4c4c4c400, 0x0000000000000000,
-	0x4800004848484800, 0xa30000a3a3a3a300, 0xf70000f7f7f7f700,
-	0x7500007575757500, 0xdb0000dbdbdbdb00, 0x8a00008a8a8a8a00,
-	0x0300000303030300, 0xe60000e6e6e6e600, 0xda0000dadadada00,
-	0x0900000909090900, 0x3f00003f3f3f3f00, 0xdd0000dddddddd00,
-	0x9400009494949400, 0x8700008787878700, 0x5c00005c5c5c5c00,
-	0x8300008383838300, 0x0200000202020200, 0xcd0000cdcdcdcd00,
-	0x4a00004a4a4a4a00, 0x9000009090909000, 0x3300003333333300,
-	0x7300007373737300, 0x6700006767676700, 0xf60000f6f6f6f600,
-	0xf30000f3f3f3f300, 0x9d00009d9d9d9d00, 0x7f00007f7f7f7f00,
-	0xbf0000bfbfbfbf00, 0xe20000e2e2e2e200, 0x5200005252525200,
-	0x9b00009b9b9b9b00, 0xd80000d8d8d8d800, 0x2600002626262600,
-	0xc80000c8c8c8c800, 0x3700003737373700, 0xc60000c6c6c6c600,
-	0x3b00003b3b3b3b00, 0x8100008181818100, 0x9600009696969600,
-	0x6f00006f6f6f6f00, 0x4b00004b4b4b4b00, 0x1300001313131300,
-	0xbe0000bebebebe00, 0x6300006363636300, 0x2e00002e2e2e2e00,
-	0xe90000e9e9e9e900, 0x7900007979797900, 0xa70000a7a7a7a700,
-	0x8c00008c8c8c8c00, 0x9f00009f9f9f9f00, 0x6e00006e6e6e6e00,
-	0xbc0000bcbcbcbc00, 0x8e00008e8e8e8e00, 0x2900002929292900,
-	0xf50000f5f5f5f500, 0xf90000f9f9f9f900, 0xb60000b6b6b6b600,
-	0x2f00002f2f2f2f00, 0xfd0000fdfdfdfd00, 0xb40000b4b4b4b400,
-	0x5900005959595900, 0x7800007878787800, 0x9800009898989800,
-	0x0600000606060600, 0x6a00006a6a6a6a00, 0xe70000e7e7e7e700,
-	0x4600004646464600, 0x7100007171717100, 0xba0000babababa00,
-	0xd40000d4d4d4d400, 0x2500002525252500, 0xab0000abababab00,
-	0x4200004242424200, 0x8800008888888800, 0xa20000a2a2a2a200,
-	0x8d00008d8d8d8d00, 0xfa0000fafafafa00, 0x7200007272727200,
-	0x0700000707070700, 0xb90000b9b9b9b900, 0x5500005555555500,
-	0xf80000f8f8f8f800, 0xee0000eeeeeeee00, 0xac0000acacacac00,
-	0x0a00000a0a0a0a00, 0x3600003636363600, 0x4900004949494900,
-	0x2a00002a2a2a2a00, 0x6800006868686800, 0x3c00003c3c3c3c00,
-	0x3800003838383800, 0xf10000f1f1f1f100, 0xa40000a4a4a4a400,
-	0x4000004040404000, 0x2800002828282800, 0xd30000d3d3d3d300,
-	0x7b00007b7b7b7b00, 0xbb0000bbbbbbbb00, 0xc90000c9c9c9c900,
-	0x4300004343434300, 0xc10000c1c1c1c100, 0x1500001515151500,
-	0xe30000e3e3e3e300, 0xad0000adadadad00, 0xf40000f4f4f4f400,
-	0x7700007777777700, 0xc70000c7c7c7c700, 0x8000008080808000,
-	0x9e00009e9e9e9e00,
+	0x7000007070707000ULL, 0x8200008282828200ULL, 0x2c00002c2c2c2c00ULL,
+	0xec0000ecececec00ULL, 0xb30000b3b3b3b300ULL, 0x2700002727272700ULL,
+	0xc00000c0c0c0c000ULL, 0xe50000e5e5e5e500ULL, 0xe40000e4e4e4e400ULL,
+	0x8500008585858500ULL, 0x5700005757575700ULL, 0x3500003535353500ULL,
+	0xea0000eaeaeaea00ULL, 0x0c00000c0c0c0c00ULL, 0xae0000aeaeaeae00ULL,
+	0x4100004141414100ULL, 0x2300002323232300ULL, 0xef0000efefefef00ULL,
+	0x6b00006b6b6b6b00ULL, 0x9300009393939300ULL, 0x4500004545454500ULL,
+	0x1900001919191900ULL, 0xa50000a5a5a5a500ULL, 0x2100002121212100ULL,
+	0xed0000edededed00ULL, 0x0e00000e0e0e0e00ULL, 0x4f00004f4f4f4f00ULL,
+	0x4e00004e4e4e4e00ULL, 0x1d00001d1d1d1d00ULL, 0x6500006565656500ULL,
+	0x9200009292929200ULL, 0xbd0000bdbdbdbd00ULL, 0x8600008686868600ULL,
+	0xb80000b8b8b8b800ULL, 0xaf0000afafafaf00ULL, 0x8f00008f8f8f8f00ULL,
+	0x7c00007c7c7c7c00ULL, 0xeb0000ebebebeb00ULL, 0x1f00001f1f1f1f00ULL,
+	0xce0000cececece00ULL, 0x3e00003e3e3e3e00ULL, 0x3000003030303000ULL,
+	0xdc0000dcdcdcdc00ULL, 0x5f00005f5f5f5f00ULL, 0x5e00005e5e5e5e00ULL,
+	0xc50000c5c5c5c500ULL, 0x0b00000b0b0b0b00ULL, 0x1a00001a1a1a1a00ULL,
+	0xa60000a6a6a6a600ULL, 0xe10000e1e1e1e100ULL, 0x3900003939393900ULL,
+	0xca0000cacacaca00ULL, 0xd50000d5d5d5d500ULL, 0x4700004747474700ULL,
+	0x5d00005d5d5d5d00ULL, 0x3d00003d3d3d3d00ULL, 0xd90000d9d9d9d900ULL,
+	0x0100000101010100ULL, 0x5a00005a5a5a5a00ULL, 0xd60000d6d6d6d600ULL,
+	0x5100005151515100ULL, 0x5600005656565600ULL, 0x6c00006c6c6c6c00ULL,
+	0x4d00004d4d4d4d00ULL, 0x8b00008b8b8b8b00ULL, 0x0d00000d0d0d0d00ULL,
+	0x9a00009a9a9a9a00ULL, 0x6600006666666600ULL, 0xfb0000fbfbfbfb00ULL,
+	0xcc0000cccccccc00ULL, 0xb00000b0b0b0b000ULL, 0x2d00002d2d2d2d00ULL,
+	0x7400007474747400ULL, 0x1200001212121200ULL, 0x2b00002b2b2b2b00ULL,
+	0x2000002020202000ULL, 0xf00000f0f0f0f000ULL, 0xb10000b1b1b1b100ULL,
+	0x8400008484848400ULL, 0x9900009999999900ULL, 0xdf0000dfdfdfdf00ULL,
+	0x4c00004c4c4c4c00ULL, 0xcb0000cbcbcbcb00ULL, 0xc20000c2c2c2c200ULL,
+	0x3400003434343400ULL, 0x7e00007e7e7e7e00ULL, 0x7600007676767600ULL,
+	0x0500000505050500ULL, 0x6d00006d6d6d6d00ULL, 0xb70000b7b7b7b700ULL,
+	0xa90000a9a9a9a900ULL, 0x3100003131313100ULL, 0xd10000d1d1d1d100ULL,
+	0x1700001717171700ULL, 0x0400000404040400ULL, 0xd70000d7d7d7d700ULL,
+	0x1400001414141400ULL, 0x5800005858585800ULL, 0x3a00003a3a3a3a00ULL,
+	0x6100006161616100ULL, 0xde0000dededede00ULL, 0x1b00001b1b1b1b00ULL,
+	0x1100001111111100ULL, 0x1c00001c1c1c1c00ULL, 0x3200003232323200ULL,
+	0x0f00000f0f0f0f00ULL, 0x9c00009c9c9c9c00ULL, 0x1600001616161600ULL,
+	0x5300005353535300ULL, 0x1800001818181800ULL, 0xf20000f2f2f2f200ULL,
+	0x2200002222222200ULL, 0xfe0000fefefefe00ULL, 0x4400004444444400ULL,
+	0xcf0000cfcfcfcf00ULL, 0xb20000b2b2b2b200ULL, 0xc30000c3c3c3c300ULL,
+	0xb50000b5b5b5b500ULL, 0x7a00007a7a7a7a00ULL, 0x9100009191919100ULL,
+	0x2400002424242400ULL, 0x0800000808080800ULL, 0xe80000e8e8e8e800ULL,
+	0xa80000a8a8a8a800ULL, 0x6000006060606000ULL, 0xfc0000fcfcfcfc00ULL,
+	0x6900006969696900ULL, 0x5000005050505000ULL, 0xaa0000aaaaaaaa00ULL,
+	0xd00000d0d0d0d000ULL, 0xa00000a0a0a0a000ULL, 0x7d00007d7d7d7d00ULL,
+	0xa10000a1a1a1a100ULL, 0x8900008989898900ULL, 0x6200006262626200ULL,
+	0x9700009797979700ULL, 0x5400005454545400ULL, 0x5b00005b5b5b5b00ULL,
+	0x1e00001e1e1e1e00ULL, 0x9500009595959500ULL, 0xe00000e0e0e0e000ULL,
+	0xff0000ffffffff00ULL, 0x6400006464646400ULL, 0xd20000d2d2d2d200ULL,
+	0x1000001010101000ULL, 0xc40000c4c4c4c400ULL, 0x0000000000000000ULL,
+	0x4800004848484800ULL, 0xa30000a3a3a3a300ULL, 0xf70000f7f7f7f700ULL,
+	0x7500007575757500ULL, 0xdb0000dbdbdbdb00ULL, 0x8a00008a8a8a8a00ULL,
+	0x0300000303030300ULL, 0xe60000e6e6e6e600ULL, 0xda0000dadadada00ULL,
+	0x0900000909090900ULL, 0x3f00003f3f3f3f00ULL, 0xdd0000dddddddd00ULL,
+	0x9400009494949400ULL, 0x8700008787878700ULL, 0x5c00005c5c5c5c00ULL,
+	0x8300008383838300ULL, 0x0200000202020200ULL, 0xcd0000cdcdcdcd00ULL,
+	0x4a00004a4a4a4a00ULL, 0x9000009090909000ULL, 0x3300003333333300ULL,
+	0x7300007373737300ULL, 0x6700006767676700ULL, 0xf60000f6f6f6f600ULL,
+	0xf30000f3f3f3f300ULL, 0x9d00009d9d9d9d00ULL, 0x7f00007f7f7f7f00ULL,
+	0xbf0000bfbfbfbf00ULL, 0xe20000e2e2e2e200ULL, 0x5200005252525200ULL,
+	0x9b00009b9b9b9b00ULL, 0xd80000d8d8d8d800ULL, 0x2600002626262600ULL,
+	0xc80000c8c8c8c800ULL, 0x3700003737373700ULL, 0xc60000c6c6c6c600ULL,
+	0x3b00003b3b3b3b00ULL, 0x8100008181818100ULL, 0x9600009696969600ULL,
+	0x6f00006f6f6f6f00ULL, 0x4b00004b4b4b4b00ULL, 0x1300001313131300ULL,
+	0xbe0000bebebebe00ULL, 0x6300006363636300ULL, 0x2e00002e2e2e2e00ULL,
+	0xe90000e9e9e9e900ULL, 0x7900007979797900ULL, 0xa70000a7a7a7a700ULL,
+	0x8c00008c8c8c8c00ULL, 0x9f00009f9f9f9f00ULL, 0x6e00006e6e6e6e00ULL,
+	0xbc0000bcbcbcbc00ULL, 0x8e00008e8e8e8e00ULL, 0x2900002929292900ULL,
+	0xf50000f5f5f5f500ULL, 0xf90000f9f9f9f900ULL, 0xb60000b6b6b6b600ULL,
+	0x2f00002f2f2f2f00ULL, 0xfd0000fdfdfdfd00ULL, 0xb40000b4b4b4b400ULL,
+	0x5900005959595900ULL, 0x7800007878787800ULL, 0x9800009898989800ULL,
+	0x0600000606060600ULL, 0x6a00006a6a6a6a00ULL, 0xe70000e7e7e7e700ULL,
+	0x4600004646464600ULL, 0x7100007171717100ULL, 0xba0000babababa00ULL,
+	0xd40000d4d4d4d400ULL, 0x2500002525252500ULL, 0xab0000abababab00ULL,
+	0x4200004242424200ULL, 0x8800008888888800ULL, 0xa20000a2a2a2a200ULL,
+	0x8d00008d8d8d8d00ULL, 0xfa0000fafafafa00ULL, 0x7200007272727200ULL,
+	0x0700000707070700ULL, 0xb90000b9b9b9b900ULL, 0x5500005555555500ULL,
+	0xf80000f8f8f8f800ULL, 0xee0000eeeeeeee00ULL, 0xac0000acacacac00ULL,
+	0x0a00000a0a0a0a00ULL, 0x3600003636363600ULL, 0x4900004949494900ULL,
+	0x2a00002a2a2a2a00ULL, 0x6800006868686800ULL, 0x3c00003c3c3c3c00ULL,
+	0x3800003838383800ULL, 0xf10000f1f1f1f100ULL, 0xa40000a4a4a4a400ULL,
+	0x4000004040404000ULL, 0x2800002828282800ULL, 0xd30000d3d3d3d300ULL,
+	0x7b00007b7b7b7b00ULL, 0xbb0000bbbbbbbb00ULL, 0xc90000c9c9c9c900ULL,
+	0x4300004343434300ULL, 0xc10000c1c1c1c100ULL, 0x1500001515151500ULL,
+	0xe30000e3e3e3e300ULL, 0xad0000adadadad00ULL, 0xf40000f4f4f4f400ULL,
+	0x7700007777777700ULL, 0xc70000c7c7c7c700ULL, 0x8000008080808000ULL,
+	0x9e00009e9e9e9e00ULL,
 };
 
 const u64 camellia_sp22000222[256] = {
-	0xe0e0000000e0e0e0, 0x0505000000050505, 0x5858000000585858,
-	0xd9d9000000d9d9d9, 0x6767000000676767, 0x4e4e0000004e4e4e,
-	0x8181000000818181, 0xcbcb000000cbcbcb, 0xc9c9000000c9c9c9,
-	0x0b0b0000000b0b0b, 0xaeae000000aeaeae, 0x6a6a0000006a6a6a,
-	0xd5d5000000d5d5d5, 0x1818000000181818, 0x5d5d0000005d5d5d,
-	0x8282000000828282, 0x4646000000464646, 0xdfdf000000dfdfdf,
-	0xd6d6000000d6d6d6, 0x2727000000272727, 0x8a8a0000008a8a8a,
-	0x3232000000323232, 0x4b4b0000004b4b4b, 0x4242000000424242,
-	0xdbdb000000dbdbdb, 0x1c1c0000001c1c1c, 0x9e9e0000009e9e9e,
-	0x9c9c0000009c9c9c, 0x3a3a0000003a3a3a, 0xcaca000000cacaca,
-	0x2525000000252525, 0x7b7b0000007b7b7b, 0x0d0d0000000d0d0d,
-	0x7171000000717171, 0x5f5f0000005f5f5f, 0x1f1f0000001f1f1f,
-	0xf8f8000000f8f8f8, 0xd7d7000000d7d7d7, 0x3e3e0000003e3e3e,
-	0x9d9d0000009d9d9d, 0x7c7c0000007c7c7c, 0x6060000000606060,
-	0xb9b9000000b9b9b9, 0xbebe000000bebebe, 0xbcbc000000bcbcbc,
-	0x8b8b0000008b8b8b, 0x1616000000161616, 0x3434000000343434,
-	0x4d4d0000004d4d4d, 0xc3c3000000c3c3c3, 0x7272000000727272,
-	0x9595000000959595, 0xabab000000ababab, 0x8e8e0000008e8e8e,
-	0xbaba000000bababa, 0x7a7a0000007a7a7a, 0xb3b3000000b3b3b3,
-	0x0202000000020202, 0xb4b4000000b4b4b4, 0xadad000000adadad,
-	0xa2a2000000a2a2a2, 0xacac000000acacac, 0xd8d8000000d8d8d8,
-	0x9a9a0000009a9a9a, 0x1717000000171717, 0x1a1a0000001a1a1a,
-	0x3535000000353535, 0xcccc000000cccccc, 0xf7f7000000f7f7f7,
-	0x9999000000999999, 0x6161000000616161, 0x5a5a0000005a5a5a,
-	0xe8e8000000e8e8e8, 0x2424000000242424, 0x5656000000565656,
-	0x4040000000404040, 0xe1e1000000e1e1e1, 0x6363000000636363,
-	0x0909000000090909, 0x3333000000333333, 0xbfbf000000bfbfbf,
-	0x9898000000989898, 0x9797000000979797, 0x8585000000858585,
-	0x6868000000686868, 0xfcfc000000fcfcfc, 0xecec000000ececec,
-	0x0a0a0000000a0a0a, 0xdada000000dadada, 0x6f6f0000006f6f6f,
-	0x5353000000535353, 0x6262000000626262, 0xa3a3000000a3a3a3,
-	0x2e2e0000002e2e2e, 0x0808000000080808, 0xafaf000000afafaf,
-	0x2828000000282828, 0xb0b0000000b0b0b0, 0x7474000000747474,
-	0xc2c2000000c2c2c2, 0xbdbd000000bdbdbd, 0x3636000000363636,
-	0x2222000000222222, 0x3838000000383838, 0x6464000000646464,
-	0x1e1e0000001e1e1e, 0x3939000000393939, 0x2c2c0000002c2c2c,
-	0xa6a6000000a6a6a6, 0x3030000000303030, 0xe5e5000000e5e5e5,
-	0x4444000000444444, 0xfdfd000000fdfdfd, 0x8888000000888888,
-	0x9f9f0000009f9f9f, 0x6565000000656565, 0x8787000000878787,
-	0x6b6b0000006b6b6b, 0xf4f4000000f4f4f4, 0x2323000000232323,
-	0x4848000000484848, 0x1010000000101010, 0xd1d1000000d1d1d1,
-	0x5151000000515151, 0xc0c0000000c0c0c0, 0xf9f9000000f9f9f9,
-	0xd2d2000000d2d2d2, 0xa0a0000000a0a0a0, 0x5555000000555555,
-	0xa1a1000000a1a1a1, 0x4141000000414141, 0xfafa000000fafafa,
-	0x4343000000434343, 0x1313000000131313, 0xc4c4000000c4c4c4,
-	0x2f2f0000002f2f2f, 0xa8a8000000a8a8a8, 0xb6b6000000b6b6b6,
-	0x3c3c0000003c3c3c, 0x2b2b0000002b2b2b, 0xc1c1000000c1c1c1,
-	0xffff000000ffffff, 0xc8c8000000c8c8c8, 0xa5a5000000a5a5a5,
-	0x2020000000202020, 0x8989000000898989, 0x0000000000000000,
-	0x9090000000909090, 0x4747000000474747, 0xefef000000efefef,
-	0xeaea000000eaeaea, 0xb7b7000000b7b7b7, 0x1515000000151515,
-	0x0606000000060606, 0xcdcd000000cdcdcd, 0xb5b5000000b5b5b5,
-	0x1212000000121212, 0x7e7e0000007e7e7e, 0xbbbb000000bbbbbb,
-	0x2929000000292929, 0x0f0f0000000f0f0f, 0xb8b8000000b8b8b8,
-	0x0707000000070707, 0x0404000000040404, 0x9b9b0000009b9b9b,
-	0x9494000000949494, 0x2121000000212121, 0x6666000000666666,
-	0xe6e6000000e6e6e6, 0xcece000000cecece, 0xeded000000ededed,
-	0xe7e7000000e7e7e7, 0x3b3b0000003b3b3b, 0xfefe000000fefefe,
-	0x7f7f0000007f7f7f, 0xc5c5000000c5c5c5, 0xa4a4000000a4a4a4,
-	0x3737000000373737, 0xb1b1000000b1b1b1, 0x4c4c0000004c4c4c,
-	0x9191000000919191, 0x6e6e0000006e6e6e, 0x8d8d0000008d8d8d,
-	0x7676000000767676, 0x0303000000030303, 0x2d2d0000002d2d2d,
-	0xdede000000dedede, 0x9696000000969696, 0x2626000000262626,
-	0x7d7d0000007d7d7d, 0xc6c6000000c6c6c6, 0x5c5c0000005c5c5c,
-	0xd3d3000000d3d3d3, 0xf2f2000000f2f2f2, 0x4f4f0000004f4f4f,
-	0x1919000000191919, 0x3f3f0000003f3f3f, 0xdcdc000000dcdcdc,
-	0x7979000000797979, 0x1d1d0000001d1d1d, 0x5252000000525252,
-	0xebeb000000ebebeb, 0xf3f3000000f3f3f3, 0x6d6d0000006d6d6d,
-	0x5e5e0000005e5e5e, 0xfbfb000000fbfbfb, 0x6969000000696969,
-	0xb2b2000000b2b2b2, 0xf0f0000000f0f0f0, 0x3131000000313131,
-	0x0c0c0000000c0c0c, 0xd4d4000000d4d4d4, 0xcfcf000000cfcfcf,
-	0x8c8c0000008c8c8c, 0xe2e2000000e2e2e2, 0x7575000000757575,
-	0xa9a9000000a9a9a9, 0x4a4a0000004a4a4a, 0x5757000000575757,
-	0x8484000000848484, 0x1111000000111111, 0x4545000000454545,
-	0x1b1b0000001b1b1b, 0xf5f5000000f5f5f5, 0xe4e4000000e4e4e4,
-	0x0e0e0000000e0e0e, 0x7373000000737373, 0xaaaa000000aaaaaa,
-	0xf1f1000000f1f1f1, 0xdddd000000dddddd, 0x5959000000595959,
-	0x1414000000141414, 0x6c6c0000006c6c6c, 0x9292000000929292,
-	0x5454000000545454, 0xd0d0000000d0d0d0, 0x7878000000787878,
-	0x7070000000707070, 0xe3e3000000e3e3e3, 0x4949000000494949,
-	0x8080000000808080, 0x5050000000505050, 0xa7a7000000a7a7a7,
-	0xf6f6000000f6f6f6, 0x7777000000777777, 0x9393000000939393,
-	0x8686000000868686, 0x8383000000838383, 0x2a2a0000002a2a2a,
-	0xc7c7000000c7c7c7, 0x5b5b0000005b5b5b, 0xe9e9000000e9e9e9,
-	0xeeee000000eeeeee, 0x8f8f0000008f8f8f, 0x0101000000010101,
-	0x3d3d0000003d3d3d,
+	0xe0e0000000e0e0e0ULL, 0x0505000000050505ULL, 0x5858000000585858ULL,
+	0xd9d9000000d9d9d9ULL, 0x6767000000676767ULL, 0x4e4e0000004e4e4eULL,
+	0x8181000000818181ULL, 0xcbcb000000cbcbcbULL, 0xc9c9000000c9c9c9ULL,
+	0x0b0b0000000b0b0bULL, 0xaeae000000aeaeaeULL, 0x6a6a0000006a6a6aULL,
+	0xd5d5000000d5d5d5ULL, 0x1818000000181818ULL, 0x5d5d0000005d5d5dULL,
+	0x8282000000828282ULL, 0x4646000000464646ULL, 0xdfdf000000dfdfdfULL,
+	0xd6d6000000d6d6d6ULL, 0x2727000000272727ULL, 0x8a8a0000008a8a8aULL,
+	0x3232000000323232ULL, 0x4b4b0000004b4b4bULL, 0x4242000000424242ULL,
+	0xdbdb000000dbdbdbULL, 0x1c1c0000001c1c1cULL, 0x9e9e0000009e9e9eULL,
+	0x9c9c0000009c9c9cULL, 0x3a3a0000003a3a3aULL, 0xcaca000000cacacaULL,
+	0x2525000000252525ULL, 0x7b7b0000007b7b7bULL, 0x0d0d0000000d0d0dULL,
+	0x7171000000717171ULL, 0x5f5f0000005f5f5fULL, 0x1f1f0000001f1f1fULL,
+	0xf8f8000000f8f8f8ULL, 0xd7d7000000d7d7d7ULL, 0x3e3e0000003e3e3eULL,
+	0x9d9d0000009d9d9dULL, 0x7c7c0000007c7c7cULL, 0x6060000000606060ULL,
+	0xb9b9000000b9b9b9ULL, 0xbebe000000bebebeULL, 0xbcbc000000bcbcbcULL,
+	0x8b8b0000008b8b8bULL, 0x1616000000161616ULL, 0x3434000000343434ULL,
+	0x4d4d0000004d4d4dULL, 0xc3c3000000c3c3c3ULL, 0x7272000000727272ULL,
+	0x9595000000959595ULL, 0xabab000000abababULL, 0x8e8e0000008e8e8eULL,
+	0xbaba000000bababaULL, 0x7a7a0000007a7a7aULL, 0xb3b3000000b3b3b3ULL,
+	0x0202000000020202ULL, 0xb4b4000000b4b4b4ULL, 0xadad000000adadadULL,
+	0xa2a2000000a2a2a2ULL, 0xacac000000acacacULL, 0xd8d8000000d8d8d8ULL,
+	0x9a9a0000009a9a9aULL, 0x1717000000171717ULL, 0x1a1a0000001a1a1aULL,
+	0x3535000000353535ULL, 0xcccc000000ccccccULL, 0xf7f7000000f7f7f7ULL,
+	0x9999000000999999ULL, 0x6161000000616161ULL, 0x5a5a0000005a5a5aULL,
+	0xe8e8000000e8e8e8ULL, 0x2424000000242424ULL, 0x5656000000565656ULL,
+	0x4040000000404040ULL, 0xe1e1000000e1e1e1ULL, 0x6363000000636363ULL,
+	0x0909000000090909ULL, 0x3333000000333333ULL, 0xbfbf000000bfbfbfULL,
+	0x9898000000989898ULL, 0x9797000000979797ULL, 0x8585000000858585ULL,
+	0x6868000000686868ULL, 0xfcfc000000fcfcfcULL, 0xecec000000ecececULL,
+	0x0a0a0000000a0a0aULL, 0xdada000000dadadaULL, 0x6f6f0000006f6f6fULL,
+	0x5353000000535353ULL, 0x6262000000626262ULL, 0xa3a3000000a3a3a3ULL,
+	0x2e2e0000002e2e2eULL, 0x0808000000080808ULL, 0xafaf000000afafafULL,
+	0x2828000000282828ULL, 0xb0b0000000b0b0b0ULL, 0x7474000000747474ULL,
+	0xc2c2000000c2c2c2ULL, 0xbdbd000000bdbdbdULL, 0x3636000000363636ULL,
+	0x2222000000222222ULL, 0x3838000000383838ULL, 0x6464000000646464ULL,
+	0x1e1e0000001e1e1eULL, 0x3939000000393939ULL, 0x2c2c0000002c2c2cULL,
+	0xa6a6000000a6a6a6ULL, 0x3030000000303030ULL, 0xe5e5000000e5e5e5ULL,
+	0x4444000000444444ULL, 0xfdfd000000fdfdfdULL, 0x8888000000888888ULL,
+	0x9f9f0000009f9f9fULL, 0x6565000000656565ULL, 0x8787000000878787ULL,
+	0x6b6b0000006b6b6bULL, 0xf4f4000000f4f4f4ULL, 0x2323000000232323ULL,
+	0x4848000000484848ULL, 0x1010000000101010ULL, 0xd1d1000000d1d1d1ULL,
+	0x5151000000515151ULL, 0xc0c0000000c0c0c0ULL, 0xf9f9000000f9f9f9ULL,
+	0xd2d2000000d2d2d2ULL, 0xa0a0000000a0a0a0ULL, 0x5555000000555555ULL,
+	0xa1a1000000a1a1a1ULL, 0x4141000000414141ULL, 0xfafa000000fafafaULL,
+	0x4343000000434343ULL, 0x1313000000131313ULL, 0xc4c4000000c4c4c4ULL,
+	0x2f2f0000002f2f2fULL, 0xa8a8000000a8a8a8ULL, 0xb6b6000000b6b6b6ULL,
+	0x3c3c0000003c3c3cULL, 0x2b2b0000002b2b2bULL, 0xc1c1000000c1c1c1ULL,
+	0xffff000000ffffffULL, 0xc8c8000000c8c8c8ULL, 0xa5a5000000a5a5a5ULL,
+	0x2020000000202020ULL, 0x8989000000898989ULL, 0x0000000000000000ULL,
+	0x9090000000909090ULL, 0x4747000000474747ULL, 0xefef000000efefefULL,
+	0xeaea000000eaeaeaULL, 0xb7b7000000b7b7b7ULL, 0x1515000000151515ULL,
+	0x0606000000060606ULL, 0xcdcd000000cdcdcdULL, 0xb5b5000000b5b5b5ULL,
+	0x1212000000121212ULL, 0x7e7e0000007e7e7eULL, 0xbbbb000000bbbbbbULL,
+	0x2929000000292929ULL, 0x0f0f0000000f0f0fULL, 0xb8b8000000b8b8b8ULL,
+	0x0707000000070707ULL, 0x0404000000040404ULL, 0x9b9b0000009b9b9bULL,
+	0x9494000000949494ULL, 0x2121000000212121ULL, 0x6666000000666666ULL,
+	0xe6e6000000e6e6e6ULL, 0xcece000000cececeULL, 0xeded000000edededULL,
+	0xe7e7000000e7e7e7ULL, 0x3b3b0000003b3b3bULL, 0xfefe000000fefefeULL,
+	0x7f7f0000007f7f7fULL, 0xc5c5000000c5c5c5ULL, 0xa4a4000000a4a4a4ULL,
+	0x3737000000373737ULL, 0xb1b1000000b1b1b1ULL, 0x4c4c0000004c4c4cULL,
+	0x9191000000919191ULL, 0x6e6e0000006e6e6eULL, 0x8d8d0000008d8d8dULL,
+	0x7676000000767676ULL, 0x0303000000030303ULL, 0x2d2d0000002d2d2dULL,
+	0xdede000000dededeULL, 0x9696000000969696ULL, 0x2626000000262626ULL,
+	0x7d7d0000007d7d7dULL, 0xc6c6000000c6c6c6ULL, 0x5c5c0000005c5c5cULL,
+	0xd3d3000000d3d3d3ULL, 0xf2f2000000f2f2f2ULL, 0x4f4f0000004f4f4fULL,
+	0x1919000000191919ULL, 0x3f3f0000003f3f3fULL, 0xdcdc000000dcdcdcULL,
+	0x7979000000797979ULL, 0x1d1d0000001d1d1dULL, 0x5252000000525252ULL,
+	0xebeb000000ebebebULL, 0xf3f3000000f3f3f3ULL, 0x6d6d0000006d6d6dULL,
+	0x5e5e0000005e5e5eULL, 0xfbfb000000fbfbfbULL, 0x6969000000696969ULL,
+	0xb2b2000000b2b2b2ULL, 0xf0f0000000f0f0f0ULL, 0x3131000000313131ULL,
+	0x0c0c0000000c0c0cULL, 0xd4d4000000d4d4d4ULL, 0xcfcf000000cfcfcfULL,
+	0x8c8c0000008c8c8cULL, 0xe2e2000000e2e2e2ULL, 0x7575000000757575ULL,
+	0xa9a9000000a9a9a9ULL, 0x4a4a0000004a4a4aULL, 0x5757000000575757ULL,
+	0x8484000000848484ULL, 0x1111000000111111ULL, 0x4545000000454545ULL,
+	0x1b1b0000001b1b1bULL, 0xf5f5000000f5f5f5ULL, 0xe4e4000000e4e4e4ULL,
+	0x0e0e0000000e0e0eULL, 0x7373000000737373ULL, 0xaaaa000000aaaaaaULL,
+	0xf1f1000000f1f1f1ULL, 0xdddd000000ddddddULL, 0x5959000000595959ULL,
+	0x1414000000141414ULL, 0x6c6c0000006c6c6cULL, 0x9292000000929292ULL,
+	0x5454000000545454ULL, 0xd0d0000000d0d0d0ULL, 0x7878000000787878ULL,
+	0x7070000000707070ULL, 0xe3e3000000e3e3e3ULL, 0x4949000000494949ULL,
+	0x8080000000808080ULL, 0x5050000000505050ULL, 0xa7a7000000a7a7a7ULL,
+	0xf6f6000000f6f6f6ULL, 0x7777000000777777ULL, 0x9393000000939393ULL,
+	0x8686000000868686ULL, 0x8383000000838383ULL, 0x2a2a0000002a2a2aULL,
+	0xc7c7000000c7c7c7ULL, 0x5b5b0000005b5b5bULL, 0xe9e9000000e9e9e9ULL,
+	0xeeee000000eeeeeeULL, 0x8f8f0000008f8f8fULL, 0x0101000000010101ULL,
+	0x3d3d0000003d3d3dULL,
 };
 
 const u64 camellia_sp03303033[256] = {
-	0x0038380038003838, 0x0041410041004141, 0x0016160016001616,
-	0x0076760076007676, 0x00d9d900d900d9d9, 0x0093930093009393,
-	0x0060600060006060, 0x00f2f200f200f2f2, 0x0072720072007272,
-	0x00c2c200c200c2c2, 0x00abab00ab00abab, 0x009a9a009a009a9a,
-	0x0075750075007575, 0x0006060006000606, 0x0057570057005757,
-	0x00a0a000a000a0a0, 0x0091910091009191, 0x00f7f700f700f7f7,
-	0x00b5b500b500b5b5, 0x00c9c900c900c9c9, 0x00a2a200a200a2a2,
-	0x008c8c008c008c8c, 0x00d2d200d200d2d2, 0x0090900090009090,
-	0x00f6f600f600f6f6, 0x0007070007000707, 0x00a7a700a700a7a7,
-	0x0027270027002727, 0x008e8e008e008e8e, 0x00b2b200b200b2b2,
-	0x0049490049004949, 0x00dede00de00dede, 0x0043430043004343,
-	0x005c5c005c005c5c, 0x00d7d700d700d7d7, 0x00c7c700c700c7c7,
-	0x003e3e003e003e3e, 0x00f5f500f500f5f5, 0x008f8f008f008f8f,
-	0x0067670067006767, 0x001f1f001f001f1f, 0x0018180018001818,
-	0x006e6e006e006e6e, 0x00afaf00af00afaf, 0x002f2f002f002f2f,
-	0x00e2e200e200e2e2, 0x0085850085008585, 0x000d0d000d000d0d,
-	0x0053530053005353, 0x00f0f000f000f0f0, 0x009c9c009c009c9c,
-	0x0065650065006565, 0x00eaea00ea00eaea, 0x00a3a300a300a3a3,
-	0x00aeae00ae00aeae, 0x009e9e009e009e9e, 0x00ecec00ec00ecec,
-	0x0080800080008080, 0x002d2d002d002d2d, 0x006b6b006b006b6b,
-	0x00a8a800a800a8a8, 0x002b2b002b002b2b, 0x0036360036003636,
-	0x00a6a600a600a6a6, 0x00c5c500c500c5c5, 0x0086860086008686,
-	0x004d4d004d004d4d, 0x0033330033003333, 0x00fdfd00fd00fdfd,
-	0x0066660066006666, 0x0058580058005858, 0x0096960096009696,
-	0x003a3a003a003a3a, 0x0009090009000909, 0x0095950095009595,
-	0x0010100010001010, 0x0078780078007878, 0x00d8d800d800d8d8,
-	0x0042420042004242, 0x00cccc00cc00cccc, 0x00efef00ef00efef,
-	0x0026260026002626, 0x00e5e500e500e5e5, 0x0061610061006161,
-	0x001a1a001a001a1a, 0x003f3f003f003f3f, 0x003b3b003b003b3b,
-	0x0082820082008282, 0x00b6b600b600b6b6, 0x00dbdb00db00dbdb,
-	0x00d4d400d400d4d4, 0x0098980098009898, 0x00e8e800e800e8e8,
-	0x008b8b008b008b8b, 0x0002020002000202, 0x00ebeb00eb00ebeb,
-	0x000a0a000a000a0a, 0x002c2c002c002c2c, 0x001d1d001d001d1d,
-	0x00b0b000b000b0b0, 0x006f6f006f006f6f, 0x008d8d008d008d8d,
-	0x0088880088008888, 0x000e0e000e000e0e, 0x0019190019001919,
-	0x0087870087008787, 0x004e4e004e004e4e, 0x000b0b000b000b0b,
-	0x00a9a900a900a9a9, 0x000c0c000c000c0c, 0x0079790079007979,
-	0x0011110011001111, 0x007f7f007f007f7f, 0x0022220022002222,
-	0x00e7e700e700e7e7, 0x0059590059005959, 0x00e1e100e100e1e1,
-	0x00dada00da00dada, 0x003d3d003d003d3d, 0x00c8c800c800c8c8,
-	0x0012120012001212, 0x0004040004000404, 0x0074740074007474,
-	0x0054540054005454, 0x0030300030003030, 0x007e7e007e007e7e,
-	0x00b4b400b400b4b4, 0x0028280028002828, 0x0055550055005555,
-	0x0068680068006868, 0x0050500050005050, 0x00bebe00be00bebe,
-	0x00d0d000d000d0d0, 0x00c4c400c400c4c4, 0x0031310031003131,
-	0x00cbcb00cb00cbcb, 0x002a2a002a002a2a, 0x00adad00ad00adad,
-	0x000f0f000f000f0f, 0x00caca00ca00caca, 0x0070700070007070,
-	0x00ffff00ff00ffff, 0x0032320032003232, 0x0069690069006969,
-	0x0008080008000808, 0x0062620062006262, 0x0000000000000000,
-	0x0024240024002424, 0x00d1d100d100d1d1, 0x00fbfb00fb00fbfb,
-	0x00baba00ba00baba, 0x00eded00ed00eded, 0x0045450045004545,
-	0x0081810081008181, 0x0073730073007373, 0x006d6d006d006d6d,
-	0x0084840084008484, 0x009f9f009f009f9f, 0x00eeee00ee00eeee,
-	0x004a4a004a004a4a, 0x00c3c300c300c3c3, 0x002e2e002e002e2e,
-	0x00c1c100c100c1c1, 0x0001010001000101, 0x00e6e600e600e6e6,
-	0x0025250025002525, 0x0048480048004848, 0x0099990099009999,
-	0x00b9b900b900b9b9, 0x00b3b300b300b3b3, 0x007b7b007b007b7b,
-	0x00f9f900f900f9f9, 0x00cece00ce00cece, 0x00bfbf00bf00bfbf,
-	0x00dfdf00df00dfdf, 0x0071710071007171, 0x0029290029002929,
-	0x00cdcd00cd00cdcd, 0x006c6c006c006c6c, 0x0013130013001313,
-	0x0064640064006464, 0x009b9b009b009b9b, 0x0063630063006363,
-	0x009d9d009d009d9d, 0x00c0c000c000c0c0, 0x004b4b004b004b4b,
-	0x00b7b700b700b7b7, 0x00a5a500a500a5a5, 0x0089890089008989,
-	0x005f5f005f005f5f, 0x00b1b100b100b1b1, 0x0017170017001717,
-	0x00f4f400f400f4f4, 0x00bcbc00bc00bcbc, 0x00d3d300d300d3d3,
-	0x0046460046004646, 0x00cfcf00cf00cfcf, 0x0037370037003737,
-	0x005e5e005e005e5e, 0x0047470047004747, 0x0094940094009494,
-	0x00fafa00fa00fafa, 0x00fcfc00fc00fcfc, 0x005b5b005b005b5b,
-	0x0097970097009797, 0x00fefe00fe00fefe, 0x005a5a005a005a5a,
-	0x00acac00ac00acac, 0x003c3c003c003c3c, 0x004c4c004c004c4c,
-	0x0003030003000303, 0x0035350035003535, 0x00f3f300f300f3f3,
-	0x0023230023002323, 0x00b8b800b800b8b8, 0x005d5d005d005d5d,
-	0x006a6a006a006a6a, 0x0092920092009292, 0x00d5d500d500d5d5,
-	0x0021210021002121, 0x0044440044004444, 0x0051510051005151,
-	0x00c6c600c600c6c6, 0x007d7d007d007d7d, 0x0039390039003939,
-	0x0083830083008383, 0x00dcdc00dc00dcdc, 0x00aaaa00aa00aaaa,
-	0x007c7c007c007c7c, 0x0077770077007777, 0x0056560056005656,
-	0x0005050005000505, 0x001b1b001b001b1b, 0x00a4a400a400a4a4,
-	0x0015150015001515, 0x0034340034003434, 0x001e1e001e001e1e,
-	0x001c1c001c001c1c, 0x00f8f800f800f8f8, 0x0052520052005252,
-	0x0020200020002020, 0x0014140014001414, 0x00e9e900e900e9e9,
-	0x00bdbd00bd00bdbd, 0x00dddd00dd00dddd, 0x00e4e400e400e4e4,
-	0x00a1a100a100a1a1, 0x00e0e000e000e0e0, 0x008a8a008a008a8a,
-	0x00f1f100f100f1f1, 0x00d6d600d600d6d6, 0x007a7a007a007a7a,
-	0x00bbbb00bb00bbbb, 0x00e3e300e300e3e3, 0x0040400040004040,
-	0x004f4f004f004f4f,
+	0x0038380038003838ULL, 0x0041410041004141ULL, 0x0016160016001616ULL,
+	0x0076760076007676ULL, 0x00d9d900d900d9d9ULL, 0x0093930093009393ULL,
+	0x0060600060006060ULL, 0x00f2f200f200f2f2ULL, 0x0072720072007272ULL,
+	0x00c2c200c200c2c2ULL, 0x00abab00ab00ababULL, 0x009a9a009a009a9aULL,
+	0x0075750075007575ULL, 0x0006060006000606ULL, 0x0057570057005757ULL,
+	0x00a0a000a000a0a0ULL, 0x0091910091009191ULL, 0x00f7f700f700f7f7ULL,
+	0x00b5b500b500b5b5ULL, 0x00c9c900c900c9c9ULL, 0x00a2a200a200a2a2ULL,
+	0x008c8c008c008c8cULL, 0x00d2d200d200d2d2ULL, 0x0090900090009090ULL,
+	0x00f6f600f600f6f6ULL, 0x0007070007000707ULL, 0x00a7a700a700a7a7ULL,
+	0x0027270027002727ULL, 0x008e8e008e008e8eULL, 0x00b2b200b200b2b2ULL,
+	0x0049490049004949ULL, 0x00dede00de00dedeULL, 0x0043430043004343ULL,
+	0x005c5c005c005c5cULL, 0x00d7d700d700d7d7ULL, 0x00c7c700c700c7c7ULL,
+	0x003e3e003e003e3eULL, 0x00f5f500f500f5f5ULL, 0x008f8f008f008f8fULL,
+	0x0067670067006767ULL, 0x001f1f001f001f1fULL, 0x0018180018001818ULL,
+	0x006e6e006e006e6eULL, 0x00afaf00af00afafULL, 0x002f2f002f002f2fULL,
+	0x00e2e200e200e2e2ULL, 0x0085850085008585ULL, 0x000d0d000d000d0dULL,
+	0x0053530053005353ULL, 0x00f0f000f000f0f0ULL, 0x009c9c009c009c9cULL,
+	0x0065650065006565ULL, 0x00eaea00ea00eaeaULL, 0x00a3a300a300a3a3ULL,
+	0x00aeae00ae00aeaeULL, 0x009e9e009e009e9eULL, 0x00ecec00ec00ececULL,
+	0x0080800080008080ULL, 0x002d2d002d002d2dULL, 0x006b6b006b006b6bULL,
+	0x00a8a800a800a8a8ULL, 0x002b2b002b002b2bULL, 0x0036360036003636ULL,
+	0x00a6a600a600a6a6ULL, 0x00c5c500c500c5c5ULL, 0x0086860086008686ULL,
+	0x004d4d004d004d4dULL, 0x0033330033003333ULL, 0x00fdfd00fd00fdfdULL,
+	0x0066660066006666ULL, 0x0058580058005858ULL, 0x0096960096009696ULL,
+	0x003a3a003a003a3aULL, 0x0009090009000909ULL, 0x0095950095009595ULL,
+	0x0010100010001010ULL, 0x0078780078007878ULL, 0x00d8d800d800d8d8ULL,
+	0x0042420042004242ULL, 0x00cccc00cc00ccccULL, 0x00efef00ef00efefULL,
+	0x0026260026002626ULL, 0x00e5e500e500e5e5ULL, 0x0061610061006161ULL,
+	0x001a1a001a001a1aULL, 0x003f3f003f003f3fULL, 0x003b3b003b003b3bULL,
+	0x0082820082008282ULL, 0x00b6b600b600b6b6ULL, 0x00dbdb00db00dbdbULL,
+	0x00d4d400d400d4d4ULL, 0x0098980098009898ULL, 0x00e8e800e800e8e8ULL,
+	0x008b8b008b008b8bULL, 0x0002020002000202ULL, 0x00ebeb00eb00ebebULL,
+	0x000a0a000a000a0aULL, 0x002c2c002c002c2cULL, 0x001d1d001d001d1dULL,
+	0x00b0b000b000b0b0ULL, 0x006f6f006f006f6fULL, 0x008d8d008d008d8dULL,
+	0x0088880088008888ULL, 0x000e0e000e000e0eULL, 0x0019190019001919ULL,
+	0x0087870087008787ULL, 0x004e4e004e004e4eULL, 0x000b0b000b000b0bULL,
+	0x00a9a900a900a9a9ULL, 0x000c0c000c000c0cULL, 0x0079790079007979ULL,
+	0x0011110011001111ULL, 0x007f7f007f007f7fULL, 0x0022220022002222ULL,
+	0x00e7e700e700e7e7ULL, 0x0059590059005959ULL, 0x00e1e100e100e1e1ULL,
+	0x00dada00da00dadaULL, 0x003d3d003d003d3dULL, 0x00c8c800c800c8c8ULL,
+	0x0012120012001212ULL, 0x0004040004000404ULL, 0x0074740074007474ULL,
+	0x0054540054005454ULL, 0x0030300030003030ULL, 0x007e7e007e007e7eULL,
+	0x00b4b400b400b4b4ULL, 0x0028280028002828ULL, 0x0055550055005555ULL,
+	0x0068680068006868ULL, 0x0050500050005050ULL, 0x00bebe00be00bebeULL,
+	0x00d0d000d000d0d0ULL, 0x00c4c400c400c4c4ULL, 0x0031310031003131ULL,
+	0x00cbcb00cb00cbcbULL, 0x002a2a002a002a2aULL, 0x00adad00ad00adadULL,
+	0x000f0f000f000f0fULL, 0x00caca00ca00cacaULL, 0x0070700070007070ULL,
+	0x00ffff00ff00ffffULL, 0x0032320032003232ULL, 0x0069690069006969ULL,
+	0x0008080008000808ULL, 0x0062620062006262ULL, 0x0000000000000000ULL,
+	0x0024240024002424ULL, 0x00d1d100d100d1d1ULL, 0x00fbfb00fb00fbfbULL,
+	0x00baba00ba00babaULL, 0x00eded00ed00ededULL, 0x0045450045004545ULL,
+	0x0081810081008181ULL, 0x0073730073007373ULL, 0x006d6d006d006d6dULL,
+	0x0084840084008484ULL, 0x009f9f009f009f9fULL, 0x00eeee00ee00eeeeULL,
+	0x004a4a004a004a4aULL, 0x00c3c300c300c3c3ULL, 0x002e2e002e002e2eULL,
+	0x00c1c100c100c1c1ULL, 0x0001010001000101ULL, 0x00e6e600e600e6e6ULL,
+	0x0025250025002525ULL, 0x0048480048004848ULL, 0x0099990099009999ULL,
+	0x00b9b900b900b9b9ULL, 0x00b3b300b300b3b3ULL, 0x007b7b007b007b7bULL,
+	0x00f9f900f900f9f9ULL, 0x00cece00ce00ceceULL, 0x00bfbf00bf00bfbfULL,
+	0x00dfdf00df00dfdfULL, 0x0071710071007171ULL, 0x0029290029002929ULL,
+	0x00cdcd00cd00cdcdULL, 0x006c6c006c006c6cULL, 0x0013130013001313ULL,
+	0x0064640064006464ULL, 0x009b9b009b009b9bULL, 0x0063630063006363ULL,
+	0x009d9d009d009d9dULL, 0x00c0c000c000c0c0ULL, 0x004b4b004b004b4bULL,
+	0x00b7b700b700b7b7ULL, 0x00a5a500a500a5a5ULL, 0x0089890089008989ULL,
+	0x005f5f005f005f5fULL, 0x00b1b100b100b1b1ULL, 0x0017170017001717ULL,
+	0x00f4f400f400f4f4ULL, 0x00bcbc00bc00bcbcULL, 0x00d3d300d300d3d3ULL,
+	0x0046460046004646ULL, 0x00cfcf00cf00cfcfULL, 0x0037370037003737ULL,
+	0x005e5e005e005e5eULL, 0x0047470047004747ULL, 0x0094940094009494ULL,
+	0x00fafa00fa00fafaULL, 0x00fcfc00fc00fcfcULL, 0x005b5b005b005b5bULL,
+	0x0097970097009797ULL, 0x00fefe00fe00fefeULL, 0x005a5a005a005a5aULL,
+	0x00acac00ac00acacULL, 0x003c3c003c003c3cULL, 0x004c4c004c004c4cULL,
+	0x0003030003000303ULL, 0x0035350035003535ULL, 0x00f3f300f300f3f3ULL,
+	0x0023230023002323ULL, 0x00b8b800b800b8b8ULL, 0x005d5d005d005d5dULL,
+	0x006a6a006a006a6aULL, 0x0092920092009292ULL, 0x00d5d500d500d5d5ULL,
+	0x0021210021002121ULL, 0x0044440044004444ULL, 0x0051510051005151ULL,
+	0x00c6c600c600c6c6ULL, 0x007d7d007d007d7dULL, 0x0039390039003939ULL,
+	0x0083830083008383ULL, 0x00dcdc00dc00dcdcULL, 0x00aaaa00aa00aaaaULL,
+	0x007c7c007c007c7cULL, 0x0077770077007777ULL, 0x0056560056005656ULL,
+	0x0005050005000505ULL, 0x001b1b001b001b1bULL, 0x00a4a400a400a4a4ULL,
+	0x0015150015001515ULL, 0x0034340034003434ULL, 0x001e1e001e001e1eULL,
+	0x001c1c001c001c1cULL, 0x00f8f800f800f8f8ULL, 0x0052520052005252ULL,
+	0x0020200020002020ULL, 0x0014140014001414ULL, 0x00e9e900e900e9e9ULL,
+	0x00bdbd00bd00bdbdULL, 0x00dddd00dd00ddddULL, 0x00e4e400e400e4e4ULL,
+	0x00a1a100a100a1a1ULL, 0x00e0e000e000e0e0ULL, 0x008a8a008a008a8aULL,
+	0x00f1f100f100f1f1ULL, 0x00d6d600d600d6d6ULL, 0x007a7a007a007a7aULL,
+	0x00bbbb00bb00bbbbULL, 0x00e3e300e300e3e3ULL, 0x0040400040004040ULL,
+	0x004f4f004f004f4fULL,
 };
 
 const u64 camellia_sp00444404[256] = {
-	0x0000707070700070, 0x00002c2c2c2c002c, 0x0000b3b3b3b300b3,
-	0x0000c0c0c0c000c0, 0x0000e4e4e4e400e4, 0x0000575757570057,
-	0x0000eaeaeaea00ea, 0x0000aeaeaeae00ae, 0x0000232323230023,
-	0x00006b6b6b6b006b, 0x0000454545450045, 0x0000a5a5a5a500a5,
-	0x0000edededed00ed, 0x00004f4f4f4f004f, 0x00001d1d1d1d001d,
-	0x0000929292920092, 0x0000868686860086, 0x0000afafafaf00af,
-	0x00007c7c7c7c007c, 0x00001f1f1f1f001f, 0x00003e3e3e3e003e,
-	0x0000dcdcdcdc00dc, 0x00005e5e5e5e005e, 0x00000b0b0b0b000b,
-	0x0000a6a6a6a600a6, 0x0000393939390039, 0x0000d5d5d5d500d5,
-	0x00005d5d5d5d005d, 0x0000d9d9d9d900d9, 0x00005a5a5a5a005a,
-	0x0000515151510051, 0x00006c6c6c6c006c, 0x00008b8b8b8b008b,
-	0x00009a9a9a9a009a, 0x0000fbfbfbfb00fb, 0x0000b0b0b0b000b0,
-	0x0000747474740074, 0x00002b2b2b2b002b, 0x0000f0f0f0f000f0,
-	0x0000848484840084, 0x0000dfdfdfdf00df, 0x0000cbcbcbcb00cb,
-	0x0000343434340034, 0x0000767676760076, 0x00006d6d6d6d006d,
-	0x0000a9a9a9a900a9, 0x0000d1d1d1d100d1, 0x0000040404040004,
-	0x0000141414140014, 0x00003a3a3a3a003a, 0x0000dededede00de,
-	0x0000111111110011, 0x0000323232320032, 0x00009c9c9c9c009c,
-	0x0000535353530053, 0x0000f2f2f2f200f2, 0x0000fefefefe00fe,
-	0x0000cfcfcfcf00cf, 0x0000c3c3c3c300c3, 0x00007a7a7a7a007a,
-	0x0000242424240024, 0x0000e8e8e8e800e8, 0x0000606060600060,
-	0x0000696969690069, 0x0000aaaaaaaa00aa, 0x0000a0a0a0a000a0,
-	0x0000a1a1a1a100a1, 0x0000626262620062, 0x0000545454540054,
-	0x00001e1e1e1e001e, 0x0000e0e0e0e000e0, 0x0000646464640064,
-	0x0000101010100010, 0x0000000000000000, 0x0000a3a3a3a300a3,
-	0x0000757575750075, 0x00008a8a8a8a008a, 0x0000e6e6e6e600e6,
-	0x0000090909090009, 0x0000dddddddd00dd, 0x0000878787870087,
-	0x0000838383830083, 0x0000cdcdcdcd00cd, 0x0000909090900090,
-	0x0000737373730073, 0x0000f6f6f6f600f6, 0x00009d9d9d9d009d,
-	0x0000bfbfbfbf00bf, 0x0000525252520052, 0x0000d8d8d8d800d8,
-	0x0000c8c8c8c800c8, 0x0000c6c6c6c600c6, 0x0000818181810081,
-	0x00006f6f6f6f006f, 0x0000131313130013, 0x0000636363630063,
-	0x0000e9e9e9e900e9, 0x0000a7a7a7a700a7, 0x00009f9f9f9f009f,
-	0x0000bcbcbcbc00bc, 0x0000292929290029, 0x0000f9f9f9f900f9,
-	0x00002f2f2f2f002f, 0x0000b4b4b4b400b4, 0x0000787878780078,
-	0x0000060606060006, 0x0000e7e7e7e700e7, 0x0000717171710071,
-	0x0000d4d4d4d400d4, 0x0000abababab00ab, 0x0000888888880088,
-	0x00008d8d8d8d008d, 0x0000727272720072, 0x0000b9b9b9b900b9,
-	0x0000f8f8f8f800f8, 0x0000acacacac00ac, 0x0000363636360036,
-	0x00002a2a2a2a002a, 0x00003c3c3c3c003c, 0x0000f1f1f1f100f1,
-	0x0000404040400040, 0x0000d3d3d3d300d3, 0x0000bbbbbbbb00bb,
-	0x0000434343430043, 0x0000151515150015, 0x0000adadadad00ad,
-	0x0000777777770077, 0x0000808080800080, 0x0000828282820082,
-	0x0000ecececec00ec, 0x0000272727270027, 0x0000e5e5e5e500e5,
-	0x0000858585850085, 0x0000353535350035, 0x00000c0c0c0c000c,
-	0x0000414141410041, 0x0000efefefef00ef, 0x0000939393930093,
-	0x0000191919190019, 0x0000212121210021, 0x00000e0e0e0e000e,
-	0x00004e4e4e4e004e, 0x0000656565650065, 0x0000bdbdbdbd00bd,
-	0x0000b8b8b8b800b8, 0x00008f8f8f8f008f, 0x0000ebebebeb00eb,
-	0x0000cececece00ce, 0x0000303030300030, 0x00005f5f5f5f005f,
-	0x0000c5c5c5c500c5, 0x00001a1a1a1a001a, 0x0000e1e1e1e100e1,
-	0x0000cacacaca00ca, 0x0000474747470047, 0x00003d3d3d3d003d,
-	0x0000010101010001, 0x0000d6d6d6d600d6, 0x0000565656560056,
-	0x00004d4d4d4d004d, 0x00000d0d0d0d000d, 0x0000666666660066,
-	0x0000cccccccc00cc, 0x00002d2d2d2d002d, 0x0000121212120012,
-	0x0000202020200020, 0x0000b1b1b1b100b1, 0x0000999999990099,
-	0x00004c4c4c4c004c, 0x0000c2c2c2c200c2, 0x00007e7e7e7e007e,
-	0x0000050505050005, 0x0000b7b7b7b700b7, 0x0000313131310031,
-	0x0000171717170017, 0x0000d7d7d7d700d7, 0x0000585858580058,
-	0x0000616161610061, 0x00001b1b1b1b001b, 0x00001c1c1c1c001c,
-	0x00000f0f0f0f000f, 0x0000161616160016, 0x0000181818180018,
-	0x0000222222220022, 0x0000444444440044, 0x0000b2b2b2b200b2,
-	0x0000b5b5b5b500b5, 0x0000919191910091, 0x0000080808080008,
-	0x0000a8a8a8a800a8, 0x0000fcfcfcfc00fc, 0x0000505050500050,
-	0x0000d0d0d0d000d0, 0x00007d7d7d7d007d, 0x0000898989890089,
-	0x0000979797970097, 0x00005b5b5b5b005b, 0x0000959595950095,
-	0x0000ffffffff00ff, 0x0000d2d2d2d200d2, 0x0000c4c4c4c400c4,
-	0x0000484848480048, 0x0000f7f7f7f700f7, 0x0000dbdbdbdb00db,
-	0x0000030303030003, 0x0000dadadada00da, 0x00003f3f3f3f003f,
-	0x0000949494940094, 0x00005c5c5c5c005c, 0x0000020202020002,
-	0x00004a4a4a4a004a, 0x0000333333330033, 0x0000676767670067,
-	0x0000f3f3f3f300f3, 0x00007f7f7f7f007f, 0x0000e2e2e2e200e2,
-	0x00009b9b9b9b009b, 0x0000262626260026, 0x0000373737370037,
-	0x00003b3b3b3b003b, 0x0000969696960096, 0x00004b4b4b4b004b,
-	0x0000bebebebe00be, 0x00002e2e2e2e002e, 0x0000797979790079,
-	0x00008c8c8c8c008c, 0x00006e6e6e6e006e, 0x00008e8e8e8e008e,
-	0x0000f5f5f5f500f5, 0x0000b6b6b6b600b6, 0x0000fdfdfdfd00fd,
-	0x0000595959590059, 0x0000989898980098, 0x00006a6a6a6a006a,
-	0x0000464646460046, 0x0000babababa00ba, 0x0000252525250025,
-	0x0000424242420042, 0x0000a2a2a2a200a2, 0x0000fafafafa00fa,
-	0x0000070707070007, 0x0000555555550055, 0x0000eeeeeeee00ee,
-	0x00000a0a0a0a000a, 0x0000494949490049, 0x0000686868680068,
-	0x0000383838380038, 0x0000a4a4a4a400a4, 0x0000282828280028,
-	0x00007b7b7b7b007b, 0x0000c9c9c9c900c9, 0x0000c1c1c1c100c1,
-	0x0000e3e3e3e300e3, 0x0000f4f4f4f400f4, 0x0000c7c7c7c700c7,
-	0x00009e9e9e9e009e,
+	0x0000707070700070ULL, 0x00002c2c2c2c002cULL, 0x0000b3b3b3b300b3ULL,
+	0x0000c0c0c0c000c0ULL, 0x0000e4e4e4e400e4ULL, 0x0000575757570057ULL,
+	0x0000eaeaeaea00eaULL, 0x0000aeaeaeae00aeULL, 0x0000232323230023ULL,
+	0x00006b6b6b6b006bULL, 0x0000454545450045ULL, 0x0000a5a5a5a500a5ULL,
+	0x0000edededed00edULL, 0x00004f4f4f4f004fULL, 0x00001d1d1d1d001dULL,
+	0x0000929292920092ULL, 0x0000868686860086ULL, 0x0000afafafaf00afULL,
+	0x00007c7c7c7c007cULL, 0x00001f1f1f1f001fULL, 0x00003e3e3e3e003eULL,
+	0x0000dcdcdcdc00dcULL, 0x00005e5e5e5e005eULL, 0x00000b0b0b0b000bULL,
+	0x0000a6a6a6a600a6ULL, 0x0000393939390039ULL, 0x0000d5d5d5d500d5ULL,
+	0x00005d5d5d5d005dULL, 0x0000d9d9d9d900d9ULL, 0x00005a5a5a5a005aULL,
+	0x0000515151510051ULL, 0x00006c6c6c6c006cULL, 0x00008b8b8b8b008bULL,
+	0x00009a9a9a9a009aULL, 0x0000fbfbfbfb00fbULL, 0x0000b0b0b0b000b0ULL,
+	0x0000747474740074ULL, 0x00002b2b2b2b002bULL, 0x0000f0f0f0f000f0ULL,
+	0x0000848484840084ULL, 0x0000dfdfdfdf00dfULL, 0x0000cbcbcbcb00cbULL,
+	0x0000343434340034ULL, 0x0000767676760076ULL, 0x00006d6d6d6d006dULL,
+	0x0000a9a9a9a900a9ULL, 0x0000d1d1d1d100d1ULL, 0x0000040404040004ULL,
+	0x0000141414140014ULL, 0x00003a3a3a3a003aULL, 0x0000dededede00deULL,
+	0x0000111111110011ULL, 0x0000323232320032ULL, 0x00009c9c9c9c009cULL,
+	0x0000535353530053ULL, 0x0000f2f2f2f200f2ULL, 0x0000fefefefe00feULL,
+	0x0000cfcfcfcf00cfULL, 0x0000c3c3c3c300c3ULL, 0x00007a7a7a7a007aULL,
+	0x0000242424240024ULL, 0x0000e8e8e8e800e8ULL, 0x0000606060600060ULL,
+	0x0000696969690069ULL, 0x0000aaaaaaaa00aaULL, 0x0000a0a0a0a000a0ULL,
+	0x0000a1a1a1a100a1ULL, 0x0000626262620062ULL, 0x0000545454540054ULL,
+	0x00001e1e1e1e001eULL, 0x0000e0e0e0e000e0ULL, 0x0000646464640064ULL,
+	0x0000101010100010ULL, 0x0000000000000000ULL, 0x0000a3a3a3a300a3ULL,
+	0x0000757575750075ULL, 0x00008a8a8a8a008aULL, 0x0000e6e6e6e600e6ULL,
+	0x0000090909090009ULL, 0x0000dddddddd00ddULL, 0x0000878787870087ULL,
+	0x0000838383830083ULL, 0x0000cdcdcdcd00cdULL, 0x0000909090900090ULL,
+	0x0000737373730073ULL, 0x0000f6f6f6f600f6ULL, 0x00009d9d9d9d009dULL,
+	0x0000bfbfbfbf00bfULL, 0x0000525252520052ULL, 0x0000d8d8d8d800d8ULL,
+	0x0000c8c8c8c800c8ULL, 0x0000c6c6c6c600c6ULL, 0x0000818181810081ULL,
+	0x00006f6f6f6f006fULL, 0x0000131313130013ULL, 0x0000636363630063ULL,
+	0x0000e9e9e9e900e9ULL, 0x0000a7a7a7a700a7ULL, 0x00009f9f9f9f009fULL,
+	0x0000bcbcbcbc00bcULL, 0x0000292929290029ULL, 0x0000f9f9f9f900f9ULL,
+	0x00002f2f2f2f002fULL, 0x0000b4b4b4b400b4ULL, 0x0000787878780078ULL,
+	0x0000060606060006ULL, 0x0000e7e7e7e700e7ULL, 0x0000717171710071ULL,
+	0x0000d4d4d4d400d4ULL, 0x0000abababab00abULL, 0x0000888888880088ULL,
+	0x00008d8d8d8d008dULL, 0x0000727272720072ULL, 0x0000b9b9b9b900b9ULL,
+	0x0000f8f8f8f800f8ULL, 0x0000acacacac00acULL, 0x0000363636360036ULL,
+	0x00002a2a2a2a002aULL, 0x00003c3c3c3c003cULL, 0x0000f1f1f1f100f1ULL,
+	0x0000404040400040ULL, 0x0000d3d3d3d300d3ULL, 0x0000bbbbbbbb00bbULL,
+	0x0000434343430043ULL, 0x0000151515150015ULL, 0x0000adadadad00adULL,
+	0x0000777777770077ULL, 0x0000808080800080ULL, 0x0000828282820082ULL,
+	0x0000ecececec00ecULL, 0x0000272727270027ULL, 0x0000e5e5e5e500e5ULL,
+	0x0000858585850085ULL, 0x0000353535350035ULL, 0x00000c0c0c0c000cULL,
+	0x0000414141410041ULL, 0x0000efefefef00efULL, 0x0000939393930093ULL,
+	0x0000191919190019ULL, 0x0000212121210021ULL, 0x00000e0e0e0e000eULL,
+	0x00004e4e4e4e004eULL, 0x0000656565650065ULL, 0x0000bdbdbdbd00bdULL,
+	0x0000b8b8b8b800b8ULL, 0x00008f8f8f8f008fULL, 0x0000ebebebeb00ebULL,
+	0x0000cececece00ceULL, 0x0000303030300030ULL, 0x00005f5f5f5f005fULL,
+	0x0000c5c5c5c500c5ULL, 0x00001a1a1a1a001aULL, 0x0000e1e1e1e100e1ULL,
+	0x0000cacacaca00caULL, 0x0000474747470047ULL, 0x00003d3d3d3d003dULL,
+	0x0000010101010001ULL, 0x0000d6d6d6d600d6ULL, 0x0000565656560056ULL,
+	0x00004d4d4d4d004dULL, 0x00000d0d0d0d000dULL, 0x0000666666660066ULL,
+	0x0000cccccccc00ccULL, 0x00002d2d2d2d002dULL, 0x0000121212120012ULL,
+	0x0000202020200020ULL, 0x0000b1b1b1b100b1ULL, 0x0000999999990099ULL,
+	0x00004c4c4c4c004cULL, 0x0000c2c2c2c200c2ULL, 0x00007e7e7e7e007eULL,
+	0x0000050505050005ULL, 0x0000b7b7b7b700b7ULL, 0x0000313131310031ULL,
+	0x0000171717170017ULL, 0x0000d7d7d7d700d7ULL, 0x0000585858580058ULL,
+	0x0000616161610061ULL, 0x00001b1b1b1b001bULL, 0x00001c1c1c1c001cULL,
+	0x00000f0f0f0f000fULL, 0x0000161616160016ULL, 0x0000181818180018ULL,
+	0x0000222222220022ULL, 0x0000444444440044ULL, 0x0000b2b2b2b200b2ULL,
+	0x0000b5b5b5b500b5ULL, 0x0000919191910091ULL, 0x0000080808080008ULL,
+	0x0000a8a8a8a800a8ULL, 0x0000fcfcfcfc00fcULL, 0x0000505050500050ULL,
+	0x0000d0d0d0d000d0ULL, 0x00007d7d7d7d007dULL, 0x0000898989890089ULL,
+	0x0000979797970097ULL, 0x00005b5b5b5b005bULL, 0x0000959595950095ULL,
+	0x0000ffffffff00ffULL, 0x0000d2d2d2d200d2ULL, 0x0000c4c4c4c400c4ULL,
+	0x0000484848480048ULL, 0x0000f7f7f7f700f7ULL, 0x0000dbdbdbdb00dbULL,
+	0x0000030303030003ULL, 0x0000dadadada00daULL, 0x00003f3f3f3f003fULL,
+	0x0000949494940094ULL, 0x00005c5c5c5c005cULL, 0x0000020202020002ULL,
+	0x00004a4a4a4a004aULL, 0x0000333333330033ULL, 0x0000676767670067ULL,
+	0x0000f3f3f3f300f3ULL, 0x00007f7f7f7f007fULL, 0x0000e2e2e2e200e2ULL,
+	0x00009b9b9b9b009bULL, 0x0000262626260026ULL, 0x0000373737370037ULL,
+	0x00003b3b3b3b003bULL, 0x0000969696960096ULL, 0x00004b4b4b4b004bULL,
+	0x0000bebebebe00beULL, 0x00002e2e2e2e002eULL, 0x0000797979790079ULL,
+	0x00008c8c8c8c008cULL, 0x00006e6e6e6e006eULL, 0x00008e8e8e8e008eULL,
+	0x0000f5f5f5f500f5ULL, 0x0000b6b6b6b600b6ULL, 0x0000fdfdfdfd00fdULL,
+	0x0000595959590059ULL, 0x0000989898980098ULL, 0x00006a6a6a6a006aULL,
+	0x0000464646460046ULL, 0x0000babababa00baULL, 0x0000252525250025ULL,
+	0x0000424242420042ULL, 0x0000a2a2a2a200a2ULL, 0x0000fafafafa00faULL,
+	0x0000070707070007ULL, 0x0000555555550055ULL, 0x0000eeeeeeee00eeULL,
+	0x00000a0a0a0a000aULL, 0x0000494949490049ULL, 0x0000686868680068ULL,
+	0x0000383838380038ULL, 0x0000a4a4a4a400a4ULL, 0x0000282828280028ULL,
+	0x00007b7b7b7b007bULL, 0x0000c9c9c9c900c9ULL, 0x0000c1c1c1c100c1ULL,
+	0x0000e3e3e3e300e3ULL, 0x0000f4f4f4f400f4ULL, 0x0000c7c7c7c700c7ULL,
+	0x00009e9e9e9e009eULL,
 };
 
 const u64 camellia_sp02220222[256] = {
-	0x00e0e0e000e0e0e0, 0x0005050500050505, 0x0058585800585858,
-	0x00d9d9d900d9d9d9, 0x0067676700676767, 0x004e4e4e004e4e4e,
-	0x0081818100818181, 0x00cbcbcb00cbcbcb, 0x00c9c9c900c9c9c9,
-	0x000b0b0b000b0b0b, 0x00aeaeae00aeaeae, 0x006a6a6a006a6a6a,
-	0x00d5d5d500d5d5d5, 0x0018181800181818, 0x005d5d5d005d5d5d,
-	0x0082828200828282, 0x0046464600464646, 0x00dfdfdf00dfdfdf,
-	0x00d6d6d600d6d6d6, 0x0027272700272727, 0x008a8a8a008a8a8a,
-	0x0032323200323232, 0x004b4b4b004b4b4b, 0x0042424200424242,
-	0x00dbdbdb00dbdbdb, 0x001c1c1c001c1c1c, 0x009e9e9e009e9e9e,
-	0x009c9c9c009c9c9c, 0x003a3a3a003a3a3a, 0x00cacaca00cacaca,
-	0x0025252500252525, 0x007b7b7b007b7b7b, 0x000d0d0d000d0d0d,
-	0x0071717100717171, 0x005f5f5f005f5f5f, 0x001f1f1f001f1f1f,
-	0x00f8f8f800f8f8f8, 0x00d7d7d700d7d7d7, 0x003e3e3e003e3e3e,
-	0x009d9d9d009d9d9d, 0x007c7c7c007c7c7c, 0x0060606000606060,
-	0x00b9b9b900b9b9b9, 0x00bebebe00bebebe, 0x00bcbcbc00bcbcbc,
-	0x008b8b8b008b8b8b, 0x0016161600161616, 0x0034343400343434,
-	0x004d4d4d004d4d4d, 0x00c3c3c300c3c3c3, 0x0072727200727272,
-	0x0095959500959595, 0x00ababab00ababab, 0x008e8e8e008e8e8e,
-	0x00bababa00bababa, 0x007a7a7a007a7a7a, 0x00b3b3b300b3b3b3,
-	0x0002020200020202, 0x00b4b4b400b4b4b4, 0x00adadad00adadad,
-	0x00a2a2a200a2a2a2, 0x00acacac00acacac, 0x00d8d8d800d8d8d8,
-	0x009a9a9a009a9a9a, 0x0017171700171717, 0x001a1a1a001a1a1a,
-	0x0035353500353535, 0x00cccccc00cccccc, 0x00f7f7f700f7f7f7,
-	0x0099999900999999, 0x0061616100616161, 0x005a5a5a005a5a5a,
-	0x00e8e8e800e8e8e8, 0x0024242400242424, 0x0056565600565656,
-	0x0040404000404040, 0x00e1e1e100e1e1e1, 0x0063636300636363,
-	0x0009090900090909, 0x0033333300333333, 0x00bfbfbf00bfbfbf,
-	0x0098989800989898, 0x0097979700979797, 0x0085858500858585,
-	0x0068686800686868, 0x00fcfcfc00fcfcfc, 0x00ececec00ececec,
-	0x000a0a0a000a0a0a, 0x00dadada00dadada, 0x006f6f6f006f6f6f,
-	0x0053535300535353, 0x0062626200626262, 0x00a3a3a300a3a3a3,
-	0x002e2e2e002e2e2e, 0x0008080800080808, 0x00afafaf00afafaf,
-	0x0028282800282828, 0x00b0b0b000b0b0b0, 0x0074747400747474,
-	0x00c2c2c200c2c2c2, 0x00bdbdbd00bdbdbd, 0x0036363600363636,
-	0x0022222200222222, 0x0038383800383838, 0x0064646400646464,
-	0x001e1e1e001e1e1e, 0x0039393900393939, 0x002c2c2c002c2c2c,
-	0x00a6a6a600a6a6a6, 0x0030303000303030, 0x00e5e5e500e5e5e5,
-	0x0044444400444444, 0x00fdfdfd00fdfdfd, 0x0088888800888888,
-	0x009f9f9f009f9f9f, 0x0065656500656565, 0x0087878700878787,
-	0x006b6b6b006b6b6b, 0x00f4f4f400f4f4f4, 0x0023232300232323,
-	0x0048484800484848, 0x0010101000101010, 0x00d1d1d100d1d1d1,
-	0x0051515100515151, 0x00c0c0c000c0c0c0, 0x00f9f9f900f9f9f9,
-	0x00d2d2d200d2d2d2, 0x00a0a0a000a0a0a0, 0x0055555500555555,
-	0x00a1a1a100a1a1a1, 0x0041414100414141, 0x00fafafa00fafafa,
-	0x0043434300434343, 0x0013131300131313, 0x00c4c4c400c4c4c4,
-	0x002f2f2f002f2f2f, 0x00a8a8a800a8a8a8, 0x00b6b6b600b6b6b6,
-	0x003c3c3c003c3c3c, 0x002b2b2b002b2b2b, 0x00c1c1c100c1c1c1,
-	0x00ffffff00ffffff, 0x00c8c8c800c8c8c8, 0x00a5a5a500a5a5a5,
-	0x0020202000202020, 0x0089898900898989, 0x0000000000000000,
-	0x0090909000909090, 0x0047474700474747, 0x00efefef00efefef,
-	0x00eaeaea00eaeaea, 0x00b7b7b700b7b7b7, 0x0015151500151515,
-	0x0006060600060606, 0x00cdcdcd00cdcdcd, 0x00b5b5b500b5b5b5,
-	0x0012121200121212, 0x007e7e7e007e7e7e, 0x00bbbbbb00bbbbbb,
-	0x0029292900292929, 0x000f0f0f000f0f0f, 0x00b8b8b800b8b8b8,
-	0x0007070700070707, 0x0004040400040404, 0x009b9b9b009b9b9b,
-	0x0094949400949494, 0x0021212100212121, 0x0066666600666666,
-	0x00e6e6e600e6e6e6, 0x00cecece00cecece, 0x00ededed00ededed,
-	0x00e7e7e700e7e7e7, 0x003b3b3b003b3b3b, 0x00fefefe00fefefe,
-	0x007f7f7f007f7f7f, 0x00c5c5c500c5c5c5, 0x00a4a4a400a4a4a4,
-	0x0037373700373737, 0x00b1b1b100b1b1b1, 0x004c4c4c004c4c4c,
-	0x0091919100919191, 0x006e6e6e006e6e6e, 0x008d8d8d008d8d8d,
-	0x0076767600767676, 0x0003030300030303, 0x002d2d2d002d2d2d,
-	0x00dedede00dedede, 0x0096969600969696, 0x0026262600262626,
-	0x007d7d7d007d7d7d, 0x00c6c6c600c6c6c6, 0x005c5c5c005c5c5c,
-	0x00d3d3d300d3d3d3, 0x00f2f2f200f2f2f2, 0x004f4f4f004f4f4f,
-	0x0019191900191919, 0x003f3f3f003f3f3f, 0x00dcdcdc00dcdcdc,
-	0x0079797900797979, 0x001d1d1d001d1d1d, 0x0052525200525252,
-	0x00ebebeb00ebebeb, 0x00f3f3f300f3f3f3, 0x006d6d6d006d6d6d,
-	0x005e5e5e005e5e5e, 0x00fbfbfb00fbfbfb, 0x0069696900696969,
-	0x00b2b2b200b2b2b2, 0x00f0f0f000f0f0f0, 0x0031313100313131,
-	0x000c0c0c000c0c0c, 0x00d4d4d400d4d4d4, 0x00cfcfcf00cfcfcf,
-	0x008c8c8c008c8c8c, 0x00e2e2e200e2e2e2, 0x0075757500757575,
-	0x00a9a9a900a9a9a9, 0x004a4a4a004a4a4a, 0x0057575700575757,
-	0x0084848400848484, 0x0011111100111111, 0x0045454500454545,
-	0x001b1b1b001b1b1b, 0x00f5f5f500f5f5f5, 0x00e4e4e400e4e4e4,
-	0x000e0e0e000e0e0e, 0x0073737300737373, 0x00aaaaaa00aaaaaa,
-	0x00f1f1f100f1f1f1, 0x00dddddd00dddddd, 0x0059595900595959,
-	0x0014141400141414, 0x006c6c6c006c6c6c, 0x0092929200929292,
-	0x0054545400545454, 0x00d0d0d000d0d0d0, 0x0078787800787878,
-	0x0070707000707070, 0x00e3e3e300e3e3e3, 0x0049494900494949,
-	0x0080808000808080, 0x0050505000505050, 0x00a7a7a700a7a7a7,
-	0x00f6f6f600f6f6f6, 0x0077777700777777, 0x0093939300939393,
-	0x0086868600868686, 0x0083838300838383, 0x002a2a2a002a2a2a,
-	0x00c7c7c700c7c7c7, 0x005b5b5b005b5b5b, 0x00e9e9e900e9e9e9,
-	0x00eeeeee00eeeeee, 0x008f8f8f008f8f8f, 0x0001010100010101,
-	0x003d3d3d003d3d3d,
+	0x00e0e0e000e0e0e0ULL, 0x0005050500050505ULL, 0x0058585800585858ULL,
+	0x00d9d9d900d9d9d9ULL, 0x0067676700676767ULL, 0x004e4e4e004e4e4eULL,
+	0x0081818100818181ULL, 0x00cbcbcb00cbcbcbULL, 0x00c9c9c900c9c9c9ULL,
+	0x000b0b0b000b0b0bULL, 0x00aeaeae00aeaeaeULL, 0x006a6a6a006a6a6aULL,
+	0x00d5d5d500d5d5d5ULL, 0x0018181800181818ULL, 0x005d5d5d005d5d5dULL,
+	0x0082828200828282ULL, 0x0046464600464646ULL, 0x00dfdfdf00dfdfdfULL,
+	0x00d6d6d600d6d6d6ULL, 0x0027272700272727ULL, 0x008a8a8a008a8a8aULL,
+	0x0032323200323232ULL, 0x004b4b4b004b4b4bULL, 0x0042424200424242ULL,
+	0x00dbdbdb00dbdbdbULL, 0x001c1c1c001c1c1cULL, 0x009e9e9e009e9e9eULL,
+	0x009c9c9c009c9c9cULL, 0x003a3a3a003a3a3aULL, 0x00cacaca00cacacaULL,
+	0x0025252500252525ULL, 0x007b7b7b007b7b7bULL, 0x000d0d0d000d0d0dULL,
+	0x0071717100717171ULL, 0x005f5f5f005f5f5fULL, 0x001f1f1f001f1f1fULL,
+	0x00f8f8f800f8f8f8ULL, 0x00d7d7d700d7d7d7ULL, 0x003e3e3e003e3e3eULL,
+	0x009d9d9d009d9d9dULL, 0x007c7c7c007c7c7cULL, 0x0060606000606060ULL,
+	0x00b9b9b900b9b9b9ULL, 0x00bebebe00bebebeULL, 0x00bcbcbc00bcbcbcULL,
+	0x008b8b8b008b8b8bULL, 0x0016161600161616ULL, 0x0034343400343434ULL,
+	0x004d4d4d004d4d4dULL, 0x00c3c3c300c3c3c3ULL, 0x0072727200727272ULL,
+	0x0095959500959595ULL, 0x00ababab00abababULL, 0x008e8e8e008e8e8eULL,
+	0x00bababa00bababaULL, 0x007a7a7a007a7a7aULL, 0x00b3b3b300b3b3b3ULL,
+	0x0002020200020202ULL, 0x00b4b4b400b4b4b4ULL, 0x00adadad00adadadULL,
+	0x00a2a2a200a2a2a2ULL, 0x00acacac00acacacULL, 0x00d8d8d800d8d8d8ULL,
+	0x009a9a9a009a9a9aULL, 0x0017171700171717ULL, 0x001a1a1a001a1a1aULL,
+	0x0035353500353535ULL, 0x00cccccc00ccccccULL, 0x00f7f7f700f7f7f7ULL,
+	0x0099999900999999ULL, 0x0061616100616161ULL, 0x005a5a5a005a5a5aULL,
+	0x00e8e8e800e8e8e8ULL, 0x0024242400242424ULL, 0x0056565600565656ULL,
+	0x0040404000404040ULL, 0x00e1e1e100e1e1e1ULL, 0x0063636300636363ULL,
+	0x0009090900090909ULL, 0x0033333300333333ULL, 0x00bfbfbf00bfbfbfULL,
+	0x0098989800989898ULL, 0x0097979700979797ULL, 0x0085858500858585ULL,
+	0x0068686800686868ULL, 0x00fcfcfc00fcfcfcULL, 0x00ececec00ecececULL,
+	0x000a0a0a000a0a0aULL, 0x00dadada00dadadaULL, 0x006f6f6f006f6f6fULL,
+	0x0053535300535353ULL, 0x0062626200626262ULL, 0x00a3a3a300a3a3a3ULL,
+	0x002e2e2e002e2e2eULL, 0x0008080800080808ULL, 0x00afafaf00afafafULL,
+	0x0028282800282828ULL, 0x00b0b0b000b0b0b0ULL, 0x0074747400747474ULL,
+	0x00c2c2c200c2c2c2ULL, 0x00bdbdbd00bdbdbdULL, 0x0036363600363636ULL,
+	0x0022222200222222ULL, 0x0038383800383838ULL, 0x0064646400646464ULL,
+	0x001e1e1e001e1e1eULL, 0x0039393900393939ULL, 0x002c2c2c002c2c2cULL,
+	0x00a6a6a600a6a6a6ULL, 0x0030303000303030ULL, 0x00e5e5e500e5e5e5ULL,
+	0x0044444400444444ULL, 0x00fdfdfd00fdfdfdULL, 0x0088888800888888ULL,
+	0x009f9f9f009f9f9fULL, 0x0065656500656565ULL, 0x0087878700878787ULL,
+	0x006b6b6b006b6b6bULL, 0x00f4f4f400f4f4f4ULL, 0x0023232300232323ULL,
+	0x0048484800484848ULL, 0x0010101000101010ULL, 0x00d1d1d100d1d1d1ULL,
+	0x0051515100515151ULL, 0x00c0c0c000c0c0c0ULL, 0x00f9f9f900f9f9f9ULL,
+	0x00d2d2d200d2d2d2ULL, 0x00a0a0a000a0a0a0ULL, 0x0055555500555555ULL,
+	0x00a1a1a100a1a1a1ULL, 0x0041414100414141ULL, 0x00fafafa00fafafaULL,
+	0x0043434300434343ULL, 0x0013131300131313ULL, 0x00c4c4c400c4c4c4ULL,
+	0x002f2f2f002f2f2fULL, 0x00a8a8a800a8a8a8ULL, 0x00b6b6b600b6b6b6ULL,
+	0x003c3c3c003c3c3cULL, 0x002b2b2b002b2b2bULL, 0x00c1c1c100c1c1c1ULL,
+	0x00ffffff00ffffffULL, 0x00c8c8c800c8c8c8ULL, 0x00a5a5a500a5a5a5ULL,
+	0x0020202000202020ULL, 0x0089898900898989ULL, 0x0000000000000000ULL,
+	0x0090909000909090ULL, 0x0047474700474747ULL, 0x00efefef00efefefULL,
+	0x00eaeaea00eaeaeaULL, 0x00b7b7b700b7b7b7ULL, 0x0015151500151515ULL,
+	0x0006060600060606ULL, 0x00cdcdcd00cdcdcdULL, 0x00b5b5b500b5b5b5ULL,
+	0x0012121200121212ULL, 0x007e7e7e007e7e7eULL, 0x00bbbbbb00bbbbbbULL,
+	0x0029292900292929ULL, 0x000f0f0f000f0f0fULL, 0x00b8b8b800b8b8b8ULL,
+	0x0007070700070707ULL, 0x0004040400040404ULL, 0x009b9b9b009b9b9bULL,
+	0x0094949400949494ULL, 0x0021212100212121ULL, 0x0066666600666666ULL,
+	0x00e6e6e600e6e6e6ULL, 0x00cecece00cececeULL, 0x00ededed00edededULL,
+	0x00e7e7e700e7e7e7ULL, 0x003b3b3b003b3b3bULL, 0x00fefefe00fefefeULL,
+	0x007f7f7f007f7f7fULL, 0x00c5c5c500c5c5c5ULL, 0x00a4a4a400a4a4a4ULL,
+	0x0037373700373737ULL, 0x00b1b1b100b1b1b1ULL, 0x004c4c4c004c4c4cULL,
+	0x0091919100919191ULL, 0x006e6e6e006e6e6eULL, 0x008d8d8d008d8d8dULL,
+	0x0076767600767676ULL, 0x0003030300030303ULL, 0x002d2d2d002d2d2dULL,
+	0x00dedede00dededeULL, 0x0096969600969696ULL, 0x0026262600262626ULL,
+	0x007d7d7d007d7d7dULL, 0x00c6c6c600c6c6c6ULL, 0x005c5c5c005c5c5cULL,
+	0x00d3d3d300d3d3d3ULL, 0x00f2f2f200f2f2f2ULL, 0x004f4f4f004f4f4fULL,
+	0x0019191900191919ULL, 0x003f3f3f003f3f3fULL, 0x00dcdcdc00dcdcdcULL,
+	0x0079797900797979ULL, 0x001d1d1d001d1d1dULL, 0x0052525200525252ULL,
+	0x00ebebeb00ebebebULL, 0x00f3f3f300f3f3f3ULL, 0x006d6d6d006d6d6dULL,
+	0x005e5e5e005e5e5eULL, 0x00fbfbfb00fbfbfbULL, 0x0069696900696969ULL,
+	0x00b2b2b200b2b2b2ULL, 0x00f0f0f000f0f0f0ULL, 0x0031313100313131ULL,
+	0x000c0c0c000c0c0cULL, 0x00d4d4d400d4d4d4ULL, 0x00cfcfcf00cfcfcfULL,
+	0x008c8c8c008c8c8cULL, 0x00e2e2e200e2e2e2ULL, 0x0075757500757575ULL,
+	0x00a9a9a900a9a9a9ULL, 0x004a4a4a004a4a4aULL, 0x0057575700575757ULL,
+	0x0084848400848484ULL, 0x0011111100111111ULL, 0x0045454500454545ULL,
+	0x001b1b1b001b1b1bULL, 0x00f5f5f500f5f5f5ULL, 0x00e4e4e400e4e4e4ULL,
+	0x000e0e0e000e0e0eULL, 0x0073737300737373ULL, 0x00aaaaaa00aaaaaaULL,
+	0x00f1f1f100f1f1f1ULL, 0x00dddddd00ddddddULL, 0x0059595900595959ULL,
+	0x0014141400141414ULL, 0x006c6c6c006c6c6cULL, 0x0092929200929292ULL,
+	0x0054545400545454ULL, 0x00d0d0d000d0d0d0ULL, 0x0078787800787878ULL,
+	0x0070707000707070ULL, 0x00e3e3e300e3e3e3ULL, 0x0049494900494949ULL,
+	0x0080808000808080ULL, 0x0050505000505050ULL, 0x00a7a7a700a7a7a7ULL,
+	0x00f6f6f600f6f6f6ULL, 0x0077777700777777ULL, 0x0093939300939393ULL,
+	0x0086868600868686ULL, 0x0083838300838383ULL, 0x002a2a2a002a2a2aULL,
+	0x00c7c7c700c7c7c7ULL, 0x005b5b5b005b5b5bULL, 0x00e9e9e900e9e9e9ULL,
+	0x00eeeeee00eeeeeeULL, 0x008f8f8f008f8f8fULL, 0x0001010100010101ULL,
+	0x003d3d3d003d3d3dULL,
 };
 
 const u64 camellia_sp30333033[256] = {
-	0x3800383838003838, 0x4100414141004141, 0x1600161616001616,
-	0x7600767676007676, 0xd900d9d9d900d9d9, 0x9300939393009393,
-	0x6000606060006060, 0xf200f2f2f200f2f2, 0x7200727272007272,
-	0xc200c2c2c200c2c2, 0xab00ababab00abab, 0x9a009a9a9a009a9a,
-	0x7500757575007575, 0x0600060606000606, 0x5700575757005757,
-	0xa000a0a0a000a0a0, 0x9100919191009191, 0xf700f7f7f700f7f7,
-	0xb500b5b5b500b5b5, 0xc900c9c9c900c9c9, 0xa200a2a2a200a2a2,
-	0x8c008c8c8c008c8c, 0xd200d2d2d200d2d2, 0x9000909090009090,
-	0xf600f6f6f600f6f6, 0x0700070707000707, 0xa700a7a7a700a7a7,
-	0x2700272727002727, 0x8e008e8e8e008e8e, 0xb200b2b2b200b2b2,
-	0x4900494949004949, 0xde00dedede00dede, 0x4300434343004343,
-	0x5c005c5c5c005c5c, 0xd700d7d7d700d7d7, 0xc700c7c7c700c7c7,
-	0x3e003e3e3e003e3e, 0xf500f5f5f500f5f5, 0x8f008f8f8f008f8f,
-	0x6700676767006767, 0x1f001f1f1f001f1f, 0x1800181818001818,
-	0x6e006e6e6e006e6e, 0xaf00afafaf00afaf, 0x2f002f2f2f002f2f,
-	0xe200e2e2e200e2e2, 0x8500858585008585, 0x0d000d0d0d000d0d,
-	0x5300535353005353, 0xf000f0f0f000f0f0, 0x9c009c9c9c009c9c,
-	0x6500656565006565, 0xea00eaeaea00eaea, 0xa300a3a3a300a3a3,
-	0xae00aeaeae00aeae, 0x9e009e9e9e009e9e, 0xec00ececec00ecec,
-	0x8000808080008080, 0x2d002d2d2d002d2d, 0x6b006b6b6b006b6b,
-	0xa800a8a8a800a8a8, 0x2b002b2b2b002b2b, 0x3600363636003636,
-	0xa600a6a6a600a6a6, 0xc500c5c5c500c5c5, 0x8600868686008686,
-	0x4d004d4d4d004d4d, 0x3300333333003333, 0xfd00fdfdfd00fdfd,
-	0x6600666666006666, 0x5800585858005858, 0x9600969696009696,
-	0x3a003a3a3a003a3a, 0x0900090909000909, 0x9500959595009595,
-	0x1000101010001010, 0x7800787878007878, 0xd800d8d8d800d8d8,
-	0x4200424242004242, 0xcc00cccccc00cccc, 0xef00efefef00efef,
-	0x2600262626002626, 0xe500e5e5e500e5e5, 0x6100616161006161,
-	0x1a001a1a1a001a1a, 0x3f003f3f3f003f3f, 0x3b003b3b3b003b3b,
-	0x8200828282008282, 0xb600b6b6b600b6b6, 0xdb00dbdbdb00dbdb,
-	0xd400d4d4d400d4d4, 0x9800989898009898, 0xe800e8e8e800e8e8,
-	0x8b008b8b8b008b8b, 0x0200020202000202, 0xeb00ebebeb00ebeb,
-	0x0a000a0a0a000a0a, 0x2c002c2c2c002c2c, 0x1d001d1d1d001d1d,
-	0xb000b0b0b000b0b0, 0x6f006f6f6f006f6f, 0x8d008d8d8d008d8d,
-	0x8800888888008888, 0x0e000e0e0e000e0e, 0x1900191919001919,
-	0x8700878787008787, 0x4e004e4e4e004e4e, 0x0b000b0b0b000b0b,
-	0xa900a9a9a900a9a9, 0x0c000c0c0c000c0c, 0x7900797979007979,
-	0x1100111111001111, 0x7f007f7f7f007f7f, 0x2200222222002222,
-	0xe700e7e7e700e7e7, 0x5900595959005959, 0xe100e1e1e100e1e1,
-	0xda00dadada00dada, 0x3d003d3d3d003d3d, 0xc800c8c8c800c8c8,
-	0x1200121212001212, 0x0400040404000404, 0x7400747474007474,
-	0x5400545454005454, 0x3000303030003030, 0x7e007e7e7e007e7e,
-	0xb400b4b4b400b4b4, 0x2800282828002828, 0x5500555555005555,
-	0x6800686868006868, 0x5000505050005050, 0xbe00bebebe00bebe,
-	0xd000d0d0d000d0d0, 0xc400c4c4c400c4c4, 0x3100313131003131,
-	0xcb00cbcbcb00cbcb, 0x2a002a2a2a002a2a, 0xad00adadad00adad,
-	0x0f000f0f0f000f0f, 0xca00cacaca00caca, 0x7000707070007070,
-	0xff00ffffff00ffff, 0x3200323232003232, 0x6900696969006969,
-	0x0800080808000808, 0x6200626262006262, 0x0000000000000000,
-	0x2400242424002424, 0xd100d1d1d100d1d1, 0xfb00fbfbfb00fbfb,
-	0xba00bababa00baba, 0xed00ededed00eded, 0x4500454545004545,
-	0x8100818181008181, 0x7300737373007373, 0x6d006d6d6d006d6d,
-	0x8400848484008484, 0x9f009f9f9f009f9f, 0xee00eeeeee00eeee,
-	0x4a004a4a4a004a4a, 0xc300c3c3c300c3c3, 0x2e002e2e2e002e2e,
-	0xc100c1c1c100c1c1, 0x0100010101000101, 0xe600e6e6e600e6e6,
-	0x2500252525002525, 0x4800484848004848, 0x9900999999009999,
-	0xb900b9b9b900b9b9, 0xb300b3b3b300b3b3, 0x7b007b7b7b007b7b,
-	0xf900f9f9f900f9f9, 0xce00cecece00cece, 0xbf00bfbfbf00bfbf,
-	0xdf00dfdfdf00dfdf, 0x7100717171007171, 0x2900292929002929,
-	0xcd00cdcdcd00cdcd, 0x6c006c6c6c006c6c, 0x1300131313001313,
-	0x6400646464006464, 0x9b009b9b9b009b9b, 0x6300636363006363,
-	0x9d009d9d9d009d9d, 0xc000c0c0c000c0c0, 0x4b004b4b4b004b4b,
-	0xb700b7b7b700b7b7, 0xa500a5a5a500a5a5, 0x8900898989008989,
-	0x5f005f5f5f005f5f, 0xb100b1b1b100b1b1, 0x1700171717001717,
-	0xf400f4f4f400f4f4, 0xbc00bcbcbc00bcbc, 0xd300d3d3d300d3d3,
-	0x4600464646004646, 0xcf00cfcfcf00cfcf, 0x3700373737003737,
-	0x5e005e5e5e005e5e, 0x4700474747004747, 0x9400949494009494,
-	0xfa00fafafa00fafa, 0xfc00fcfcfc00fcfc, 0x5b005b5b5b005b5b,
-	0x9700979797009797, 0xfe00fefefe00fefe, 0x5a005a5a5a005a5a,
-	0xac00acacac00acac, 0x3c003c3c3c003c3c, 0x4c004c4c4c004c4c,
-	0x0300030303000303, 0x3500353535003535, 0xf300f3f3f300f3f3,
-	0x2300232323002323, 0xb800b8b8b800b8b8, 0x5d005d5d5d005d5d,
-	0x6a006a6a6a006a6a, 0x9200929292009292, 0xd500d5d5d500d5d5,
-	0x2100212121002121, 0x4400444444004444, 0x5100515151005151,
-	0xc600c6c6c600c6c6, 0x7d007d7d7d007d7d, 0x3900393939003939,
-	0x8300838383008383, 0xdc00dcdcdc00dcdc, 0xaa00aaaaaa00aaaa,
-	0x7c007c7c7c007c7c, 0x7700777777007777, 0x5600565656005656,
-	0x0500050505000505, 0x1b001b1b1b001b1b, 0xa400a4a4a400a4a4,
-	0x1500151515001515, 0x3400343434003434, 0x1e001e1e1e001e1e,
-	0x1c001c1c1c001c1c, 0xf800f8f8f800f8f8, 0x5200525252005252,
-	0x2000202020002020, 0x1400141414001414, 0xe900e9e9e900e9e9,
-	0xbd00bdbdbd00bdbd, 0xdd00dddddd00dddd, 0xe400e4e4e400e4e4,
-	0xa100a1a1a100a1a1, 0xe000e0e0e000e0e0, 0x8a008a8a8a008a8a,
-	0xf100f1f1f100f1f1, 0xd600d6d6d600d6d6, 0x7a007a7a7a007a7a,
-	0xbb00bbbbbb00bbbb, 0xe300e3e3e300e3e3, 0x4000404040004040,
-	0x4f004f4f4f004f4f,
+	0x3800383838003838ULL, 0x4100414141004141ULL, 0x1600161616001616ULL,
+	0x7600767676007676ULL, 0xd900d9d9d900d9d9ULL, 0x9300939393009393ULL,
+	0x6000606060006060ULL, 0xf200f2f2f200f2f2ULL, 0x7200727272007272ULL,
+	0xc200c2c2c200c2c2ULL, 0xab00ababab00ababULL, 0x9a009a9a9a009a9aULL,
+	0x7500757575007575ULL, 0x0600060606000606ULL, 0x5700575757005757ULL,
+	0xa000a0a0a000a0a0ULL, 0x9100919191009191ULL, 0xf700f7f7f700f7f7ULL,
+	0xb500b5b5b500b5b5ULL, 0xc900c9c9c900c9c9ULL, 0xa200a2a2a200a2a2ULL,
+	0x8c008c8c8c008c8cULL, 0xd200d2d2d200d2d2ULL, 0x9000909090009090ULL,
+	0xf600f6f6f600f6f6ULL, 0x0700070707000707ULL, 0xa700a7a7a700a7a7ULL,
+	0x2700272727002727ULL, 0x8e008e8e8e008e8eULL, 0xb200b2b2b200b2b2ULL,
+	0x4900494949004949ULL, 0xde00dedede00dedeULL, 0x4300434343004343ULL,
+	0x5c005c5c5c005c5cULL, 0xd700d7d7d700d7d7ULL, 0xc700c7c7c700c7c7ULL,
+	0x3e003e3e3e003e3eULL, 0xf500f5f5f500f5f5ULL, 0x8f008f8f8f008f8fULL,
+	0x6700676767006767ULL, 0x1f001f1f1f001f1fULL, 0x1800181818001818ULL,
+	0x6e006e6e6e006e6eULL, 0xaf00afafaf00afafULL, 0x2f002f2f2f002f2fULL,
+	0xe200e2e2e200e2e2ULL, 0x8500858585008585ULL, 0x0d000d0d0d000d0dULL,
+	0x5300535353005353ULL, 0xf000f0f0f000f0f0ULL, 0x9c009c9c9c009c9cULL,
+	0x6500656565006565ULL, 0xea00eaeaea00eaeaULL, 0xa300a3a3a300a3a3ULL,
+	0xae00aeaeae00aeaeULL, 0x9e009e9e9e009e9eULL, 0xec00ececec00ececULL,
+	0x8000808080008080ULL, 0x2d002d2d2d002d2dULL, 0x6b006b6b6b006b6bULL,
+	0xa800a8a8a800a8a8ULL, 0x2b002b2b2b002b2bULL, 0x3600363636003636ULL,
+	0xa600a6a6a600a6a6ULL, 0xc500c5c5c500c5c5ULL, 0x8600868686008686ULL,
+	0x4d004d4d4d004d4dULL, 0x3300333333003333ULL, 0xfd00fdfdfd00fdfdULL,
+	0x6600666666006666ULL, 0x5800585858005858ULL, 0x9600969696009696ULL,
+	0x3a003a3a3a003a3aULL, 0x0900090909000909ULL, 0x9500959595009595ULL,
+	0x1000101010001010ULL, 0x7800787878007878ULL, 0xd800d8d8d800d8d8ULL,
+	0x4200424242004242ULL, 0xcc00cccccc00ccccULL, 0xef00efefef00efefULL,
+	0x2600262626002626ULL, 0xe500e5e5e500e5e5ULL, 0x6100616161006161ULL,
+	0x1a001a1a1a001a1aULL, 0x3f003f3f3f003f3fULL, 0x3b003b3b3b003b3bULL,
+	0x8200828282008282ULL, 0xb600b6b6b600b6b6ULL, 0xdb00dbdbdb00dbdbULL,
+	0xd400d4d4d400d4d4ULL, 0x9800989898009898ULL, 0xe800e8e8e800e8e8ULL,
+	0x8b008b8b8b008b8bULL, 0x0200020202000202ULL, 0xeb00ebebeb00ebebULL,
+	0x0a000a0a0a000a0aULL, 0x2c002c2c2c002c2cULL, 0x1d001d1d1d001d1dULL,
+	0xb000b0b0b000b0b0ULL, 0x6f006f6f6f006f6fULL, 0x8d008d8d8d008d8dULL,
+	0x8800888888008888ULL, 0x0e000e0e0e000e0eULL, 0x1900191919001919ULL,
+	0x8700878787008787ULL, 0x4e004e4e4e004e4eULL, 0x0b000b0b0b000b0bULL,
+	0xa900a9a9a900a9a9ULL, 0x0c000c0c0c000c0cULL, 0x7900797979007979ULL,
+	0x1100111111001111ULL, 0x7f007f7f7f007f7fULL, 0x2200222222002222ULL,
+	0xe700e7e7e700e7e7ULL, 0x5900595959005959ULL, 0xe100e1e1e100e1e1ULL,
+	0xda00dadada00dadaULL, 0x3d003d3d3d003d3dULL, 0xc800c8c8c800c8c8ULL,
+	0x1200121212001212ULL, 0x0400040404000404ULL, 0x7400747474007474ULL,
+	0x5400545454005454ULL, 0x3000303030003030ULL, 0x7e007e7e7e007e7eULL,
+	0xb400b4b4b400b4b4ULL, 0x2800282828002828ULL, 0x5500555555005555ULL,
+	0x6800686868006868ULL, 0x5000505050005050ULL, 0xbe00bebebe00bebeULL,
+	0xd000d0d0d000d0d0ULL, 0xc400c4c4c400c4c4ULL, 0x3100313131003131ULL,
+	0xcb00cbcbcb00cbcbULL, 0x2a002a2a2a002a2aULL, 0xad00adadad00adadULL,
+	0x0f000f0f0f000f0fULL, 0xca00cacaca00cacaULL, 0x7000707070007070ULL,
+	0xff00ffffff00ffffULL, 0x3200323232003232ULL, 0x6900696969006969ULL,
+	0x0800080808000808ULL, 0x6200626262006262ULL, 0x0000000000000000ULL,
+	0x2400242424002424ULL, 0xd100d1d1d100d1d1ULL, 0xfb00fbfbfb00fbfbULL,
+	0xba00bababa00babaULL, 0xed00ededed00ededULL, 0x4500454545004545ULL,
+	0x8100818181008181ULL, 0x7300737373007373ULL, 0x6d006d6d6d006d6dULL,
+	0x8400848484008484ULL, 0x9f009f9f9f009f9fULL, 0xee00eeeeee00eeeeULL,
+	0x4a004a4a4a004a4aULL, 0xc300c3c3c300c3c3ULL, 0x2e002e2e2e002e2eULL,
+	0xc100c1c1c100c1c1ULL, 0x0100010101000101ULL, 0xe600e6e6e600e6e6ULL,
+	0x2500252525002525ULL, 0x4800484848004848ULL, 0x9900999999009999ULL,
+	0xb900b9b9b900b9b9ULL, 0xb300b3b3b300b3b3ULL, 0x7b007b7b7b007b7bULL,
+	0xf900f9f9f900f9f9ULL, 0xce00cecece00ceceULL, 0xbf00bfbfbf00bfbfULL,
+	0xdf00dfdfdf00dfdfULL, 0x7100717171007171ULL, 0x2900292929002929ULL,
+	0xcd00cdcdcd00cdcdULL, 0x6c006c6c6c006c6cULL, 0x1300131313001313ULL,
+	0x6400646464006464ULL, 0x9b009b9b9b009b9bULL, 0x6300636363006363ULL,
+	0x9d009d9d9d009d9dULL, 0xc000c0c0c000c0c0ULL, 0x4b004b4b4b004b4bULL,
+	0xb700b7b7b700b7b7ULL, 0xa500a5a5a500a5a5ULL, 0x8900898989008989ULL,
+	0x5f005f5f5f005f5fULL, 0xb100b1b1b100b1b1ULL, 0x1700171717001717ULL,
+	0xf400f4f4f400f4f4ULL, 0xbc00bcbcbc00bcbcULL, 0xd300d3d3d300d3d3ULL,
+	0x4600464646004646ULL, 0xcf00cfcfcf00cfcfULL, 0x3700373737003737ULL,
+	0x5e005e5e5e005e5eULL, 0x4700474747004747ULL, 0x9400949494009494ULL,
+	0xfa00fafafa00fafaULL, 0xfc00fcfcfc00fcfcULL, 0x5b005b5b5b005b5bULL,
+	0x9700979797009797ULL, 0xfe00fefefe00fefeULL, 0x5a005a5a5a005a5aULL,
+	0xac00acacac00acacULL, 0x3c003c3c3c003c3cULL, 0x4c004c4c4c004c4cULL,
+	0x0300030303000303ULL, 0x3500353535003535ULL, 0xf300f3f3f300f3f3ULL,
+	0x2300232323002323ULL, 0xb800b8b8b800b8b8ULL, 0x5d005d5d5d005d5dULL,
+	0x6a006a6a6a006a6aULL, 0x9200929292009292ULL, 0xd500d5d5d500d5d5ULL,
+	0x2100212121002121ULL, 0x4400444444004444ULL, 0x5100515151005151ULL,
+	0xc600c6c6c600c6c6ULL, 0x7d007d7d7d007d7dULL, 0x3900393939003939ULL,
+	0x8300838383008383ULL, 0xdc00dcdcdc00dcdcULL, 0xaa00aaaaaa00aaaaULL,
+	0x7c007c7c7c007c7cULL, 0x7700777777007777ULL, 0x5600565656005656ULL,
+	0x0500050505000505ULL, 0x1b001b1b1b001b1bULL, 0xa400a4a4a400a4a4ULL,
+	0x1500151515001515ULL, 0x3400343434003434ULL, 0x1e001e1e1e001e1eULL,
+	0x1c001c1c1c001c1cULL, 0xf800f8f8f800f8f8ULL, 0x5200525252005252ULL,
+	0x2000202020002020ULL, 0x1400141414001414ULL, 0xe900e9e9e900e9e9ULL,
+	0xbd00bdbdbd00bdbdULL, 0xdd00dddddd00ddddULL, 0xe400e4e4e400e4e4ULL,
+	0xa100a1a1a100a1a1ULL, 0xe000e0e0e000e0e0ULL, 0x8a008a8a8a008a8aULL,
+	0xf100f1f1f100f1f1ULL, 0xd600d6d6d600d6d6ULL, 0x7a007a7a7a007a7aULL,
+	0xbb00bbbbbb00bbbbULL, 0xe300e3e3e300e3e3ULL, 0x4000404040004040ULL,
+	0x4f004f4f4f004f4fULL,
 };
 
 const u64 camellia_sp44044404[256] = {
-	0x7070007070700070, 0x2c2c002c2c2c002c, 0xb3b300b3b3b300b3,
-	0xc0c000c0c0c000c0, 0xe4e400e4e4e400e4, 0x5757005757570057,
-	0xeaea00eaeaea00ea, 0xaeae00aeaeae00ae, 0x2323002323230023,
-	0x6b6b006b6b6b006b, 0x4545004545450045, 0xa5a500a5a5a500a5,
-	0xeded00ededed00ed, 0x4f4f004f4f4f004f, 0x1d1d001d1d1d001d,
-	0x9292009292920092, 0x8686008686860086, 0xafaf00afafaf00af,
-	0x7c7c007c7c7c007c, 0x1f1f001f1f1f001f, 0x3e3e003e3e3e003e,
-	0xdcdc00dcdcdc00dc, 0x5e5e005e5e5e005e, 0x0b0b000b0b0b000b,
-	0xa6a600a6a6a600a6, 0x3939003939390039, 0xd5d500d5d5d500d5,
-	0x5d5d005d5d5d005d, 0xd9d900d9d9d900d9, 0x5a5a005a5a5a005a,
-	0x5151005151510051, 0x6c6c006c6c6c006c, 0x8b8b008b8b8b008b,
-	0x9a9a009a9a9a009a, 0xfbfb00fbfbfb00fb, 0xb0b000b0b0b000b0,
-	0x7474007474740074, 0x2b2b002b2b2b002b, 0xf0f000f0f0f000f0,
-	0x8484008484840084, 0xdfdf00dfdfdf00df, 0xcbcb00cbcbcb00cb,
-	0x3434003434340034, 0x7676007676760076, 0x6d6d006d6d6d006d,
-	0xa9a900a9a9a900a9, 0xd1d100d1d1d100d1, 0x0404000404040004,
-	0x1414001414140014, 0x3a3a003a3a3a003a, 0xdede00dedede00de,
-	0x1111001111110011, 0x3232003232320032, 0x9c9c009c9c9c009c,
-	0x5353005353530053, 0xf2f200f2f2f200f2, 0xfefe00fefefe00fe,
-	0xcfcf00cfcfcf00cf, 0xc3c300c3c3c300c3, 0x7a7a007a7a7a007a,
-	0x2424002424240024, 0xe8e800e8e8e800e8, 0x6060006060600060,
-	0x6969006969690069, 0xaaaa00aaaaaa00aa, 0xa0a000a0a0a000a0,
-	0xa1a100a1a1a100a1, 0x6262006262620062, 0x5454005454540054,
-	0x1e1e001e1e1e001e, 0xe0e000e0e0e000e0, 0x6464006464640064,
-	0x1010001010100010, 0x0000000000000000, 0xa3a300a3a3a300a3,
-	0x7575007575750075, 0x8a8a008a8a8a008a, 0xe6e600e6e6e600e6,
-	0x0909000909090009, 0xdddd00dddddd00dd, 0x8787008787870087,
-	0x8383008383830083, 0xcdcd00cdcdcd00cd, 0x9090009090900090,
-	0x7373007373730073, 0xf6f600f6f6f600f6, 0x9d9d009d9d9d009d,
-	0xbfbf00bfbfbf00bf, 0x5252005252520052, 0xd8d800d8d8d800d8,
-	0xc8c800c8c8c800c8, 0xc6c600c6c6c600c6, 0x8181008181810081,
-	0x6f6f006f6f6f006f, 0x1313001313130013, 0x6363006363630063,
-	0xe9e900e9e9e900e9, 0xa7a700a7a7a700a7, 0x9f9f009f9f9f009f,
-	0xbcbc00bcbcbc00bc, 0x2929002929290029, 0xf9f900f9f9f900f9,
-	0x2f2f002f2f2f002f, 0xb4b400b4b4b400b4, 0x7878007878780078,
-	0x0606000606060006, 0xe7e700e7e7e700e7, 0x7171007171710071,
-	0xd4d400d4d4d400d4, 0xabab00ababab00ab, 0x8888008888880088,
-	0x8d8d008d8d8d008d, 0x7272007272720072, 0xb9b900b9b9b900b9,
-	0xf8f800f8f8f800f8, 0xacac00acacac00ac, 0x3636003636360036,
-	0x2a2a002a2a2a002a, 0x3c3c003c3c3c003c, 0xf1f100f1f1f100f1,
-	0x4040004040400040, 0xd3d300d3d3d300d3, 0xbbbb00bbbbbb00bb,
-	0x4343004343430043, 0x1515001515150015, 0xadad00adadad00ad,
-	0x7777007777770077, 0x8080008080800080, 0x8282008282820082,
-	0xecec00ececec00ec, 0x2727002727270027, 0xe5e500e5e5e500e5,
-	0x8585008585850085, 0x3535003535350035, 0x0c0c000c0c0c000c,
-	0x4141004141410041, 0xefef00efefef00ef, 0x9393009393930093,
-	0x1919001919190019, 0x2121002121210021, 0x0e0e000e0e0e000e,
-	0x4e4e004e4e4e004e, 0x6565006565650065, 0xbdbd00bdbdbd00bd,
-	0xb8b800b8b8b800b8, 0x8f8f008f8f8f008f, 0xebeb00ebebeb00eb,
-	0xcece00cecece00ce, 0x3030003030300030, 0x5f5f005f5f5f005f,
-	0xc5c500c5c5c500c5, 0x1a1a001a1a1a001a, 0xe1e100e1e1e100e1,
-	0xcaca00cacaca00ca, 0x4747004747470047, 0x3d3d003d3d3d003d,
-	0x0101000101010001, 0xd6d600d6d6d600d6, 0x5656005656560056,
-	0x4d4d004d4d4d004d, 0x0d0d000d0d0d000d, 0x6666006666660066,
-	0xcccc00cccccc00cc, 0x2d2d002d2d2d002d, 0x1212001212120012,
-	0x2020002020200020, 0xb1b100b1b1b100b1, 0x9999009999990099,
-	0x4c4c004c4c4c004c, 0xc2c200c2c2c200c2, 0x7e7e007e7e7e007e,
-	0x0505000505050005, 0xb7b700b7b7b700b7, 0x3131003131310031,
-	0x1717001717170017, 0xd7d700d7d7d700d7, 0x5858005858580058,
-	0x6161006161610061, 0x1b1b001b1b1b001b, 0x1c1c001c1c1c001c,
-	0x0f0f000f0f0f000f, 0x1616001616160016, 0x1818001818180018,
-	0x2222002222220022, 0x4444004444440044, 0xb2b200b2b2b200b2,
-	0xb5b500b5b5b500b5, 0x9191009191910091, 0x0808000808080008,
-	0xa8a800a8a8a800a8, 0xfcfc00fcfcfc00fc, 0x5050005050500050,
-	0xd0d000d0d0d000d0, 0x7d7d007d7d7d007d, 0x8989008989890089,
-	0x9797009797970097, 0x5b5b005b5b5b005b, 0x9595009595950095,
-	0xffff00ffffff00ff, 0xd2d200d2d2d200d2, 0xc4c400c4c4c400c4,
-	0x4848004848480048, 0xf7f700f7f7f700f7, 0xdbdb00dbdbdb00db,
-	0x0303000303030003, 0xdada00dadada00da, 0x3f3f003f3f3f003f,
-	0x9494009494940094, 0x5c5c005c5c5c005c, 0x0202000202020002,
-	0x4a4a004a4a4a004a, 0x3333003333330033, 0x6767006767670067,
-	0xf3f300f3f3f300f3, 0x7f7f007f7f7f007f, 0xe2e200e2e2e200e2,
-	0x9b9b009b9b9b009b, 0x2626002626260026, 0x3737003737370037,
-	0x3b3b003b3b3b003b, 0x9696009696960096, 0x4b4b004b4b4b004b,
-	0xbebe00bebebe00be, 0x2e2e002e2e2e002e, 0x7979007979790079,
-	0x8c8c008c8c8c008c, 0x6e6e006e6e6e006e, 0x8e8e008e8e8e008e,
-	0xf5f500f5f5f500f5, 0xb6b600b6b6b600b6, 0xfdfd00fdfdfd00fd,
-	0x5959005959590059, 0x9898009898980098, 0x6a6a006a6a6a006a,
-	0x4646004646460046, 0xbaba00bababa00ba, 0x2525002525250025,
-	0x4242004242420042, 0xa2a200a2a2a200a2, 0xfafa00fafafa00fa,
-	0x0707000707070007, 0x5555005555550055, 0xeeee00eeeeee00ee,
-	0x0a0a000a0a0a000a, 0x4949004949490049, 0x6868006868680068,
-	0x3838003838380038, 0xa4a400a4a4a400a4, 0x2828002828280028,
-	0x7b7b007b7b7b007b, 0xc9c900c9c9c900c9, 0xc1c100c1c1c100c1,
-	0xe3e300e3e3e300e3, 0xf4f400f4f4f400f4, 0xc7c700c7c7c700c7,
-	0x9e9e009e9e9e009e,
+	0x7070007070700070ULL, 0x2c2c002c2c2c002cULL, 0xb3b300b3b3b300b3ULL,
+	0xc0c000c0c0c000c0ULL, 0xe4e400e4e4e400e4ULL, 0x5757005757570057ULL,
+	0xeaea00eaeaea00eaULL, 0xaeae00aeaeae00aeULL, 0x2323002323230023ULL,
+	0x6b6b006b6b6b006bULL, 0x4545004545450045ULL, 0xa5a500a5a5a500a5ULL,
+	0xeded00ededed00edULL, 0x4f4f004f4f4f004fULL, 0x1d1d001d1d1d001dULL,
+	0x9292009292920092ULL, 0x8686008686860086ULL, 0xafaf00afafaf00afULL,
+	0x7c7c007c7c7c007cULL, 0x1f1f001f1f1f001fULL, 0x3e3e003e3e3e003eULL,
+	0xdcdc00dcdcdc00dcULL, 0x5e5e005e5e5e005eULL, 0x0b0b000b0b0b000bULL,
+	0xa6a600a6a6a600a6ULL, 0x3939003939390039ULL, 0xd5d500d5d5d500d5ULL,
+	0x5d5d005d5d5d005dULL, 0xd9d900d9d9d900d9ULL, 0x5a5a005a5a5a005aULL,
+	0x5151005151510051ULL, 0x6c6c006c6c6c006cULL, 0x8b8b008b8b8b008bULL,
+	0x9a9a009a9a9a009aULL, 0xfbfb00fbfbfb00fbULL, 0xb0b000b0b0b000b0ULL,
+	0x7474007474740074ULL, 0x2b2b002b2b2b002bULL, 0xf0f000f0f0f000f0ULL,
+	0x8484008484840084ULL, 0xdfdf00dfdfdf00dfULL, 0xcbcb00cbcbcb00cbULL,
+	0x3434003434340034ULL, 0x7676007676760076ULL, 0x6d6d006d6d6d006dULL,
+	0xa9a900a9a9a900a9ULL, 0xd1d100d1d1d100d1ULL, 0x0404000404040004ULL,
+	0x1414001414140014ULL, 0x3a3a003a3a3a003aULL, 0xdede00dedede00deULL,
+	0x1111001111110011ULL, 0x3232003232320032ULL, 0x9c9c009c9c9c009cULL,
+	0x5353005353530053ULL, 0xf2f200f2f2f200f2ULL, 0xfefe00fefefe00feULL,
+	0xcfcf00cfcfcf00cfULL, 0xc3c300c3c3c300c3ULL, 0x7a7a007a7a7a007aULL,
+	0x2424002424240024ULL, 0xe8e800e8e8e800e8ULL, 0x6060006060600060ULL,
+	0x6969006969690069ULL, 0xaaaa00aaaaaa00aaULL, 0xa0a000a0a0a000a0ULL,
+	0xa1a100a1a1a100a1ULL, 0x6262006262620062ULL, 0x5454005454540054ULL,
+	0x1e1e001e1e1e001eULL, 0xe0e000e0e0e000e0ULL, 0x6464006464640064ULL,
+	0x1010001010100010ULL, 0x0000000000000000ULL, 0xa3a300a3a3a300a3ULL,
+	0x7575007575750075ULL, 0x8a8a008a8a8a008aULL, 0xe6e600e6e6e600e6ULL,
+	0x0909000909090009ULL, 0xdddd00dddddd00ddULL, 0x8787008787870087ULL,
+	0x8383008383830083ULL, 0xcdcd00cdcdcd00cdULL, 0x9090009090900090ULL,
+	0x7373007373730073ULL, 0xf6f600f6f6f600f6ULL, 0x9d9d009d9d9d009dULL,
+	0xbfbf00bfbfbf00bfULL, 0x5252005252520052ULL, 0xd8d800d8d8d800d8ULL,
+	0xc8c800c8c8c800c8ULL, 0xc6c600c6c6c600c6ULL, 0x8181008181810081ULL,
+	0x6f6f006f6f6f006fULL, 0x1313001313130013ULL, 0x6363006363630063ULL,
+	0xe9e900e9e9e900e9ULL, 0xa7a700a7a7a700a7ULL, 0x9f9f009f9f9f009fULL,
+	0xbcbc00bcbcbc00bcULL, 0x2929002929290029ULL, 0xf9f900f9f9f900f9ULL,
+	0x2f2f002f2f2f002fULL, 0xb4b400b4b4b400b4ULL, 0x7878007878780078ULL,
+	0x0606000606060006ULL, 0xe7e700e7e7e700e7ULL, 0x7171007171710071ULL,
+	0xd4d400d4d4d400d4ULL, 0xabab00ababab00abULL, 0x8888008888880088ULL,
+	0x8d8d008d8d8d008dULL, 0x7272007272720072ULL, 0xb9b900b9b9b900b9ULL,
+	0xf8f800f8f8f800f8ULL, 0xacac00acacac00acULL, 0x3636003636360036ULL,
+	0x2a2a002a2a2a002aULL, 0x3c3c003c3c3c003cULL, 0xf1f100f1f1f100f1ULL,
+	0x4040004040400040ULL, 0xd3d300d3d3d300d3ULL, 0xbbbb00bbbbbb00bbULL,
+	0x4343004343430043ULL, 0x1515001515150015ULL, 0xadad00adadad00adULL,
+	0x7777007777770077ULL, 0x8080008080800080ULL, 0x8282008282820082ULL,
+	0xecec00ececec00ecULL, 0x2727002727270027ULL, 0xe5e500e5e5e500e5ULL,
+	0x8585008585850085ULL, 0x3535003535350035ULL, 0x0c0c000c0c0c000cULL,
+	0x4141004141410041ULL, 0xefef00efefef00efULL, 0x9393009393930093ULL,
+	0x1919001919190019ULL, 0x2121002121210021ULL, 0x0e0e000e0e0e000eULL,
+	0x4e4e004e4e4e004eULL, 0x6565006565650065ULL, 0xbdbd00bdbdbd00bdULL,
+	0xb8b800b8b8b800b8ULL, 0x8f8f008f8f8f008fULL, 0xebeb00ebebeb00ebULL,
+	0xcece00cecece00ceULL, 0x3030003030300030ULL, 0x5f5f005f5f5f005fULL,
+	0xc5c500c5c5c500c5ULL, 0x1a1a001a1a1a001aULL, 0xe1e100e1e1e100e1ULL,
+	0xcaca00cacaca00caULL, 0x4747004747470047ULL, 0x3d3d003d3d3d003dULL,
+	0x0101000101010001ULL, 0xd6d600d6d6d600d6ULL, 0x5656005656560056ULL,
+	0x4d4d004d4d4d004dULL, 0x0d0d000d0d0d000dULL, 0x6666006666660066ULL,
+	0xcccc00cccccc00ccULL, 0x2d2d002d2d2d002dULL, 0x1212001212120012ULL,
+	0x2020002020200020ULL, 0xb1b100b1b1b100b1ULL, 0x9999009999990099ULL,
+	0x4c4c004c4c4c004cULL, 0xc2c200c2c2c200c2ULL, 0x7e7e007e7e7e007eULL,
+	0x0505000505050005ULL, 0xb7b700b7b7b700b7ULL, 0x3131003131310031ULL,
+	0x1717001717170017ULL, 0xd7d700d7d7d700d7ULL, 0x5858005858580058ULL,
+	0x6161006161610061ULL, 0x1b1b001b1b1b001bULL, 0x1c1c001c1c1c001cULL,
+	0x0f0f000f0f0f000fULL, 0x1616001616160016ULL, 0x1818001818180018ULL,
+	0x2222002222220022ULL, 0x4444004444440044ULL, 0xb2b200b2b2b200b2ULL,
+	0xb5b500b5b5b500b5ULL, 0x9191009191910091ULL, 0x0808000808080008ULL,
+	0xa8a800a8a8a800a8ULL, 0xfcfc00fcfcfc00fcULL, 0x5050005050500050ULL,
+	0xd0d000d0d0d000d0ULL, 0x7d7d007d7d7d007dULL, 0x8989008989890089ULL,
+	0x9797009797970097ULL, 0x5b5b005b5b5b005bULL, 0x9595009595950095ULL,
+	0xffff00ffffff00ffULL, 0xd2d200d2d2d200d2ULL, 0xc4c400c4c4c400c4ULL,
+	0x4848004848480048ULL, 0xf7f700f7f7f700f7ULL, 0xdbdb00dbdbdb00dbULL,
+	0x0303000303030003ULL, 0xdada00dadada00daULL, 0x3f3f003f3f3f003fULL,
+	0x9494009494940094ULL, 0x5c5c005c5c5c005cULL, 0x0202000202020002ULL,
+	0x4a4a004a4a4a004aULL, 0x3333003333330033ULL, 0x6767006767670067ULL,
+	0xf3f300f3f3f300f3ULL, 0x7f7f007f7f7f007fULL, 0xe2e200e2e2e200e2ULL,
+	0x9b9b009b9b9b009bULL, 0x2626002626260026ULL, 0x3737003737370037ULL,
+	0x3b3b003b3b3b003bULL, 0x9696009696960096ULL, 0x4b4b004b4b4b004bULL,
+	0xbebe00bebebe00beULL, 0x2e2e002e2e2e002eULL, 0x7979007979790079ULL,
+	0x8c8c008c8c8c008cULL, 0x6e6e006e6e6e006eULL, 0x8e8e008e8e8e008eULL,
+	0xf5f500f5f5f500f5ULL, 0xb6b600b6b6b600b6ULL, 0xfdfd00fdfdfd00fdULL,
+	0x5959005959590059ULL, 0x9898009898980098ULL, 0x6a6a006a6a6a006aULL,
+	0x4646004646460046ULL, 0xbaba00bababa00baULL, 0x2525002525250025ULL,
+	0x4242004242420042ULL, 0xa2a200a2a2a200a2ULL, 0xfafa00fafafa00faULL,
+	0x0707000707070007ULL, 0x5555005555550055ULL, 0xeeee00eeeeee00eeULL,
+	0x0a0a000a0a0a000aULL, 0x4949004949490049ULL, 0x6868006868680068ULL,
+	0x3838003838380038ULL, 0xa4a400a4a4a400a4ULL, 0x2828002828280028ULL,
+	0x7b7b007b7b7b007bULL, 0xc9c900c9c9c900c9ULL, 0xc1c100c1c1c100c1ULL,
+	0xe3e300e3e3e300e3ULL, 0xf4f400f4f4f400f4ULL, 0xc7c700c7c7c700c7ULL,
+	0x9e9e009e9e9e009eULL,
 };
 
 const u64 camellia_sp11101110[256] = {
-	0x7070700070707000, 0x8282820082828200, 0x2c2c2c002c2c2c00,
-	0xececec00ececec00, 0xb3b3b300b3b3b300, 0x2727270027272700,
-	0xc0c0c000c0c0c000, 0xe5e5e500e5e5e500, 0xe4e4e400e4e4e400,
-	0x8585850085858500, 0x5757570057575700, 0x3535350035353500,
-	0xeaeaea00eaeaea00, 0x0c0c0c000c0c0c00, 0xaeaeae00aeaeae00,
-	0x4141410041414100, 0x2323230023232300, 0xefefef00efefef00,
-	0x6b6b6b006b6b6b00, 0x9393930093939300, 0x4545450045454500,
-	0x1919190019191900, 0xa5a5a500a5a5a500, 0x2121210021212100,
-	0xededed00ededed00, 0x0e0e0e000e0e0e00, 0x4f4f4f004f4f4f00,
-	0x4e4e4e004e4e4e00, 0x1d1d1d001d1d1d00, 0x6565650065656500,
-	0x9292920092929200, 0xbdbdbd00bdbdbd00, 0x8686860086868600,
-	0xb8b8b800b8b8b800, 0xafafaf00afafaf00, 0x8f8f8f008f8f8f00,
-	0x7c7c7c007c7c7c00, 0xebebeb00ebebeb00, 0x1f1f1f001f1f1f00,
-	0xcecece00cecece00, 0x3e3e3e003e3e3e00, 0x3030300030303000,
-	0xdcdcdc00dcdcdc00, 0x5f5f5f005f5f5f00, 0x5e5e5e005e5e5e00,
-	0xc5c5c500c5c5c500, 0x0b0b0b000b0b0b00, 0x1a1a1a001a1a1a00,
-	0xa6a6a600a6a6a600, 0xe1e1e100e1e1e100, 0x3939390039393900,
-	0xcacaca00cacaca00, 0xd5d5d500d5d5d500, 0x4747470047474700,
-	0x5d5d5d005d5d5d00, 0x3d3d3d003d3d3d00, 0xd9d9d900d9d9d900,
-	0x0101010001010100, 0x5a5a5a005a5a5a00, 0xd6d6d600d6d6d600,
-	0x5151510051515100, 0x5656560056565600, 0x6c6c6c006c6c6c00,
-	0x4d4d4d004d4d4d00, 0x8b8b8b008b8b8b00, 0x0d0d0d000d0d0d00,
-	0x9a9a9a009a9a9a00, 0x6666660066666600, 0xfbfbfb00fbfbfb00,
-	0xcccccc00cccccc00, 0xb0b0b000b0b0b000, 0x2d2d2d002d2d2d00,
-	0x7474740074747400, 0x1212120012121200, 0x2b2b2b002b2b2b00,
-	0x2020200020202000, 0xf0f0f000f0f0f000, 0xb1b1b100b1b1b100,
-	0x8484840084848400, 0x9999990099999900, 0xdfdfdf00dfdfdf00,
-	0x4c4c4c004c4c4c00, 0xcbcbcb00cbcbcb00, 0xc2c2c200c2c2c200,
-	0x3434340034343400, 0x7e7e7e007e7e7e00, 0x7676760076767600,
-	0x0505050005050500, 0x6d6d6d006d6d6d00, 0xb7b7b700b7b7b700,
-	0xa9a9a900a9a9a900, 0x3131310031313100, 0xd1d1d100d1d1d100,
-	0x1717170017171700, 0x0404040004040400, 0xd7d7d700d7d7d700,
-	0x1414140014141400, 0x5858580058585800, 0x3a3a3a003a3a3a00,
-	0x6161610061616100, 0xdedede00dedede00, 0x1b1b1b001b1b1b00,
-	0x1111110011111100, 0x1c1c1c001c1c1c00, 0x3232320032323200,
-	0x0f0f0f000f0f0f00, 0x9c9c9c009c9c9c00, 0x1616160016161600,
-	0x5353530053535300, 0x1818180018181800, 0xf2f2f200f2f2f200,
-	0x2222220022222200, 0xfefefe00fefefe00, 0x4444440044444400,
-	0xcfcfcf00cfcfcf00, 0xb2b2b200b2b2b200, 0xc3c3c300c3c3c300,
-	0xb5b5b500b5b5b500, 0x7a7a7a007a7a7a00, 0x9191910091919100,
-	0x2424240024242400, 0x0808080008080800, 0xe8e8e800e8e8e800,
-	0xa8a8a800a8a8a800, 0x6060600060606000, 0xfcfcfc00fcfcfc00,
-	0x6969690069696900, 0x5050500050505000, 0xaaaaaa00aaaaaa00,
-	0xd0d0d000d0d0d000, 0xa0a0a000a0a0a000, 0x7d7d7d007d7d7d00,
-	0xa1a1a100a1a1a100, 0x8989890089898900, 0x6262620062626200,
-	0x9797970097979700, 0x5454540054545400, 0x5b5b5b005b5b5b00,
-	0x1e1e1e001e1e1e00, 0x9595950095959500, 0xe0e0e000e0e0e000,
-	0xffffff00ffffff00, 0x6464640064646400, 0xd2d2d200d2d2d200,
-	0x1010100010101000, 0xc4c4c400c4c4c400, 0x0000000000000000,
-	0x4848480048484800, 0xa3a3a300a3a3a300, 0xf7f7f700f7f7f700,
-	0x7575750075757500, 0xdbdbdb00dbdbdb00, 0x8a8a8a008a8a8a00,
-	0x0303030003030300, 0xe6e6e600e6e6e600, 0xdadada00dadada00,
-	0x0909090009090900, 0x3f3f3f003f3f3f00, 0xdddddd00dddddd00,
-	0x9494940094949400, 0x8787870087878700, 0x5c5c5c005c5c5c00,
-	0x8383830083838300, 0x0202020002020200, 0xcdcdcd00cdcdcd00,
-	0x4a4a4a004a4a4a00, 0x9090900090909000, 0x3333330033333300,
-	0x7373730073737300, 0x6767670067676700, 0xf6f6f600f6f6f600,
-	0xf3f3f300f3f3f300, 0x9d9d9d009d9d9d00, 0x7f7f7f007f7f7f00,
-	0xbfbfbf00bfbfbf00, 0xe2e2e200e2e2e200, 0x5252520052525200,
-	0x9b9b9b009b9b9b00, 0xd8d8d800d8d8d800, 0x2626260026262600,
-	0xc8c8c800c8c8c800, 0x3737370037373700, 0xc6c6c600c6c6c600,
-	0x3b3b3b003b3b3b00, 0x8181810081818100, 0x9696960096969600,
-	0x6f6f6f006f6f6f00, 0x4b4b4b004b4b4b00, 0x1313130013131300,
-	0xbebebe00bebebe00, 0x6363630063636300, 0x2e2e2e002e2e2e00,
-	0xe9e9e900e9e9e900, 0x7979790079797900, 0xa7a7a700a7a7a700,
-	0x8c8c8c008c8c8c00, 0x9f9f9f009f9f9f00, 0x6e6e6e006e6e6e00,
-	0xbcbcbc00bcbcbc00, 0x8e8e8e008e8e8e00, 0x2929290029292900,
-	0xf5f5f500f5f5f500, 0xf9f9f900f9f9f900, 0xb6b6b600b6b6b600,
-	0x2f2f2f002f2f2f00, 0xfdfdfd00fdfdfd00, 0xb4b4b400b4b4b400,
-	0x5959590059595900, 0x7878780078787800, 0x9898980098989800,
-	0x0606060006060600, 0x6a6a6a006a6a6a00, 0xe7e7e700e7e7e700,
-	0x4646460046464600, 0x7171710071717100, 0xbababa00bababa00,
-	0xd4d4d400d4d4d400, 0x2525250025252500, 0xababab00ababab00,
-	0x4242420042424200, 0x8888880088888800, 0xa2a2a200a2a2a200,
-	0x8d8d8d008d8d8d00, 0xfafafa00fafafa00, 0x7272720072727200,
-	0x0707070007070700, 0xb9b9b900b9b9b900, 0x5555550055555500,
-	0xf8f8f800f8f8f800, 0xeeeeee00eeeeee00, 0xacacac00acacac00,
-	0x0a0a0a000a0a0a00, 0x3636360036363600, 0x4949490049494900,
-	0x2a2a2a002a2a2a00, 0x6868680068686800, 0x3c3c3c003c3c3c00,
-	0x3838380038383800, 0xf1f1f100f1f1f100, 0xa4a4a400a4a4a400,
-	0x4040400040404000, 0x2828280028282800, 0xd3d3d300d3d3d300,
-	0x7b7b7b007b7b7b00, 0xbbbbbb00bbbbbb00, 0xc9c9c900c9c9c900,
-	0x4343430043434300, 0xc1c1c100c1c1c100, 0x1515150015151500,
-	0xe3e3e300e3e3e300, 0xadadad00adadad00, 0xf4f4f400f4f4f400,
-	0x7777770077777700, 0xc7c7c700c7c7c700, 0x8080800080808000,
-	0x9e9e9e009e9e9e00,
+	0x7070700070707000ULL, 0x8282820082828200ULL, 0x2c2c2c002c2c2c00ULL,
+	0xececec00ececec00ULL, 0xb3b3b300b3b3b300ULL, 0x2727270027272700ULL,
+	0xc0c0c000c0c0c000ULL, 0xe5e5e500e5e5e500ULL, 0xe4e4e400e4e4e400ULL,
+	0x8585850085858500ULL, 0x5757570057575700ULL, 0x3535350035353500ULL,
+	0xeaeaea00eaeaea00ULL, 0x0c0c0c000c0c0c00ULL, 0xaeaeae00aeaeae00ULL,
+	0x4141410041414100ULL, 0x2323230023232300ULL, 0xefefef00efefef00ULL,
+	0x6b6b6b006b6b6b00ULL, 0x9393930093939300ULL, 0x4545450045454500ULL,
+	0x1919190019191900ULL, 0xa5a5a500a5a5a500ULL, 0x2121210021212100ULL,
+	0xededed00ededed00ULL, 0x0e0e0e000e0e0e00ULL, 0x4f4f4f004f4f4f00ULL,
+	0x4e4e4e004e4e4e00ULL, 0x1d1d1d001d1d1d00ULL, 0x6565650065656500ULL,
+	0x9292920092929200ULL, 0xbdbdbd00bdbdbd00ULL, 0x8686860086868600ULL,
+	0xb8b8b800b8b8b800ULL, 0xafafaf00afafaf00ULL, 0x8f8f8f008f8f8f00ULL,
+	0x7c7c7c007c7c7c00ULL, 0xebebeb00ebebeb00ULL, 0x1f1f1f001f1f1f00ULL,
+	0xcecece00cecece00ULL, 0x3e3e3e003e3e3e00ULL, 0x3030300030303000ULL,
+	0xdcdcdc00dcdcdc00ULL, 0x5f5f5f005f5f5f00ULL, 0x5e5e5e005e5e5e00ULL,
+	0xc5c5c500c5c5c500ULL, 0x0b0b0b000b0b0b00ULL, 0x1a1a1a001a1a1a00ULL,
+	0xa6a6a600a6a6a600ULL, 0xe1e1e100e1e1e100ULL, 0x3939390039393900ULL,
+	0xcacaca00cacaca00ULL, 0xd5d5d500d5d5d500ULL, 0x4747470047474700ULL,
+	0x5d5d5d005d5d5d00ULL, 0x3d3d3d003d3d3d00ULL, 0xd9d9d900d9d9d900ULL,
+	0x0101010001010100ULL, 0x5a5a5a005a5a5a00ULL, 0xd6d6d600d6d6d600ULL,
+	0x5151510051515100ULL, 0x5656560056565600ULL, 0x6c6c6c006c6c6c00ULL,
+	0x4d4d4d004d4d4d00ULL, 0x8b8b8b008b8b8b00ULL, 0x0d0d0d000d0d0d00ULL,
+	0x9a9a9a009a9a9a00ULL, 0x6666660066666600ULL, 0xfbfbfb00fbfbfb00ULL,
+	0xcccccc00cccccc00ULL, 0xb0b0b000b0b0b000ULL, 0x2d2d2d002d2d2d00ULL,
+	0x7474740074747400ULL, 0x1212120012121200ULL, 0x2b2b2b002b2b2b00ULL,
+	0x2020200020202000ULL, 0xf0f0f000f0f0f000ULL, 0xb1b1b100b1b1b100ULL,
+	0x8484840084848400ULL, 0x9999990099999900ULL, 0xdfdfdf00dfdfdf00ULL,
+	0x4c4c4c004c4c4c00ULL, 0xcbcbcb00cbcbcb00ULL, 0xc2c2c200c2c2c200ULL,
+	0x3434340034343400ULL, 0x7e7e7e007e7e7e00ULL, 0x7676760076767600ULL,
+	0x0505050005050500ULL, 0x6d6d6d006d6d6d00ULL, 0xb7b7b700b7b7b700ULL,
+	0xa9a9a900a9a9a900ULL, 0x3131310031313100ULL, 0xd1d1d100d1d1d100ULL,
+	0x1717170017171700ULL, 0x0404040004040400ULL, 0xd7d7d700d7d7d700ULL,
+	0x1414140014141400ULL, 0x5858580058585800ULL, 0x3a3a3a003a3a3a00ULL,
+	0x6161610061616100ULL, 0xdedede00dedede00ULL, 0x1b1b1b001b1b1b00ULL,
+	0x1111110011111100ULL, 0x1c1c1c001c1c1c00ULL, 0x3232320032323200ULL,
+	0x0f0f0f000f0f0f00ULL, 0x9c9c9c009c9c9c00ULL, 0x1616160016161600ULL,
+	0x5353530053535300ULL, 0x1818180018181800ULL, 0xf2f2f200f2f2f200ULL,
+	0x2222220022222200ULL, 0xfefefe00fefefe00ULL, 0x4444440044444400ULL,
+	0xcfcfcf00cfcfcf00ULL, 0xb2b2b200b2b2b200ULL, 0xc3c3c300c3c3c300ULL,
+	0xb5b5b500b5b5b500ULL, 0x7a7a7a007a7a7a00ULL, 0x9191910091919100ULL,
+	0x2424240024242400ULL, 0x0808080008080800ULL, 0xe8e8e800e8e8e800ULL,
+	0xa8a8a800a8a8a800ULL, 0x6060600060606000ULL, 0xfcfcfc00fcfcfc00ULL,
+	0x6969690069696900ULL, 0x5050500050505000ULL, 0xaaaaaa00aaaaaa00ULL,
+	0xd0d0d000d0d0d000ULL, 0xa0a0a000a0a0a000ULL, 0x7d7d7d007d7d7d00ULL,
+	0xa1a1a100a1a1a100ULL, 0x8989890089898900ULL, 0x6262620062626200ULL,
+	0x9797970097979700ULL, 0x5454540054545400ULL, 0x5b5b5b005b5b5b00ULL,
+	0x1e1e1e001e1e1e00ULL, 0x9595950095959500ULL, 0xe0e0e000e0e0e000ULL,
+	0xffffff00ffffff00ULL, 0x6464640064646400ULL, 0xd2d2d200d2d2d200ULL,
+	0x1010100010101000ULL, 0xc4c4c400c4c4c400ULL, 0x0000000000000000ULL,
+	0x4848480048484800ULL, 0xa3a3a300a3a3a300ULL, 0xf7f7f700f7f7f700ULL,
+	0x7575750075757500ULL, 0xdbdbdb00dbdbdb00ULL, 0x8a8a8a008a8a8a00ULL,
+	0x0303030003030300ULL, 0xe6e6e600e6e6e600ULL, 0xdadada00dadada00ULL,
+	0x0909090009090900ULL, 0x3f3f3f003f3f3f00ULL, 0xdddddd00dddddd00ULL,
+	0x9494940094949400ULL, 0x8787870087878700ULL, 0x5c5c5c005c5c5c00ULL,
+	0x8383830083838300ULL, 0x0202020002020200ULL, 0xcdcdcd00cdcdcd00ULL,
+	0x4a4a4a004a4a4a00ULL, 0x9090900090909000ULL, 0x3333330033333300ULL,
+	0x7373730073737300ULL, 0x6767670067676700ULL, 0xf6f6f600f6f6f600ULL,
+	0xf3f3f300f3f3f300ULL, 0x9d9d9d009d9d9d00ULL, 0x7f7f7f007f7f7f00ULL,
+	0xbfbfbf00bfbfbf00ULL, 0xe2e2e200e2e2e200ULL, 0x5252520052525200ULL,
+	0x9b9b9b009b9b9b00ULL, 0xd8d8d800d8d8d800ULL, 0x2626260026262600ULL,
+	0xc8c8c800c8c8c800ULL, 0x3737370037373700ULL, 0xc6c6c600c6c6c600ULL,
+	0x3b3b3b003b3b3b00ULL, 0x8181810081818100ULL, 0x9696960096969600ULL,
+	0x6f6f6f006f6f6f00ULL, 0x4b4b4b004b4b4b00ULL, 0x1313130013131300ULL,
+	0xbebebe00bebebe00ULL, 0x6363630063636300ULL, 0x2e2e2e002e2e2e00ULL,
+	0xe9e9e900e9e9e900ULL, 0x7979790079797900ULL, 0xa7a7a700a7a7a700ULL,
+	0x8c8c8c008c8c8c00ULL, 0x9f9f9f009f9f9f00ULL, 0x6e6e6e006e6e6e00ULL,
+	0xbcbcbc00bcbcbc00ULL, 0x8e8e8e008e8e8e00ULL, 0x2929290029292900ULL,
+	0xf5f5f500f5f5f500ULL, 0xf9f9f900f9f9f900ULL, 0xb6b6b600b6b6b600ULL,
+	0x2f2f2f002f2f2f00ULL, 0xfdfdfd00fdfdfd00ULL, 0xb4b4b400b4b4b400ULL,
+	0x5959590059595900ULL, 0x7878780078787800ULL, 0x9898980098989800ULL,
+	0x0606060006060600ULL, 0x6a6a6a006a6a6a00ULL, 0xe7e7e700e7e7e700ULL,
+	0x4646460046464600ULL, 0x7171710071717100ULL, 0xbababa00bababa00ULL,
+	0xd4d4d400d4d4d400ULL, 0x2525250025252500ULL, 0xababab00ababab00ULL,
+	0x4242420042424200ULL, 0x8888880088888800ULL, 0xa2a2a200a2a2a200ULL,
+	0x8d8d8d008d8d8d00ULL, 0xfafafa00fafafa00ULL, 0x7272720072727200ULL,
+	0x0707070007070700ULL, 0xb9b9b900b9b9b900ULL, 0x5555550055555500ULL,
+	0xf8f8f800f8f8f800ULL, 0xeeeeee00eeeeee00ULL, 0xacacac00acacac00ULL,
+	0x0a0a0a000a0a0a00ULL, 0x3636360036363600ULL, 0x4949490049494900ULL,
+	0x2a2a2a002a2a2a00ULL, 0x6868680068686800ULL, 0x3c3c3c003c3c3c00ULL,
+	0x3838380038383800ULL, 0xf1f1f100f1f1f100ULL, 0xa4a4a400a4a4a400ULL,
+	0x4040400040404000ULL, 0x2828280028282800ULL, 0xd3d3d300d3d3d300ULL,
+	0x7b7b7b007b7b7b00ULL, 0xbbbbbb00bbbbbb00ULL, 0xc9c9c900c9c9c900ULL,
+	0x4343430043434300ULL, 0xc1c1c100c1c1c100ULL, 0x1515150015151500ULL,
+	0xe3e3e300e3e3e300ULL, 0xadadad00adadad00ULL, 0xf4f4f400f4f4f400ULL,
+	0x7777770077777700ULL, 0xc7c7c700c7c7c700ULL, 0x8080800080808000ULL,
+	0x9e9e9e009e9e9e00ULL,
 };
 
 /* key constants */
-- 
cgit v1.2.3


From d4c9dbc61fe0ca042b835c6f234af12fa5f18310 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Fri, 7 Sep 2012 07:54:52 +0100
Subject: x86/mm: Fix range check in tlbflush debugfs interface

Since the shift count settable there is used for shifting values
of type "unsigned long", its value must not match or exceed
BITS_PER_LONG (otherwise the shift operations are undefined).

Similarly, the value must not be negative (but -1 must be
permitted, as that's the value used to distinguish the case of
the fine grained flushing being disabled).

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Alex Shi <alex.shi@intel.com>
Link: http://lkml.kernel.org/r/5049B65C020000780009990C@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/tlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 613cd83e8c0c..a085c560b4a5 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -320,7 +320,7 @@ static ssize_t tlbflush_write_file(struct file *file,
 	if (kstrtos8(buf, 0, &shift))
 		return -EINVAL;
 
-	if (shift > 64)
+	if (shift < -1 || shift >= BITS_PER_LONG)
 		return -EINVAL;
 
 	tlb_flushall_shift = shift;
-- 
cgit v1.2.3


From 4f97704555672f9ab48ca623561e96a9430bec9a Mon Sep 17 00:00:00 2001
From: "Ren, Yongjie" <yongjie.ren@intel.com>
Date: Fri, 7 Sep 2012 07:36:59 +0000
Subject: KVM: x86: Check INVPCID feature bit in EBX of leaf 7

Checks and operations on the INVPCID feature bit should use EBX
of CPUID leaf 7 instead of ECX.

Signed-off-by: Junjie Mao <junjie.mao@intel.com>
Signed-off-by: Yongjie Ren <yongjien.ren@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c00f03de1b79..002b4a566e2d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6575,7 +6575,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 	/* Exposing INVPCID only when PCID is exposed */
 	best = kvm_find_cpuid_entry(vcpu, 0x7, 0);
 	if (vmx_invpcid_supported() &&
-	    best && (best->ecx & bit(X86_FEATURE_INVPCID)) &&
+	    best && (best->ebx & bit(X86_FEATURE_INVPCID)) &&
 	    guest_cpuid_has_pcid(vcpu)) {
 		exec_control |= SECONDARY_EXEC_ENABLE_INVPCID;
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
@@ -6585,7 +6585,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
 			     exec_control);
 		if (best)
-			best->ecx &= ~bit(X86_FEATURE_INVPCID);
+			best->ebx &= ~bit(X86_FEATURE_INVPCID);
 	}
 }
 
-- 
cgit v1.2.3


From 3dc9a633f8a65b39c5897874138027328bfb0a94 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 4 Sep 2012 08:28:02 +0000
Subject: acpi-cpufreq: Add support for modern AMD CPUs

The programming model for P-states on modern AMD CPUs is very similar to
that of Intel and VIA. It makes sense to consolidate this support into one
driver rather than duplicating functionality between two of them. This
patch adds support for AMDs with hardware P-state control to acpi-cpufreq.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/include/asm/msr-index.h |  2 ++
 drivers/cpufreq/Kconfig.x86      |  3 ++-
 drivers/cpufreq/acpi-cpufreq.c   | 43 ++++++++++++++++++++++++++++++++++------
 3 files changed, 41 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 957ec87385af..1e1f3eb58638 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -248,6 +248,8 @@
 
 #define MSR_IA32_PERF_STATUS		0x00000198
 #define MSR_IA32_PERF_CTL		0x00000199
+#define MSR_AMD_PERF_STATUS		0xc0010063
+#define MSR_AMD_PERF_CTL		0xc0010062
 
 #define MSR_IA32_MPERF			0x000000e7
 #define MSR_IA32_APERF			0x000000e8
diff --git a/drivers/cpufreq/Kconfig.x86 b/drivers/cpufreq/Kconfig.x86
index 78ff7ee48951..8d12e378a7ef 100644
--- a/drivers/cpufreq/Kconfig.x86
+++ b/drivers/cpufreq/Kconfig.x86
@@ -23,7 +23,8 @@ config X86_ACPI_CPUFREQ
 	help
 	  This driver adds a CPUFreq driver which utilizes the ACPI
 	  Processor Performance States.
-	  This driver also supports Intel Enhanced Speedstep.
+	  This driver also supports Intel Enhanced Speedstep and newer
+	  AMD CPUs.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called acpi-cpufreq.
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 56c6c6b4eb4d..067a61f06bb5 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -54,10 +54,12 @@ MODULE_LICENSE("GPL");
 enum {
 	UNDEFINED_CAPABLE = 0,
 	SYSTEM_INTEL_MSR_CAPABLE,
+	SYSTEM_AMD_MSR_CAPABLE,
 	SYSTEM_IO_CAPABLE,
 };
 
 #define INTEL_MSR_RANGE		(0xffff)
+#define AMD_MSR_RANGE		(0x7)
 
 struct acpi_cpufreq_data {
 	struct acpi_processor_performance *acpi_data;
@@ -82,6 +84,13 @@ static int check_est_cpu(unsigned int cpuid)
 	return cpu_has(cpu, X86_FEATURE_EST);
 }
 
+static int check_amd_hwpstate_cpu(unsigned int cpuid)
+{
+	struct cpuinfo_x86 *cpu = &cpu_data(cpuid);
+
+	return cpu_has(cpu, X86_FEATURE_HW_PSTATE);
+}
+
 static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data)
 {
 	struct acpi_processor_performance *perf;
@@ -101,7 +110,11 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data)
 	int i;
 	struct acpi_processor_performance *perf;
 
-	msr &= INTEL_MSR_RANGE;
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+		msr &= AMD_MSR_RANGE;
+	else
+		msr &= INTEL_MSR_RANGE;
+
 	perf = data->acpi_data;
 
 	for (i = 0; data->freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
@@ -115,6 +128,7 @@ static unsigned extract_freq(u32 val, struct acpi_cpufreq_data *data)
 {
 	switch (data->cpu_feature) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
+	case SYSTEM_AMD_MSR_CAPABLE:
 		return extract_msr(val, data);
 	case SYSTEM_IO_CAPABLE:
 		return extract_io(val, data);
@@ -150,6 +164,7 @@ static void do_drv_read(void *_cmd)
 
 	switch (cmd->type) {
 	case SYSTEM_INTEL_MSR_CAPABLE:
+	case SYSTEM_AMD_MSR_CAPABLE:
 		rdmsr(cmd->addr.msr.reg, cmd->val, h);
 		break;
 	case SYSTEM_IO_CAPABLE:
@@ -174,6 +189,9 @@ static void do_drv_write(void *_cmd)
 		lo = (lo & ~INTEL_MSR_RANGE) | (cmd->val & INTEL_MSR_RANGE);
 		wrmsr(cmd->addr.msr.reg, lo, hi);
 		break;
+	case SYSTEM_AMD_MSR_CAPABLE:
+		wrmsr(cmd->addr.msr.reg, cmd->val, 0);
+		break;
 	case SYSTEM_IO_CAPABLE:
 		acpi_os_write_port((acpi_io_address)cmd->addr.io.port,
 				cmd->val,
@@ -217,6 +235,10 @@ static u32 get_cur_val(const struct cpumask *mask)
 		cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
 		cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
 		break;
+	case SYSTEM_AMD_MSR_CAPABLE:
+		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
+		cmd.addr.msr.reg = MSR_AMD_PERF_STATUS;
+		break;
 	case SYSTEM_IO_CAPABLE:
 		cmd.type = SYSTEM_IO_CAPABLE;
 		perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data;
@@ -326,6 +348,11 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		cmd.addr.msr.reg = MSR_IA32_PERF_CTL;
 		cmd.val = (u32) perf->states[next_perf_state].control;
 		break;
+	case SYSTEM_AMD_MSR_CAPABLE:
+		cmd.type = SYSTEM_AMD_MSR_CAPABLE;
+		cmd.addr.msr.reg = MSR_AMD_PERF_CTL;
+		cmd.val = (u32) perf->states[next_perf_state].control;
+		break;
 	case SYSTEM_IO_CAPABLE:
 		cmd.type = SYSTEM_IO_CAPABLE;
 		cmd.addr.io.port = perf->control_register.address;
@@ -580,12 +607,16 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
 		break;
 	case ACPI_ADR_SPACE_FIXED_HARDWARE:
 		pr_debug("HARDWARE addr space\n");
-		if (!check_est_cpu(cpu)) {
-			result = -ENODEV;
-			goto err_unreg;
+		if (check_est_cpu(cpu)) {
+			data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
+			break;
 		}
-		data->cpu_feature = SYSTEM_INTEL_MSR_CAPABLE;
-		break;
+		if (check_amd_hwpstate_cpu(cpu)) {
+			data->cpu_feature = SYSTEM_AMD_MSR_CAPABLE;
+			break;
+		}
+		result = -ENODEV;
+		goto err_unreg;
 	default:
 		pr_debug("Unknown addr space %d\n",
 			(u32) (perf->control_register.space_id));
-- 
cgit v1.2.3


From f594065faf4f9067c2283a34619fc0714e79a98d Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Tue, 4 Sep 2012 08:28:06 +0000
Subject: ACPI: Add fixups for AMD P-state figures

Some AMD systems may round the frequencies in ACPI tables to 100MHz
boundaries. We can obtain the real frequencies from MSRs, so add a quirk
to fix these frequencies up on AMD systems.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 arch/x86/include/asm/msr-index.h |  1 +
 drivers/acpi/processor_perflib.c | 30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1e1f3eb58638..fbee9714d9ab 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -248,6 +248,7 @@
 
 #define MSR_IA32_PERF_STATUS		0x00000198
 #define MSR_IA32_PERF_CTL		0x00000199
+#define MSR_AMD_PSTATE_DEF_BASE		0xc0010064
 #define MSR_AMD_PERF_STATUS		0xc0010063
 #define MSR_AMD_PERF_CTL		0xc0010062
 
diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
index a093dc163a42..836bfe069042 100644
--- a/drivers/acpi/processor_perflib.c
+++ b/drivers/acpi/processor_perflib.c
@@ -324,6 +324,34 @@ static int acpi_processor_get_performance_control(struct acpi_processor *pr)
 	return result;
 }
 
+#ifdef CONFIG_X86
+/*
+ * Some AMDs have 50MHz frequency multiples, but only provide 100MHz rounding
+ * in their ACPI data. Calculate the real values and fix up the _PSS data.
+ */
+static void amd_fixup_frequency(struct acpi_processor_px *px, int i)
+{
+	u32 hi, lo, fid, did;
+	int index = px->control & 0x00000007;
+
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+		return;
+
+	if ((boot_cpu_data.x86 == 0x10 && boot_cpu_data.x86_model < 10)
+	    || boot_cpu_data.x86 == 0x11) {
+		rdmsr(MSR_AMD_PSTATE_DEF_BASE + index, lo, hi);
+		fid = lo & 0x3f;
+		did = (lo >> 6) & 7;
+		if (boot_cpu_data.x86 == 0x10)
+			px->core_frequency = (100 * (fid + 0x10)) >> did;
+		else
+			px->core_frequency = (100 * (fid + 8)) >> did;
+	}
+}
+#else
+static void amd_fixup_frequency(struct acpi_processor_px *px, int i) {};
+#endif
+
 static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 {
 	int result = 0;
@@ -379,6 +407,8 @@ static int acpi_processor_get_performance_states(struct acpi_processor *pr)
 			goto end;
 		}
 
+		amd_fixup_frequency(px, i);
+
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
 				  "State [%d]: core_frequency[%d] power[%d] transition_latency[%d] bus_master_latency[%d] control[0x%x] status[0x%x]\n",
 				  i,
-- 
cgit v1.2.3


From 92b5265d38f6a4d33e9d43974f176f18547687d6 Mon Sep 17 00:00:00 2001
From: "Liu, Jinsong" <jinsong.liu@intel.com>
Date: Mon, 10 Sep 2012 06:55:39 +0800
Subject: KVM: Depend on HIGH_RES_TIMERS

KVM lapic timer and tsc deadline timer based on hrtimer,
setting a leftmost node to rb tree and then do hrtimer reprogram.
If hrtimer not configured as high resolution, hrtimer_enqueue_reprogram
do nothing and then make kvm lapic timer and tsc deadline timer fail.

Signed-off-by: Liu, Jinsong <jinsong.liu@intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 45c044f0fff7..586f00059805 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -20,6 +20,7 @@ if VIRTUALIZATION
 config KVM
 	tristate "Kernel-based Virtual Machine (KVM) support"
 	depends on HAVE_KVM
+	depends on HIGH_RES_TIMERS
 	# for device assignment:
 	depends on PCI
 	# for TASKSTATS/TASK_DELAY_ACCT:
-- 
cgit v1.2.3


From 7de5bdc96c372ab875408c86e0099958dba89f56 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 7 Sep 2012 14:15:03 +0800
Subject: KVM: MMU: remove unnecessary check

Checking the return of kvm_mmu_get_page is unnecessary since it is
guaranteed by memory cache

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 399c177212b5..aa0b469ee07d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2616,11 +2616,6 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 			sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
 					      iterator.level - 1,
 					      1, ACC_ALL, iterator.sptep);
-			if (!sp) {
-				pgprintk("nonpaging_map: ENOMEM\n");
-				kvm_release_pfn_clean(pfn);
-				return -ENOMEM;
-			}
 
 			mmu_spte_set(iterator.sptep,
 				     __pa(sp->spt)
-- 
cgit v1.2.3


From 4484141a94f4a5afea6ebc0b2abba0aa1b0ae9d1 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Fri, 7 Sep 2012 14:14:20 +0800
Subject: KVM: fix error paths for failed gfn_to_page() calls

This bug was triggered:
[ 4220.198458] BUG: unable to handle kernel paging request at fffffffffffffffe
[ 4220.203907] IP: [<ffffffff81104d85>] put_page+0xf/0x34
......
[ 4220.237326] Call Trace:
[ 4220.237361]  [<ffffffffa03830d0>] kvm_arch_destroy_vm+0xf9/0x101 [kvm]
[ 4220.237382]  [<ffffffffa036fe53>] kvm_put_kvm+0xcc/0x127 [kvm]
[ 4220.237401]  [<ffffffffa03702bc>] kvm_vcpu_release+0x18/0x1c [kvm]
[ 4220.237407]  [<ffffffff81145425>] __fput+0x111/0x1ed
[ 4220.237411]  [<ffffffff8114550f>] ____fput+0xe/0x10
[ 4220.237418]  [<ffffffff81063511>] task_work_run+0x5d/0x88
[ 4220.237424]  [<ffffffff8104c3f7>] do_exit+0x2bf/0x7ca

The test case:

	printf(fmt, ##args);		\
	exit(-1);} while (0)

static int create_vm(void)
{
	int sys_fd, vm_fd;

	sys_fd = open("/dev/kvm", O_RDWR);
	if (sys_fd < 0)
		die("open /dev/kvm fail.\n");

	vm_fd = ioctl(sys_fd, KVM_CREATE_VM, 0);
	if (vm_fd < 0)
		die("KVM_CREATE_VM fail.\n");

	return vm_fd;
}

static int create_vcpu(int vm_fd)
{
	int vcpu_fd;

	vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
	if (vcpu_fd < 0)
		die("KVM_CREATE_VCPU ioctl.\n");
	printf("Create vcpu.\n");
	return vcpu_fd;
}

static void *vcpu_thread(void *arg)
{
	int vm_fd = (int)(long)arg;

	create_vcpu(vm_fd);
	return NULL;
}

int main(int argc, char *argv[])
{
	pthread_t thread;
	int vm_fd;

	(void)argc;
	(void)argv;

	vm_fd = create_vm();
	pthread_create(&thread, NULL, vcpu_thread, (void *)(long)vm_fd);
	printf("Exit.\n");
	return 0;
}

It caused by release kvm->arch.ept_identity_map_addr which is the
error page.

The parent thread can send KILL signal to the vcpu thread when it was
exiting which stops faulting pages and potentially allocating memory.
So gfn_to_pfn/gfn_to_page may fail at this time

Fixed by checking the page before it is used

Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/vmx.c | 19 ++++++++++++++++---
 arch/x86/kvm/x86.c | 13 ++++++++++---
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 002b4a566e2d..b1eb202ee76a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3619,6 +3619,7 @@ static void seg_setup(int seg)
 
 static int alloc_apic_access_page(struct kvm *kvm)
 {
+	struct page *page;
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
@@ -3633,7 +3634,13 @@ static int alloc_apic_access_page(struct kvm *kvm)
 	if (r)
 		goto out;
 
-	kvm->arch.apic_access_page = gfn_to_page(kvm, 0xfee00);
+	page = gfn_to_page(kvm, 0xfee00);
+	if (is_error_page(page)) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	kvm->arch.apic_access_page = page;
 out:
 	mutex_unlock(&kvm->slots_lock);
 	return r;
@@ -3641,6 +3648,7 @@ out:
 
 static int alloc_identity_pagetable(struct kvm *kvm)
 {
+	struct page *page;
 	struct kvm_userspace_memory_region kvm_userspace_mem;
 	int r = 0;
 
@@ -3656,8 +3664,13 @@ static int alloc_identity_pagetable(struct kvm *kvm)
 	if (r)
 		goto out;
 
-	kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
-			kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
+	page = gfn_to_page(kvm, kvm->arch.ept_identity_map_addr >> PAGE_SHIFT);
+	if (is_error_page(page)) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	kvm->arch.ept_identity_pagetable = page;
 out:
 	mutex_unlock(&kvm->slots_lock);
 	return r;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 148ed666e311..2966c847d489 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5113,17 +5113,20 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 			!kvm_event_needs_reinjection(vcpu);
 }
 
-static void vapic_enter(struct kvm_vcpu *vcpu)
+static int vapic_enter(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	struct page *page;
 
 	if (!apic || !apic->vapic_addr)
-		return;
+		return 0;
 
 	page = gfn_to_page(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT);
+	if (is_error_page(page))
+		return -EFAULT;
 
 	vcpu->arch.apic->vapic_page = page;
+	return 0;
 }
 
 static void vapic_exit(struct kvm_vcpu *vcpu)
@@ -5430,7 +5433,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 	}
 
 	vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
-	vapic_enter(vcpu);
+	r = vapic_enter(vcpu);
+	if (r) {
+		srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
+		return r;
+	}
 
 	r = 1;
 	while (r > 0) {
-- 
cgit v1.2.3


From 280050cc81ccb2e06e4061228ee34c0cc86b1560 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 10 Sep 2012 22:48:33 +0200
Subject: x86 bpf_jit: support MOD operation

commit b6069a9570 (filter: add MOD operation) added generic
support for modulus operation in BPF.

This patch brings JIT support for x86_64

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: George Bakos <gbakos@alpinista.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/net/bpf_jit_comp.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 33643a8bcbbb..106c57829120 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -280,6 +280,31 @@ void bpf_jit_compile(struct sk_filter *fp)
 				}
 				EMIT4(0x31, 0xd2, 0xf7, 0xf3); /* xor %edx,%edx; div %ebx */
 				break;
+			case BPF_S_ALU_MOD_X: /* A %= X; */
+				seen |= SEEN_XREG;
+				EMIT2(0x85, 0xdb);	/* test %ebx,%ebx */
+				if (pc_ret0 > 0) {
+					/* addrs[pc_ret0 - 1] is start address of target
+					 * (addrs[i] - 6) is the address following this jmp
+					 * ("xor %edx,%edx; div %ebx;mov %edx,%eax" being 6 bytes long)
+					 */
+					EMIT_COND_JMP(X86_JE, addrs[pc_ret0 - 1] -
+								(addrs[i] - 6));
+				} else {
+					EMIT_COND_JMP(X86_JNE, 2 + 5);
+					CLEAR_A();
+					EMIT1_off32(0xe9, cleanup_addr - (addrs[i] - 6)); /* jmp .+off32 */
+				}
+				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
+				EMIT2(0xf7, 0xf3);	/* div %ebx */
+				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
+				break;
+			case BPF_S_ALU_MOD_K: /* A %= K; */
+				EMIT2(0x31, 0xd2);	/* xor %edx,%edx */
+				EMIT1(0xb9);EMIT(K, 4);	/* mov imm32,%ecx */
+				EMIT2(0xf7, 0xf1);	/* div %ecx */
+				EMIT2(0x89, 0xd0);	/* mov %edx,%eax */
+				break;
 			case BPF_S_ALU_DIV_K: /* A = reciprocal_divide(A, K); */
 				EMIT3(0x48, 0x69, 0xc0); /* imul imm32,%rax,%rax */
 				EMIT(K, 4);
-- 
cgit v1.2.3


From 73090f8993a40a2f67fed1ab866a928c68cd3765 Mon Sep 17 00:00:00 2001
From: Attilio Rao <attilio.rao@citrix.com>
Date: Tue, 21 Aug 2012 21:22:37 +0100
Subject: x86: Remove base argument from x86_init.paging.pagetable_setup_start

We either use swapper_pg_dir or the argument is unused. Preparatory
patch to simplify platform pagetable setup further.

Signed-off-by: Attilio Rao <attilio.rao@citrix.com>
Ackedb-by: <konrad.wilk@oracle.com>
Cc: <Ian.Campbell@citrix.com>
Cc: <Stefano.Stabellini@eu.citrix.com>
Cc: <xen-devel@lists.xensource.com>
Link: http://lkml.kernel.org/r/1345580561-8506-2-git-send-email-attilio.rao@citrix.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/pgtable_types.h | 6 +++---
 arch/x86/include/asm/x86_init.h      | 2 +-
 arch/x86/kernel/setup.c              | 2 +-
 arch/x86/kernel/x86_init.c           | 3 ++-
 arch/x86/mm/init_32.c                | 4 ++--
 arch/x86/xen/mmu.c                   | 2 +-
 6 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 013286a10c2c..e02b875e6922 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -303,11 +303,11 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
 extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
-extern void native_pagetable_setup_start(pgd_t *base);
+extern void native_pagetable_setup_start(void);
 extern void native_pagetable_setup_done(pgd_t *base);
 #else
-#define native_pagetable_setup_start x86_init_pgd_noop
-#define native_pagetable_setup_done  x86_init_pgd_noop
+#define native_pagetable_setup_start x86_init_pgd_start_noop
+#define native_pagetable_setup_done  x86_init_pgd_done_noop
 #endif
 
 struct seq_file;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 38155f667144..782ba0c4b266 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -85,7 +85,7 @@ struct x86_init_mapping {
  * @pagetable_setup_done:	platform specific post paging_init() call
  */
 struct x86_init_paging {
-	void (*pagetable_setup_start)(pgd_t *base);
+	void (*pagetable_setup_start)(void);
 	void (*pagetable_setup_done)(pgd_t *base);
 };
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80e1b95..90cbbe00adca 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -961,7 +961,7 @@ void __init setup_arch(char **cmdline_p)
 	kvmclock_init();
 #endif
 
-	x86_init.paging.pagetable_setup_start(swapper_pg_dir);
+	x86_init.paging.pagetable_setup_start();
 	paging_init();
 	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 9f3167e891ef..3b88493ec7ca 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,7 +26,8 @@
 
 void __cpuinit x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
-void __init x86_init_pgd_noop(pgd_t *unused) { }
+void __init x86_init_pgd_start_noop(void) { }
+void __init x86_init_pgd_done_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 575d86f85ce4..c4aa1b25ba34 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -445,10 +445,10 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
 }
 #endif /* CONFIG_HIGHMEM */
 
-void __init native_pagetable_setup_start(pgd_t *base)
+void __init native_pagetable_setup_start(void)
 {
 	unsigned long pfn, va;
-	pgd_t *pgd;
+	pgd_t *pgd, *base = swapper_pg_dir;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5141d808e751..32e66c8d0149 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1174,7 +1174,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
 	spin_unlock(&mm->page_table_lock);
 }
 
-static void __init xen_pagetable_setup_start(pgd_t *base)
+static void __init xen_pagetable_setup_start(void)
 {
 }
 
-- 
cgit v1.2.3


From 7737b215ad0f94d20a87d98315da9f6cadaf35c9 Mon Sep 17 00:00:00 2001
From: Attilio Rao <attilio.rao@citrix.com>
Date: Tue, 21 Aug 2012 21:22:38 +0100
Subject: x86: Rename pagetable_setup_start() to pagetable_init()

In preparation for unifying the pagetable_setup_start() and
pagetable_setup_done() setup functions, rename appropriately all the
infrastructure related to pagetable_setup_start().

Signed-off-by: Attilio Rao <attilio.rao@citrix.com>
Ackedd-by: <konrad.wilk@oracle.com>
Cc: <Ian.Campbell@citrix.com>
Cc: <Stefano.Stabellini@eu.citrix.com>
Cc: <xen-devel@lists.xensource.com>
Link: http://lkml.kernel.org/r/1345580561-8506-3-git-send-email-attilio.rao@citrix.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/pgtable_types.h | 4 ++--
 arch/x86/include/asm/x86_init.h      | 4 ++--
 arch/x86/kernel/setup.c              | 2 +-
 arch/x86/kernel/x86_init.c           | 4 ++--
 arch/x86/mm/init_32.c                | 4 ++--
 arch/x86/xen/mmu.c                   | 4 ++--
 6 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index e02b875e6922..0c01e0730f7b 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -303,10 +303,10 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 
 extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
-extern void native_pagetable_setup_start(void);
+extern void native_pagetable_init(void);
 extern void native_pagetable_setup_done(pgd_t *base);
 #else
-#define native_pagetable_setup_start x86_init_pgd_start_noop
+#define native_pagetable_init        x86_init_pgd_init_noop
 #define native_pagetable_setup_done  x86_init_pgd_done_noop
 #endif
 
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 782ba0c4b266..24084b2b3a43 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -81,11 +81,11 @@ struct x86_init_mapping {
 
 /**
  * struct x86_init_paging - platform specific paging functions
- * @pagetable_setup_start:	platform specific pre paging_init() call
+ * @pagetable_init:		platform specific paging initialization call
  * @pagetable_setup_done:	platform specific post paging_init() call
  */
 struct x86_init_paging {
-	void (*pagetable_setup_start)(void);
+	void (*pagetable_init)(void);
 	void (*pagetable_setup_done)(pgd_t *base);
 };
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 90cbbe00adca..61b7d9827afb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -961,7 +961,7 @@ void __init setup_arch(char **cmdline_p)
 	kvmclock_init();
 #endif
 
-	x86_init.paging.pagetable_setup_start();
+	x86_init.paging.pagetable_init();
 	paging_init();
 	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 3b88493ec7ca..0e1e950113b9 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,7 +26,7 @@
 
 void __cpuinit x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
-void __init x86_init_pgd_start_noop(void) { }
+void __init x86_init_pgd_init_noop(void) { }
 void __init x86_init_pgd_done_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
@@ -69,7 +69,7 @@ struct x86_init_ops x86_init __initdata = {
 	},
 
 	.paging = {
-		.pagetable_setup_start	= native_pagetable_setup_start,
+		.pagetable_init		= native_pagetable_init,
 		.pagetable_setup_done	= native_pagetable_setup_done,
 	},
 
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index c4aa1b25ba34..0e38e0e88046 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -445,7 +445,7 @@ static inline void permanent_kmaps_init(pgd_t *pgd_base)
 }
 #endif /* CONFIG_HIGHMEM */
 
-void __init native_pagetable_setup_start(void)
+void __init native_pagetable_init(void)
 {
 	unsigned long pfn, va;
 	pgd_t *pgd, *base = swapper_pg_dir;
@@ -493,7 +493,7 @@ void __init native_pagetable_setup_done(pgd_t *base)
  * If we're booting paravirtualized under a hypervisor, then there are
  * more options: we may already be running PAE, and the pagetable may
  * or may not be based in swapper_pg_dir.  In any case,
- * paravirt_pagetable_setup_start() will set up swapper_pg_dir
+ * paravirt_pagetable_init() will set up swapper_pg_dir
  * appropriately for the rest of the initialization to work.
  *
  * In general, pagetable_init() assumes that the pagetable may already
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 32e66c8d0149..624efbefb942 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1174,7 +1174,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
 	spin_unlock(&mm->page_table_lock);
 }
 
-static void __init xen_pagetable_setup_start(void)
+static void __init xen_pagetable_init(void)
 {
 }
 
@@ -2068,7 +2068,7 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 void __init xen_init_mmu_ops(void)
 {
 	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
-	x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
+	x86_init.paging.pagetable_init = xen_pagetable_init;
 	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
 	pv_mmu_ops = xen_mmu_ops;
 
-- 
cgit v1.2.3


From 843b8ed2ec598aae5e3516b21957ede62a070e36 Mon Sep 17 00:00:00 2001
From: Attilio Rao <attilio.rao@citrix.com>
Date: Tue, 21 Aug 2012 21:22:39 +0100
Subject: x86: Move paging_init() call to x86_init.paging.pagetable_init()

Move the paging_init() call to the platform specific pagetable_init()
function, so we can get rid of the extra pagetable_setup_done()
function pointer.

Signed-off-by: Attilio Rao <attilio.rao@citrix.com>
Acked-by: <konrad.wilk@oracle.com>
Cc: <Ian.Campbell@citrix.com>
Cc: <Stefano.Stabellini@eu.citrix.com>
Cc: <xen-devel@lists.xensource.com>
Link: http://lkml.kernel.org/r/1345580561-8506-4-git-send-email-attilio.rao@citrix.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/pgtable_types.h | 2 +-
 arch/x86/kernel/setup.c              | 1 -
 arch/x86/kernel/x86_init.c           | 1 -
 arch/x86/mm/init_32.c                | 1 +
 arch/x86/xen/mmu.c                   | 1 +
 5 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index 0c01e0730f7b..c93cb8eec7cc 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -306,7 +306,7 @@ extern void native_pagetable_reserve(u64 start, u64 end);
 extern void native_pagetable_init(void);
 extern void native_pagetable_setup_done(pgd_t *base);
 #else
-#define native_pagetable_init        x86_init_pgd_init_noop
+#define native_pagetable_init        paging_init
 #define native_pagetable_setup_done  x86_init_pgd_done_noop
 #endif
 
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 61b7d9827afb..315fd24131ed 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -962,7 +962,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	x86_init.paging.pagetable_init();
-	paging_init();
 	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
 	if (boot_cpu_data.cpuid_level >= 0) {
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 0e1e950113b9..5f2478fb3d6d 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,7 +26,6 @@
 
 void __cpuinit x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
-void __init x86_init_pgd_init_noop(void) { }
 void __init x86_init_pgd_done_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 0e38e0e88046..e35b4b17189a 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -475,6 +475,7 @@ void __init native_pagetable_init(void)
 		pte_clear(NULL, va, pte);
 	}
 	paravirt_alloc_pmd(&init_mm, __pa(base) >> PAGE_SHIFT);
+	paging_init();
 }
 
 void __init native_pagetable_setup_done(pgd_t *base)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 624efbefb942..c2ff7ea37b8c 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1176,6 +1176,7 @@ static void xen_exit_mmap(struct mm_struct *mm)
 
 static void __init xen_pagetable_init(void)
 {
+	paging_init();
 }
 
 static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
-- 
cgit v1.2.3


From c711288727a62f74d48032e56e51333dd104bf58 Mon Sep 17 00:00:00 2001
From: Attilio Rao <attilio.rao@citrix.com>
Date: Tue, 21 Aug 2012 21:22:40 +0100
Subject: x86: xen: Cleanup and remove x86_init.paging.pagetable_setup_done()

At this stage x86_init.paging.pagetable_setup_done is only used in the
XEN case. Move its content in the x86_init.paging.pagetable_init setup
function and remove the now unused x86_init.paging.pagetable_setup_done
remaining infrastructure.

Signed-off-by: Attilio Rao <attilio.rao@citrix.com>
Acked-by: <konrad.wilk@oracle.com>
Cc: <Ian.Campbell@citrix.com>
Cc: <Stefano.Stabellini@eu.citrix.com>
Cc: <xen-devel@lists.xensource.com>
Link: http://lkml.kernel.org/r/1345580561-8506-5-git-send-email-attilio.rao@citrix.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/pgtable_types.h |  2 --
 arch/x86/include/asm/x86_init.h      |  2 --
 arch/x86/kernel/setup.c              |  1 -
 arch/x86/kernel/x86_init.c           |  2 --
 arch/x86/mm/init_32.c                |  4 ----
 arch/x86/xen/mmu.c                   | 13 ++++---------
 6 files changed, 4 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index c93cb8eec7cc..db8fec6d2953 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -304,10 +304,8 @@ void set_pte_vaddr(unsigned long vaddr, pte_t pte);
 extern void native_pagetable_reserve(u64 start, u64 end);
 #ifdef CONFIG_X86_32
 extern void native_pagetable_init(void);
-extern void native_pagetable_setup_done(pgd_t *base);
 #else
 #define native_pagetable_init        paging_init
-#define native_pagetable_setup_done  x86_init_pgd_done_noop
 #endif
 
 struct seq_file;
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 24084b2b3a43..995ea5c3fbf4 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -82,11 +82,9 @@ struct x86_init_mapping {
 /**
  * struct x86_init_paging - platform specific paging functions
  * @pagetable_init:		platform specific paging initialization call
- * @pagetable_setup_done:	platform specific post paging_init() call
  */
 struct x86_init_paging {
 	void (*pagetable_init)(void);
-	void (*pagetable_setup_done)(pgd_t *base);
 };
 
 /**
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 315fd24131ed..4f165479c453 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -962,7 +962,6 @@ void __init setup_arch(char **cmdline_p)
 #endif
 
 	x86_init.paging.pagetable_init();
-	x86_init.paging.pagetable_setup_done(swapper_pg_dir);
 
 	if (boot_cpu_data.cpuid_level >= 0) {
 		/* A CPU has %cr4 if and only if it has CPUID */
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 5f2478fb3d6d..7a3d075a814a 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -26,7 +26,6 @@
 
 void __cpuinit x86_init_noop(void) { }
 void __init x86_init_uint_noop(unsigned int unused) { }
-void __init x86_init_pgd_done_noop(pgd_t *unused) { }
 int __init iommu_init_noop(void) { return 0; }
 void iommu_shutdown_noop(void) { }
 
@@ -69,7 +68,6 @@ struct x86_init_ops x86_init __initdata = {
 
 	.paging = {
 		.pagetable_init		= native_pagetable_init,
-		.pagetable_setup_done	= native_pagetable_setup_done,
 	},
 
 	.timers = {
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index e35b4b17189a..4f04db150027 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -478,10 +478,6 @@ void __init native_pagetable_init(void)
 	paging_init();
 }
 
-void __init native_pagetable_setup_done(pgd_t *base)
-{
-}
-
 /*
  * Build a proper pagetable for the kernel mappings.  Up until this
  * point, we've been running on some set of pagetables constructed by
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index c2ff7ea37b8c..7a769b7526cb 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1174,9 +1174,13 @@ static void xen_exit_mmap(struct mm_struct *mm)
 	spin_unlock(&mm->page_table_lock);
 }
 
+static void xen_post_allocator_init(void);
+
 static void __init xen_pagetable_init(void)
 {
 	paging_init();
+	xen_setup_shared_info();
+	xen_post_allocator_init();
 }
 
 static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
@@ -1193,14 +1197,6 @@ static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
 	}
 }
 
-static void xen_post_allocator_init(void);
-
-static void __init xen_pagetable_setup_done(pgd_t *base)
-{
-	xen_setup_shared_info();
-	xen_post_allocator_init();
-}
-
 static void xen_write_cr2(unsigned long cr2)
 {
 	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
@@ -2070,7 +2066,6 @@ void __init xen_init_mmu_ops(void)
 {
 	x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
 	x86_init.paging.pagetable_init = xen_pagetable_init;
-	x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
 	pv_mmu_ops = xen_mmu_ops;
 
 	memset(dummy_mapping, 0xff, PAGE_SIZE);
-- 
cgit v1.2.3


From 64282278989d5b0398dcb3ba7904cb00c621dc35 Mon Sep 17 00:00:00 2001
From: Attilio Rao <attilio.rao@citrix.com>
Date: Tue, 21 Aug 2012 21:22:41 +0100
Subject: x86: Document x86_init.paging.pagetable_init()

Signed-off-by: Attilio Rao <attilio.rao@citrix.com>
Acked-by: <konrad.wilk@oracle.com>
Cc: <Ian.Campbell@citrix.com>
Cc: <Stefano.Stabellini@eu.citrix.com>
Cc: <xen-devel@lists.xensource.com>
Link: http://lkml.kernel.org/r/1345580561-8506-6-git-send-email-attilio.rao@citrix.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 arch/x86/include/asm/x86_init.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 995ea5c3fbf4..57693498519c 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -81,7 +81,10 @@ struct x86_init_mapping {
 
 /**
  * struct x86_init_paging - platform specific paging functions
- * @pagetable_init:		platform specific paging initialization call
+ * @pagetable_init:	platform specific paging initialization call to setup
+ *			the kernel pagetables and prepare accessors functions.
+ *			Callback must call paging_init(). Called once after the
+ *			direct mapping for phys memory is available.
  */
 struct x86_init_paging {
 	void (*pagetable_init)(void);
-- 
cgit v1.2.3


From 2fc136eecd0c647a6b13fcd00d0c41a1a28f35a5 Mon Sep 17 00:00:00 2001
From: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Date: Wed, 12 Sep 2012 12:44:30 +0100
Subject: xen/m2p: do not reuse kmap_op->dev_bus_addr

If the caller passes a valid kmap_op to m2p_add_override, we use
kmap_op->dev_bus_addr to store the original mfn, but dev_bus_addr is
part of the interface with Xen and if we are batching the hypercalls it
might not have been written by the hypervisor yet. That means that later
on Xen will write to it and we'll think that the original mfn is
actually what Xen has written to it.

Rather than "stealing" struct members from kmap_op, keep using
page->index to store the original mfn and add another parameter to
m2p_remove_override to get the corresponding kmap_op instead.
It is now responsibility of the caller to keep track of which kmap_op
corresponds to a particular page in the m2p_override (gntdev, the only
user of this interface that passes a valid kmap_op, is already doing that).

CC: stable@kernel.org
Reported-and-Tested-By: Sander Eikelenboom <linux@eikelenboom.it>
Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/include/asm/xen/page.h     |  3 ++-
 arch/x86/xen/p2m.c                  | 27 +++++++++++----------------
 drivers/block/xen-blkback/blkback.c |  2 +-
 drivers/xen/gntdev.c                |  5 +++--
 drivers/xen/grant-table.c           |  6 ++++--
 include/xen/grant_table.h           |  3 ++-
 6 files changed, 23 insertions(+), 23 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h
index 93971e841dd5..472b9b783019 100644
--- a/arch/x86/include/asm/xen/page.h
+++ b/arch/x86/include/asm/xen/page.h
@@ -51,7 +51,8 @@ extern unsigned long set_phys_range_identity(unsigned long pfn_s,
 
 extern int m2p_add_override(unsigned long mfn, struct page *page,
 			    struct gnttab_map_grant_ref *kmap_op);
-extern int m2p_remove_override(struct page *page, bool clear_pte);
+extern int m2p_remove_override(struct page *page,
+				struct gnttab_map_grant_ref *kmap_op);
 extern struct page *m2p_find_override(unsigned long mfn);
 extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn);
 
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c
index 76ba0e97e530..72213da605f5 100644
--- a/arch/x86/xen/p2m.c
+++ b/arch/x86/xen/p2m.c
@@ -828,9 +828,6 @@ int m2p_add_override(unsigned long mfn, struct page *page,
 
 			xen_mc_issue(PARAVIRT_LAZY_MMU);
 		}
-		/* let's use dev_bus_addr to record the old mfn instead */
-		kmap_op->dev_bus_addr = page->index;
-		page->index = (unsigned long) kmap_op;
 	}
 	spin_lock_irqsave(&m2p_override_lock, flags);
 	list_add(&page->lru,  &m2p_overrides[mfn_hash(mfn)]);
@@ -857,7 +854,8 @@ int m2p_add_override(unsigned long mfn, struct page *page,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(m2p_add_override);
-int m2p_remove_override(struct page *page, bool clear_pte)
+int m2p_remove_override(struct page *page,
+		struct gnttab_map_grant_ref *kmap_op)
 {
 	unsigned long flags;
 	unsigned long mfn;
@@ -887,10 +885,8 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 	WARN_ON(!PagePrivate(page));
 	ClearPagePrivate(page);
 
-	if (clear_pte) {
-		struct gnttab_map_grant_ref *map_op =
-			(struct gnttab_map_grant_ref *) page->index;
-		set_phys_to_machine(pfn, map_op->dev_bus_addr);
+	set_phys_to_machine(pfn, page->index);
+	if (kmap_op != NULL) {
 		if (!PageHighMem(page)) {
 			struct multicall_space mcs;
 			struct gnttab_unmap_grant_ref *unmap_op;
@@ -902,13 +898,13 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 			 * issued. In this case handle is going to -1 because
 			 * it hasn't been modified yet.
 			 */
-			if (map_op->handle == -1)
+			if (kmap_op->handle == -1)
 				xen_mc_flush();
 			/*
-			 * Now if map_op->handle is negative it means that the
+			 * Now if kmap_op->handle is negative it means that the
 			 * hypercall actually returned an error.
 			 */
-			if (map_op->handle == GNTST_general_error) {
+			if (kmap_op->handle == GNTST_general_error) {
 				printk(KERN_WARNING "m2p_remove_override: "
 						"pfn %lx mfn %lx, failed to modify kernel mappings",
 						pfn, mfn);
@@ -918,8 +914,8 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 			mcs = xen_mc_entry(
 					sizeof(struct gnttab_unmap_grant_ref));
 			unmap_op = mcs.args;
-			unmap_op->host_addr = map_op->host_addr;
-			unmap_op->handle = map_op->handle;
+			unmap_op->host_addr = kmap_op->host_addr;
+			unmap_op->handle = kmap_op->handle;
 			unmap_op->dev_bus_addr = 0;
 
 			MULTI_grant_table_op(mcs.mc,
@@ -930,10 +926,9 @@ int m2p_remove_override(struct page *page, bool clear_pte)
 			set_pte_at(&init_mm, address, ptep,
 					pfn_pte(pfn, PAGE_KERNEL));
 			__flush_tlb_single(address);
-			map_op->host_addr = 0;
+			kmap_op->host_addr = 0;
 		}
-	} else
-		set_phys_to_machine(pfn, page->index);
+	}
 
 	/* p2m(m2p(mfn)) == FOREIGN_FRAME(mfn): the mfn is already present
 	 * somewhere in this domain, even before being added to the
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index 73f196ca713f..c6decb901e5e 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -337,7 +337,7 @@ static void xen_blkbk_unmap(struct pending_req *req)
 		invcount++;
 	}
 
-	ret = gnttab_unmap_refs(unmap, pages, invcount, false);
+	ret = gnttab_unmap_refs(unmap, NULL, pages, invcount);
 	BUG_ON(ret);
 }
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 1ffd03bf8e10..7f1241608489 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -314,8 +314,9 @@ static int __unmap_grant_pages(struct grant_map *map, int offset, int pages)
 		}
 	}
 
-	err = gnttab_unmap_refs(map->unmap_ops + offset, map->pages + offset,
-				pages, true);
+	err = gnttab_unmap_refs(map->unmap_ops + offset,
+			use_ptemod ? map->kmap_ops + offset : NULL, map->pages + offset,
+			pages);
 	if (err)
 		return err;
 
diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c
index 0bfc1ef11259..006726688baf 100644
--- a/drivers/xen/grant-table.c
+++ b/drivers/xen/grant-table.c
@@ -870,7 +870,8 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 EXPORT_SYMBOL_GPL(gnttab_map_refs);
 
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct page **pages, unsigned int count, bool clear_pte)
+		      struct gnttab_map_grant_ref *kmap_ops,
+		      struct page **pages, unsigned int count)
 {
 	int i, ret;
 	bool lazy = false;
@@ -888,7 +889,8 @@ int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
 	}
 
 	for (i = 0; i < count; i++) {
-		ret = m2p_remove_override(pages[i], clear_pte);
+		ret = m2p_remove_override(pages[i], kmap_ops ?
+				       &kmap_ops[i] : NULL);
 		if (ret)
 			return ret;
 	}
diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h
index 11e27c3af3cb..f19fff8650e9 100644
--- a/include/xen/grant_table.h
+++ b/include/xen/grant_table.h
@@ -187,6 +187,7 @@ int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
 		    struct gnttab_map_grant_ref *kmap_ops,
 		    struct page **pages, unsigned int count);
 int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
-		      struct page **pages, unsigned int count, bool clear_pte);
+		      struct gnttab_map_grant_ref *kunmap_ops,
+		      struct page **pages, unsigned int count);
 
 #endif /* __ASM_GNTTAB_H__ */
-- 
cgit v1.2.3


From ecba9a52acdf20530d561b7634b80c35c308943a Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Date: Wed, 5 Sep 2012 19:30:01 +0900
Subject: KVM: x86: lapic: Clean up find_highest_vector() and count_vectors()

find_highest_vector() and count_vectors():
 - Instead of using magic values, define and use proper macros.

find_highest_vector():
 - Remove likely() which is there only for historical reasons and not
   doing correct branch predictions anymore.  Using such heuristics
   to optimize this function is not worth it now.  Let CPUs predict
   things instead.

 - Stop checking word[0] separately.  This was only needed for doing
   likely() optimization.

 - Use for loop, not while, to iterate over the register array to make
   the code clearer.

Note that we actually confirmed that the likely() did wrong predictions
by inserting debug code.

Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Takuya Yoshikawa <yoshikawa.takuya@oss.ntt.co.jp>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/lapic.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 07ad628dadb7..6f9fd633c888 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -66,6 +66,7 @@
 #define APIC_DEST_NOSHORT		0x0
 #define APIC_DEST_MASK			0x800
 #define MAX_APIC_VECTOR			256
+#define APIC_VECTORS_PER_REG		32
 
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
@@ -208,25 +209,30 @@ static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 
 static int find_highest_vector(void *bitmap)
 {
-	u32 *word = bitmap;
-	int word_offset = MAX_APIC_VECTOR >> 5;
+	int vec;
+	u32 *reg;
 
-	while ((word_offset != 0) && (word[(--word_offset) << 2] == 0))
-		continue;
+	for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
+	     vec >= 0; vec -= APIC_VECTORS_PER_REG) {
+		reg = bitmap + REG_POS(vec);
+		if (*reg)
+			return fls(*reg) - 1 + vec;
+	}
 
-	if (likely(!word_offset && !word[0]))
-		return -1;
-	else
-		return fls(word[word_offset << 2]) - 1 + (word_offset << 5);
+	return -1;
 }
 
 static u8 count_vectors(void *bitmap)
 {
-	u32 *word = bitmap;
-	int word_offset;
+	int vec;
+	u32 *reg;
 	u8 count = 0;
-	for (word_offset = 0; word_offset < MAX_APIC_VECTOR >> 5; ++word_offset)
-		count += hweight32(word[word_offset << 2]);
+
+	for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
+		reg = bitmap + REG_POS(vec);
+		count += hweight32(*reg);
+	}
+
 	return count;
 }
 
-- 
cgit v1.2.3


From 35534b201c9f115c68962c095b5a9aad204d025f Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Wed, 29 Aug 2012 15:01:22 +0200
Subject: perf/x86: Export Sandy Bridge uncore clockticks event in sysfs

This patch exports the clockticks event and its encoding to user level.
The clockticks event was exported for Nehalem/Westmere but not for Sandy
Bridge (client). Given that it uses a special encoding, it needs to be
exported to user tools, so users can do:

  # perf stat -a -C 0 -e uncore_cbox_0/clockticks/ sleep 1

Signed-off-by: Stephane Eranian <eranian@google.com>
Acked-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120829130122.GA32336@quad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 0a5571080e74..38e4894165b9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -661,6 +661,11 @@ static void snb_uncore_msr_init_box(struct intel_uncore_box *box)
 	}
 }
 
+static struct uncore_event_desc snb_uncore_events[] = {
+	INTEL_UNCORE_EVENT_DESC(clockticks, "event=0xff,umask=0x00"),
+	{ /* end: all zeroes */ },
+};
+
 static struct attribute *snb_uncore_formats_attr[] = {
 	&format_attr_event.attr,
 	&format_attr_umask.attr,
@@ -704,6 +709,7 @@ static struct intel_uncore_type snb_uncore_cbox = {
 	.constraints	= snb_uncore_cbox_constraints,
 	.ops		= &snb_uncore_msr_ops,
 	.format_group	= &snb_uncore_format_group,
+	.event_descs	= snb_uncore_events,
 };
 
 static struct intel_uncore_type *snb_msr_uncores[] = {
-- 
cgit v1.2.3


From bad9ac2d7f878a31cf1ae8c1ee3768077d222bcb Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 25 Jul 2012 19:12:45 +0200
Subject: perf/x86/ibs: Check syscall attribute flags

Current implementation simply ignores attribute flags. Thus, there is
no notification to userland of unsupported features. Check syscall's
attribute flags to let userland know if a feature is supported by the
kernel. This is also needed to distinguish between future kernels what
might support a feature.

Cc: <stable@vger.kernel.org> v3.5..
Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120910093018.GO8285@erda.amd.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c | 12 ++++++++++++
 include/linux/perf_event.h               |  2 ++
 2 files changed, 14 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index 7bfb5bec8630..eebd5ffe1bba 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -209,6 +209,15 @@ static int perf_ibs_precise_event(struct perf_event *event, u64 *config)
 	return -EOPNOTSUPP;
 }
 
+static const struct perf_event_attr ibs_notsupp = {
+	.exclude_user	= 1,
+	.exclude_kernel	= 1,
+	.exclude_hv	= 1,
+	.exclude_idle	= 1,
+	.exclude_host	= 1,
+	.exclude_guest	= 1,
+};
+
 static int perf_ibs_init(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
@@ -229,6 +238,9 @@ static int perf_ibs_init(struct perf_event *event)
 	if (event->pmu != &perf_ibs->pmu)
 		return -ENOENT;
 
+	if (perf_flags(&event->attr) & perf_flags(&ibs_notsupp))
+		return -EINVAL;
+
 	if (config & ~perf_ibs->config_mask)
 		return -EINVAL;
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 33ed9d605f91..bdb41612bfec 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -274,6 +274,8 @@ struct perf_event_attr {
 	__u64	branch_sample_type; /* enum branch_sample_type */
 };
 
+#define perf_flags(attr)	(*(&(attr)->read_format + 1))
+
 /*
  * Ioctls that can be done on a perf event fd:
  */
-- 
cgit v1.2.3


From 6eebdda35e6b18d0dddb2a44e34211bd94f0cad6 Mon Sep 17 00:00:00 2001
From: Ian Campbell <ian.campbell@citrix.com>
Date: Fri, 24 Aug 2012 23:58:47 +0400
Subject: x86: Drop unnecessary kernel_eflags variable on 64-bit

On 64 bit x86 we save the current eflags in cpu_init for use in
ret_from_fork. Strictly speaking reserved bits in EFLAGS should
be read as written but in practise it is unlikely that EFLAGS
could ever be extended in this way and the kernel alread clears
any undefined flags early on.

The equivalent 32 bit code simply hard codes 0x0202 as the new
EFLAGS.

This change makes 64 bit use the same mechanism to setup the
initial EFLAGS on fork. Note that 64 bit resets EFLAGS before
calling schedule_tail() as opposed to 32 bit which calls
schedule_tail() first. Therefore the correct value for EFLAGS
has opposite IF bit.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: "H. Peter Anvin" <hpa@zytor.com>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/r/20120824195847.GA31628@moon
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/processor.h | 1 -
 arch/x86/kernel/cpu/common.c     | 4 ----
 arch/x86/kernel/entry_64.S       | 2 +-
 3 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d048cad9bcad..9738b39e4eb9 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -423,7 +423,6 @@ DECLARE_INIT_PER_CPU(irq_stack_union);
 
 DECLARE_PER_CPU(char *, irq_stack_ptr);
 DECLARE_PER_CPU(unsigned int, irq_count);
-extern unsigned long kernel_eflags;
 extern asmlinkage void ignore_sysret(void);
 #else	/* X86_64 */
 #ifdef CONFIG_CC_STACKPROTECTOR
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c5fccc..9961e2e23709 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1116,8 +1116,6 @@ void syscall_init(void)
 	       X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
 }
 
-unsigned long kernel_eflags;
-
 /*
  * Copies of the original ist values from the tss are only accessed during
  * debugging, no special alignment required.
@@ -1299,8 +1297,6 @@ void __cpuinit cpu_init(void)
 	fpu_init();
 	xsave_init();
 
-	raw_local_save_flags(kernel_eflags);
-
 	if (is_uv_system())
 		uv_cpu_init();
 }
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8c834f..b1dac12dc5e6 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -440,7 +440,7 @@ ENTRY(ret_from_fork)
 
 	LOCK ; btr $TIF_FORK,TI_flags(%r8)
 
-	pushq_cfi kernel_eflags(%rip)
+	pushq_cfi $0x0002
 	popfq_cfi				# reset kernel eflags
 
 	call schedule_tail			# rdi: 'prev' task parameter
-- 
cgit v1.2.3


From 73e8f3d7e2cb23614d5115703d76d8e54764b641 Mon Sep 17 00:00:00 2001
From: T Makphaibulchoke <tmac@hp.com>
Date: Tue, 28 Aug 2012 21:21:43 -0600
Subject: x86/mm/init.c: Fix devmem_is_allowed() off by one

Fixing an off-by-one error in devmem_is_allowed(), which allows
accesses to physical addresses 0x100000-0x100fff, an extra page
past 1MB.

Signed-off-by: T Makphaibulchoke <tmac@hp.com>
Acked-by: H. Peter Anvin <hpa@zytor.com>
Cc: yinghai@kernel.org
Cc: tiwai@suse.de
Cc: dhowells@redhat.com
Link: http://lkml.kernel.org/r/1346210503-14276-1-git-send-email-tmac@hp.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/mm/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index e0e6990723e9..ab1f6a93b527 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -319,7 +319,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
  */
 int devmem_is_allowed(unsigned long pagenr)
 {
-	if (pagenr <= 256)
+	if (pagenr < 256)
 		return 1;
 	if (iomem_is_exclusive(pagenr << PAGE_SHIFT))
 		return 0;
-- 
cgit v1.2.3


From 1edfbb4153bd29bcf8d2236676238d5237972be1 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 10 Sep 2012 12:04:16 +0100
Subject: x86/64: Adjust types of temporaries used by ffs()/fls()/fls64()

The 64-bit special cases of the former two (the thrird one is
64-bit only anyway) don't need to use "long" temporaries, as the
result will always fit in a 32-bit variable, and the functions
return plain "int". This avoids a few REX prefixes, i.e.
minimally reduces code size.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/504DE550020000780009A258@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/bitops.h | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 72f5009deb5a..ebaee695394e 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -417,10 +417,9 @@ static inline int ffs(int x)
 	 * We cannot do this on 32 bits because at the very least some
 	 * 486 CPUs did not behave this way.
 	 */
-	long tmp = -1;
 	asm("bsfl %1,%0"
 	    : "=r" (r)
-	    : "rm" (x), "0" (tmp));
+	    : "rm" (x), "0" (-1));
 #elif defined(CONFIG_X86_CMOV)
 	asm("bsfl %1,%0\n\t"
 	    "cmovzl %2,%0"
@@ -459,10 +458,9 @@ static inline int fls(int x)
 	 * We cannot do this on 32 bits because at the very least some
 	 * 486 CPUs did not behave this way.
 	 */
-	long tmp = -1;
 	asm("bsrl %1,%0"
 	    : "=r" (r)
-	    : "rm" (x), "0" (tmp));
+	    : "rm" (x), "0" (-1));
 #elif defined(CONFIG_X86_CMOV)
 	asm("bsrl %1,%0\n\t"
 	    "cmovzl %2,%0"
@@ -490,13 +488,13 @@ static inline int fls(int x)
 #ifdef CONFIG_X86_64
 static __always_inline int fls64(__u64 x)
 {
-	long bitpos = -1;
+	int bitpos = -1;
 	/*
 	 * AMD64 says BSRQ won't clobber the dest reg if x==0; Intel64 says the
 	 * dest reg is undefined if x==0, but their CPU architect says its
 	 * value is written to set it to the same as before.
 	 */
-	asm("bsrq %1,%0"
+	asm("bsrq %1,%q0"
 	    : "+r" (bitpos)
 	    : "rm" (x));
 	return bitpos + 1;
-- 
cgit v1.2.3


From 5870661c091e827973674cc3469b50c959008c2b Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 10 Sep 2012 12:24:43 +0100
Subject: x86: Prefer TZCNT over BFS

Following a relatively recent compiler change, make use of the
fact that for non-zero input BSF and TZCNT produce the same
result, and that CPUs not knowing of TZCNT will treat the
instruction as BSF (i.e. ignore what looks like a REP prefix to
them). The assumption here is that TZCNT would never have worse
performance than BSF.

For the moment, only do this when the respective generic-CPU
option is selected (as there are no specific-CPU options
covering the CPUs supporting TZCNT), and don't do that when size
optimization was requested.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/504DEA1B020000780009A277@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/bitops.h | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index ebaee695394e..b2af6645ea7e 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -347,6 +347,19 @@ static int test_bit(int nr, const volatile unsigned long *addr);
 	 ? constant_test_bit((nr), (addr))	\
 	 : variable_test_bit((nr), (addr)))
 
+#if (defined(CONFIG_X86_GENERIC) || defined(CONFIG_GENERIC_CPU)) \
+    && !defined(CONFIG_CC_OPTIMIZE_FOR_SIZE)
+/*
+ * Since BSF and TZCNT have sufficiently similar semantics for the purposes
+ * for which we use them here, BMI-capable hardware will decode the prefixed
+ * variant as 'tzcnt ...' and may execute that faster than 'bsf ...', while
+ * older hardware will ignore the REP prefix and decode it as 'bsf ...'.
+ */
+# define BSF_PREFIX "rep;"
+#else
+# define BSF_PREFIX
+#endif
+
 /**
  * __ffs - find first set bit in word
  * @word: The word to search
@@ -355,7 +368,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
  */
 static inline unsigned long __ffs(unsigned long word)
 {
-	asm("bsf %1,%0"
+	asm(BSF_PREFIX "bsf %1,%0"
 		: "=r" (word)
 		: "rm" (word));
 	return word;
@@ -369,12 +382,14 @@ static inline unsigned long __ffs(unsigned long word)
  */
 static inline unsigned long ffz(unsigned long word)
 {
-	asm("bsf %1,%0"
+	asm(BSF_PREFIX "bsf %1,%0"
 		: "=r" (word)
 		: "r" (~word));
 	return word;
 }
 
+#undef BSF_PREFIX
+
 /*
  * __fls: find last set bit in word
  * @word: The word to search
-- 
cgit v1.2.3


From 3120e25efdc0834c88e1c0f8394e2087444f8c19 Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Mon, 10 Sep 2012 12:41:45 +0100
Subject: x86/Kconfig: Clean up Kconfig defaults

The main goal here is to have the resulting .config no carry any
options that aren't enabled and can't be (i.e such where the
default is "no" and can't be changed), so that if any such
option later gets a user visible prompt, the user will actually
be prompted on a "make ...oldconfig" rather than keeping the
previously invisible option disabled.

There's a little bit of other trivial cleanup mixed in here.

Signed-off-by: Jan Beulich <jbeulich@suse.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/504DEE19020000780009A285@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/Kconfig     | 50 ++++++++++++++++++++++++++++++--------------------
 arch/x86/Kconfig.cpu |  5 +++--
 2 files changed, 33 insertions(+), 22 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4abd..3fb871908ab2 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -7,11 +7,13 @@ config 64BIT
 	  Say no to build a 32-bit kernel - formerly known as i386
 
 config X86_32
-	def_bool !64BIT
+	def_bool y
+	depends on !64BIT
 	select CLKSRC_I8253
 
 config X86_64
-	def_bool 64BIT
+	def_bool y
+	depends on 64BIT
 	select X86_DEV_DMA_OPS
 
 ### Arch settings
@@ -99,7 +101,8 @@ config X86
 	select GENERIC_STRNLEN_USER
 
 config INSTRUCTION_DECODER
-	def_bool (KPROBES || PERF_EVENTS || UPROBES)
+	def_bool y
+	depends on KPROBES || PERF_EVENTS || UPROBES
 
 config OUTPUT_FORMAT
 	string
@@ -127,13 +130,15 @@ config SBUS
 	bool
 
 config NEED_DMA_MAP_STATE
-       def_bool (X86_64 || INTEL_IOMMU || DMA_API_DEBUG)
+	def_bool y
+	depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG
 
 config NEED_SG_DMA_LENGTH
 	def_bool y
 
 config GENERIC_ISA_DMA
-	def_bool ISA_DMA_API
+	def_bool y
+	depends on ISA_DMA_API
 
 config GENERIC_BUG
 	def_bool y
@@ -150,13 +155,16 @@ config GENERIC_GPIO
 	bool
 
 config ARCH_MAY_HAVE_PC_FDC
-	def_bool ISA_DMA_API
+	def_bool y
+	depends on ISA_DMA_API
 
 config RWSEM_GENERIC_SPINLOCK
-	def_bool !X86_XADD
+	def_bool y
+	depends on !X86_XADD
 
 config RWSEM_XCHGADD_ALGORITHM
-	def_bool X86_XADD
+	def_bool y
+	depends on X86_XADD
 
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
@@ -752,7 +760,8 @@ config SWIOTLB
 	  3 GB of memory. If unsure, say Y.
 
 config IOMMU_HELPER
-	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
+	def_bool y
+	depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU
 
 config MAXSMP
 	bool "Enable Maximum number of SMP Processors and NUMA Nodes"
@@ -1159,10 +1168,12 @@ config X86_PAE
 	  consumes more pagetable space per process.
 
 config ARCH_PHYS_ADDR_T_64BIT
-	def_bool X86_64 || X86_PAE
+	def_bool y
+	depends on X86_64 || X86_PAE
 
 config ARCH_DMA_ADDR_T_64BIT
-	def_bool X86_64 || HIGHMEM64G
+	def_bool y
+	depends on X86_64 || HIGHMEM64G
 
 config DIRECT_GBPAGES
 	bool "Enable 1GB pages for kernel pagetables" if EXPERT
@@ -1285,8 +1296,8 @@ config ARCH_SELECT_MEMORY_MODEL
 	depends on ARCH_SPARSEMEM_ENABLE
 
 config ARCH_MEMORY_PROBE
-	def_bool X86_64
-	depends on MEMORY_HOTPLUG
+	def_bool y
+	depends on X86_64 && MEMORY_HOTPLUG
 
 config ARCH_PROC_KCORE_TEXT
 	def_bool y
@@ -1975,7 +1986,6 @@ config PCI_MMCONFIG
 
 config PCI_CNB20LE_QUIRK
 	bool "Read CNB20LE Host Bridge Windows" if EXPERT
-	default n
 	depends on PCI && EXPERIMENTAL
 	help
 	  Read the PCI windows out of the CNB20LE host bridge. This allows
@@ -2186,18 +2196,18 @@ config COMPAT
 	depends on IA32_EMULATION || X86_X32
 	select ARCH_WANT_OLD_COMPAT_IPC
 
+if COMPAT
 config COMPAT_FOR_U64_ALIGNMENT
-	def_bool COMPAT
-	depends on X86_64
+	def_bool y
 
 config SYSVIPC_COMPAT
 	def_bool y
-	depends on COMPAT && SYSVIPC
+	depends on SYSVIPC
 
 config KEYS_COMPAT
-	bool
-	depends on COMPAT && KEYS
-	default y
+	def_bool y
+	depends on KEYS
+endif
 
 endmenu
 
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 706e12e9984b..f3b86d0df44e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -306,7 +306,8 @@ config X86_INTERNODE_CACHE_SHIFT
 	default X86_L1_CACHE_SHIFT
 
 config X86_CMPXCHG
-	def_bool X86_64 || (X86_32 && !M386)
+	def_bool y
+	depends on X86_64 || (X86_32 && !M386)
 
 config X86_L1_CACHE_SHIFT
 	int
@@ -317,7 +318,7 @@ config X86_L1_CACHE_SHIFT
 
 config X86_XADD
 	def_bool y
-	depends on X86_64 || !M386
+	depends on !M386
 
 config X86_PPRO_FENCE
 	bool "PentiumPro memory ordering errata workaround"
-- 
cgit v1.2.3


From a5e37863ab31d78faddff15675c89979792bc0bd Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 5 Sep 2012 23:31:00 +0900
Subject: ftrace/x86: Adjust x86 regs.ip as like as x86-64

Adjust x86 regs.ip to ip + MCOUNT_INSN_SIZE as like as
on x86-64. This helps us to consolidate codes which use
regs->ip on both of x86/x86-64.

Link: http://lkml.kernel.org/r/20120905143100.10329.60109.stgit@localhost.localdomain

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/entry_32.S | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 061ac17ee974..f438a44bf8f9 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1148,7 +1148,6 @@ ENTRY(ftrace_regs_caller)
 	 * ip location, and move flags into the return ip location.
 	 */
 	pushl 4(%esp)	/* save return ip into ip slot */
-	subl $MCOUNT_INSN_SIZE, (%esp)	/* Adjust ip */
 
 	pushl $0	/* Load 0 into orig_ax */
 	pushl %gs
@@ -1169,6 +1168,7 @@ ENTRY(ftrace_regs_caller)
 	movl $__KERNEL_CS,13*4(%esp)
 
 	movl 12*4(%esp), %eax	/* Load ip (1st parameter) */
+	subl $MCOUNT_INSN_SIZE, %eax	/* Adjust ip */
 	movl 0x4(%ebp), %edx	/* Load parent ip (2nd parameter) */
 	leal function_trace_op, %ecx /* Save ftrace_pos in 3rd parameter */
 	pushl %esp		/* Save pt_regs as 4th parameter */
@@ -1180,7 +1180,6 @@ GLOBAL(ftrace_regs_call)
 	movl 14*4(%esp), %eax	/* Move flags back into cs */
 	movl %eax, 13*4(%esp)	/* Needed to keep addl from modifying flags */
 	movl 12*4(%esp), %eax	/* Get return ip from regs->ip */
-	addl $MCOUNT_INSN_SIZE, %eax
 	movl %eax, 14*4(%esp)	/* Put return ip back for ret */
 
 	popl %ebx
-- 
cgit v1.2.3


From 4b036d54bf849a75d0103b33d92a53f89ecb9315 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 5 Sep 2012 23:31:12 +0900
Subject: kprobes/x86: Fix kprobes to collectly handle IP on ftrace

Current kprobe_ftrace_handler expects regs->ip == ip, but it is
incorrect (originally on x86-64). Actually, ftrace handler sets
regs->ip = ip + MCOUNT_INSN_SIZE.
kprobe_ftrace_handler must take care for that.

Link: http://lkml.kernel.org/r/20120905143112.10329.72069.stgit@localhost.localdomain

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/kprobes.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 47ae1023a93c..f49f60cca40d 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -1072,7 +1072,8 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 	if (kprobe_running()) {
 		kprobes_inc_nmissed_count(p);
 	} else {
-		regs->ip += sizeof(kprobe_opcode_t);
+		/* Kprobe handler expects regs->ip = ip + 1 as breakpoint hit */
+		regs->ip = ip + sizeof(kprobe_opcode_t);
 
 		__this_cpu_write(current_kprobe, p);
 		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
@@ -1080,13 +1081,15 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 			p->pre_handler(p, regs);
 
 		if (unlikely(p->post_handler)) {
-			/* Emulate singlestep as if there is a 5byte nop */
+			/*
+			 * Emulate singlestep (and also recover regs->ip)
+			 * as if there is a 5byte nop
+			 */
 			regs->ip = ip + MCOUNT_INSN_SIZE;
 			kcb->kprobe_status = KPROBE_HIT_SSDONE;
 			p->post_handler(p, regs, 0);
 		}
 		__this_cpu_write(current_kprobe, NULL);
-		regs->ip = ip;	/* Recover for next callback */
 	}
 end:
 	local_irq_restore(flags);
-- 
cgit v1.2.3


From 47d5a5f88b9d25d6464c9b60c28f391e84e3ed65 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Wed, 5 Sep 2012 23:31:18 +0900
Subject: ftrace/x86-64: Allow to change RIP in handlers

Allow ftrace handlers to change RIP register (regs->ip)
in handlers. This will allow handlers to call another
function instead of original function.

Link: http://lkml.kernel.org/r/20120905143118.10329.5078.stgit@localhost.localdomain

Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/entry_64.S | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index ed767b747fe5..e9cc2b32bdf4 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -165,6 +165,10 @@ GLOBAL(ftrace_regs_call)
 	movq EFLAGS(%rsp), %rax
 	movq %rax, SS(%rsp)
 
+	/* Handlers can change the RIP */
+	movq RIP(%rsp), %rax
+	movq %rax, SS+8(%rsp)
+
 	/* restore the rest of pt_regs */
 	movq R15(%rsp), %r15
 	movq R14(%rsp), %r14
-- 
cgit v1.2.3


From c6aaf4d0bb86e2154ea31a33804cec300611255f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 5 Sep 2012 23:31:25 +0900
Subject: kprobes/x86: Fix to support jprobes on ftrace-based kprobe

Fix kprobes/x86 to support jprobes on ftrace-based kprobes.
Because of -mfentry support of ftrace, ftrace is now put
on the beginning of function where jprobes are put.

Originally ftrace-based kprobes doesn't support jprobe
because it will change regs->ip and ftrace doesn't support
changing IP and ftrace itself doesn't conflict jprobe.
However, ftrace -mfentry support moves mcount call on the
top of functions where jprobes are put. This means that
jprobe always conflicts with ftrace-based kprobe and fails.

This patch allows ftrace-based kprobes to support jprobes
by allowing to modify regs->ip and kprobes breakpoint
handler also allows to skip singlestepping because there
is a ftrace call (not an original instruction).

Link: http://lkml.kernel.org/r/20120905143125.10329.90836.stgit@localhost.localdomain

Reported-by: Fengguang Wu <fengguang.wu@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 arch/x86/kernel/kprobes.c | 42 +++++++++++++++++++++++++++++-------------
 kernel/kprobes.c          |  3 ---
 2 files changed, 29 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index f49f60cca40d..b7c2a85d1926 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -541,6 +541,8 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
 	return 1;
 }
 
+static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+				      struct kprobe_ctlblk *kcb);
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
@@ -599,6 +601,12 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 	} else if (kprobe_running()) {
 		p = __this_cpu_read(current_kprobe);
 		if (p->break_handler && p->break_handler(p, regs)) {
+#ifdef KPROBES_CAN_USE_FTRACE
+			if (kprobe_ftrace(p)) {
+				skip_singlestep(p, regs, kcb);
+				return 1;
+			}
+#endif
 			setup_singlestep(p, regs, kcb, 0);
 			return 1;
 		}
@@ -1053,6 +1061,21 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 }
 
 #ifdef KPROBES_CAN_USE_FTRACE
+static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
+				      struct kprobe_ctlblk *kcb)
+{
+	/*
+	 * Emulate singlestep (and also recover regs->ip)
+	 * as if there is a 5byte nop
+	 */
+	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+	if (unlikely(p->post_handler)) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		p->post_handler(p, regs, 0);
+	}
+	__this_cpu_write(current_kprobe, NULL);
+}
+
 /* Ftrace callback handler for kprobes */
 void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 				     struct ftrace_ops *ops, struct pt_regs *regs)
@@ -1077,19 +1100,12 @@ void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 
 		__this_cpu_write(current_kprobe, p);
 		kcb->kprobe_status = KPROBE_HIT_ACTIVE;
-		if (p->pre_handler)
-			p->pre_handler(p, regs);
-
-		if (unlikely(p->post_handler)) {
-			/*
-			 * Emulate singlestep (and also recover regs->ip)
-			 * as if there is a 5byte nop
-			 */
-			regs->ip = ip + MCOUNT_INSN_SIZE;
-			kcb->kprobe_status = KPROBE_HIT_SSDONE;
-			p->post_handler(p, regs, 0);
-		}
-		__this_cpu_write(current_kprobe, NULL);
+		if (!p->pre_handler || !p->pre_handler(p, regs))
+			skip_singlestep(p, regs, kcb);
+		/*
+		 * If pre_handler returns !0, it sets regs->ip and
+		 * resets current kprobe.
+		 */
 	}
 end:
 	local_irq_restore(flags);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 35b4315d84f5..098f396aa409 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1418,9 +1418,6 @@ static __kprobes int check_kprobe_address_safe(struct kprobe *p,
 		/* Given address is not on the instruction boundary */
 		if ((unsigned long)p->addr != ftrace_addr)
 			return -EILSEQ;
-		/* break_handler (jprobe) can not work with ftrace */
-		if (p->break_handler)
-			return -EINVAL;
 		p->flags |= KPROBE_FLAG_FTRACE;
 #else	/* !KPROBES_CAN_USE_FTRACE */
 		return -EINVAL;
-- 
cgit v1.2.3


From bdc1e47217315be14ba04881b0a4c8ecb3ff320c Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Mon, 20 Aug 2012 12:47:34 +0200
Subject: uprobes/x86: Implement x86 specific arch_uprobe_*_step

The arch specific implementation behaves like user_enable_single_step()
except that it does not disable single stepping if it was already
enabled by ptrace. This allows the debugger to single step over an
uprobe. The state of block stepping is not restored. It makes only sense
together with TF and if that was enabled then the debugger is notified.

Note: this is still not correct. For example, TIF_SINGLESTEP check
is not right, the application itself can set X86_EFLAGS_TF. And otoh
we leak TIF_SINGLESTEP (set by enable) if the probed insn is "popf".
See the next patches, we need the changes in arch/x86/kernel/step.c
first.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/include/asm/uprobes.h |  2 ++
 arch/x86/kernel/uprobes.c      | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 35 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index f3971bbcd1de..cee58624cb30 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -46,6 +46,8 @@ struct arch_uprobe_task {
 #ifdef CONFIG_X86_64
 	unsigned long			saved_scratch_register;
 #endif
+#define UPROBE_CLEAR_TF			(1 << 0)
+	unsigned int			restore_flags;
 };
 
 extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 36fd42091fa7..309a0e02b124 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -41,6 +41,9 @@
 /* Adjust the return address of a call insn */
 #define UPROBE_FIX_CALL	0x2
 
+/* Instruction will modify TF, don't change it */
+#define UPROBE_FIX_SETF	0x4
+
 #define UPROBE_FIX_RIP_AX	0x8000
 #define UPROBE_FIX_RIP_CX	0x4000
 
@@ -239,6 +242,10 @@ static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
 	insn_get_opcode(insn);	/* should be a nop */
 
 	switch (OPCODE1(insn)) {
+	case 0x9d:
+		/* popf */
+		auprobe->fixups |= UPROBE_FIX_SETF;
+		break;
 	case 0xc3:		/* ret/lret */
 	case 0xcb:
 	case 0xc2:
@@ -673,3 +680,29 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	}
 	return false;
 }
+
+void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
+{
+	struct uprobe_task	*utask		= current->utask;
+	struct arch_uprobe_task	*autask		= &utask->autask;
+
+	autask->restore_flags = 0;
+	if (!test_tsk_thread_flag(current, TIF_SINGLESTEP) &&
+			!(auprobe->fixups & UPROBE_FIX_SETF))
+		autask->restore_flags |= UPROBE_CLEAR_TF;
+	/*
+	 * The state of TIF_BLOCKSTEP is not saved. With the TF flag set we
+	 * would to examine the opcode and the flags to make it right. Without
+	 * TF block stepping makes no sense.
+	 */
+	user_enable_single_step(current);
+}
+
+void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
+{
+	struct uprobe_task *utask		= current->utask;
+	struct arch_uprobe_task	*autask		= &utask->autask;
+
+	if (autask->restore_flags & UPROBE_CLEAR_TF)
+		user_disable_single_step(current);
+}
-- 
cgit v1.2.3


From 848e8f5f0ad3169560c516fff6471be65f76e69f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 3 Aug 2012 17:31:46 +0200
Subject: ptrace/x86: Introduce set_task_blockstep() helper

No functional changes, preparation for the next fix and for uprobes
single-step fixes.

Move the code playing with TIF_BLOCKSTEP/DEBUGCTLMSR_BTF into the
new helper, set_task_blockstep().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/step.c | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index c346d1161488..7a514986ca09 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -157,6 +157,21 @@ static int enable_single_step(struct task_struct *child)
 	return 1;
 }
 
+static void set_task_blockstep(struct task_struct *task, bool on)
+{
+	unsigned long debugctl;
+
+	debugctl = get_debugctlmsr();
+	if (on) {
+		debugctl |= DEBUGCTLMSR_BTF;
+		set_tsk_thread_flag(task, TIF_BLOCKSTEP);
+	} else {
+		debugctl &= ~DEBUGCTLMSR_BTF;
+		clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
+	}
+	update_debugctlmsr(debugctl);
+}
+
 /*
  * Enable single or block step.
  */
@@ -169,19 +184,10 @@ static void enable_step(struct task_struct *child, bool block)
 	 * So no one should try to use debugger block stepping in a program
 	 * that uses user-mode single stepping itself.
 	 */
-	if (enable_single_step(child) && block) {
-		unsigned long debugctl = get_debugctlmsr();
-
-		debugctl |= DEBUGCTLMSR_BTF;
-		update_debugctlmsr(debugctl);
-		set_tsk_thread_flag(child, TIF_BLOCKSTEP);
-	} else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
-		unsigned long debugctl = get_debugctlmsr();
-
-		debugctl &= ~DEBUGCTLMSR_BTF;
-		update_debugctlmsr(debugctl);
-		clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
-	}
+	if (enable_single_step(child) && block)
+		set_task_blockstep(child, true);
+	else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
+		set_task_blockstep(child, false);
 }
 
 void user_enable_single_step(struct task_struct *child)
@@ -199,13 +205,8 @@ void user_disable_single_step(struct task_struct *child)
 	/*
 	 * Make sure block stepping (BTF) is disabled.
 	 */
-	if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) {
-		unsigned long debugctl = get_debugctlmsr();
-
-		debugctl &= ~DEBUGCTLMSR_BTF;
-		update_debugctlmsr(debugctl);
-		clear_tsk_thread_flag(child, TIF_BLOCKSTEP);
-	}
+	if (test_tsk_thread_flag(child, TIF_BLOCKSTEP))
+		set_task_blockstep(child, false);
 
 	/* Always clear TIF_SINGLESTEP... */
 	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
-- 
cgit v1.2.3


From 95cf00fa5d5e2a200a2c044c84bde8389a237e02 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 11 Aug 2012 18:06:42 +0200
Subject: ptrace/x86: Partly fix set_task_blockstep()->update_debugctlmsr()
 logic

Afaics the usage of update_debugctlmsr() and TIF_BLOCKSTEP in
step.c was always very wrong.

1. update_debugctlmsr() was simply unneeded. The child sleeps
   TASK_TRACED, __switch_to_xtra(next_p => child) should notice
   TIF_BLOCKSTEP and set/clear DEBUGCTLMSR_BTF after resume if
   needed.

2. It is wrong. The state of DEBUGCTLMSR_BTF bit in CPU register
   should always match the state of current's TIF_BLOCKSTEP bit.

3. Even get_debugctlmsr() + update_debugctlmsr() itself does not
   look right. Irq can change other bits in MSR_IA32_DEBUGCTLMSR
   register or the caller can be preempted in between.

4. It is not safe to play with TIF_BLOCKSTEP if task != current.
   DEBUGCTLMSR_BTF and TIF_BLOCKSTEP should always match each
   other if the task is running. The tracee is stopped but it
   can be SIGKILL'ed right before set/clear_tsk_thread_flag().

However, now that uprobes uses user_enable_single_step(current)
we can't simply remove update_debugctlmsr(). So this patch adds
the additional "task == current" check and disables irqs to avoid
the race with interrupts/preemption.

Unfortunately this patch doesn't solve the last problem, we need
another fix. Probably we should teach ptrace_stop() to set/clear
single/block stepping after resume.

And afaics there is yet another problem: perf can play with
MSR_IA32_DEBUGCTLMSR from nmi, this obviously means that even
__switch_to_xtra() has problems.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 arch/x86/kernel/step.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 7a514986ca09..f89cdc6ccd5b 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -161,6 +161,16 @@ static void set_task_blockstep(struct task_struct *task, bool on)
 {
 	unsigned long debugctl;
 
+	/*
+	 * Ensure irq/preemption can't change debugctl in between.
+	 * Note also that both TIF_BLOCKSTEP and debugctl should
+	 * be changed atomically wrt preemption.
+	 * FIXME: this means that set/clear TIF_BLOCKSTEP is simply
+	 * wrong if task != current, SIGKILL can wakeup the stopped
+	 * tracee and set/clear can play with the running task, this
+	 * can confuse the next __switch_to_xtra().
+	 */
+	local_irq_disable();
 	debugctl = get_debugctlmsr();
 	if (on) {
 		debugctl |= DEBUGCTLMSR_BTF;
@@ -169,7 +179,9 @@ static void set_task_blockstep(struct task_struct *task, bool on)
 		debugctl &= ~DEBUGCTLMSR_BTF;
 		clear_tsk_thread_flag(task, TIF_BLOCKSTEP);
 	}
-	update_debugctlmsr(debugctl);
+	if (task == current)
+		update_debugctlmsr(debugctl);
+	local_irq_enable();
 }
 
 /*
-- 
cgit v1.2.3


From 9bd1190a11c9d2c59d35cb999b8d170ad52aab5f Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 3 Sep 2012 15:24:17 +0200
Subject: uprobes/x86: Do not (ab)use TIF_SINGLESTEP/user_*_single_step() for
 single-stepping

user_enable/disable_single_step() was designed for ptrace, it assumes
a single user and does unnecessary and wrong things for uprobes. For
example:

	- arch_uprobe_enable_step() can't trust TIF_SINGLESTEP, an
	  application itself can set X86_EFLAGS_TF which must be
	  preserved after arch_uprobe_disable_step().

	- we do not want to set TIF_SINGLESTEP/TIF_FORCED_TF in
	  arch_uprobe_enable_step(), this only makes sense for ptrace.

	- otoh we leak TIF_SINGLESTEP if arch_uprobe_disable_step()
	  doesn't do user_disable_single_step(), the application will
	  be killed after the next syscall.

	- arch_uprobe_enable_step() does access_process_vm() we do
	  not need/want.

Change arch_uprobe_enable/disable_step() to set/clear X86_EFLAGS_TF
directly, this is much simpler and more correct. However, we need to
clear TIF_BLOCKSTEP/DEBUGCTLMSR_BTF before executing the probed insn,
add set_task_blockstep(false).

Note: with or without this patch, there is another (hopefully minor)
problem. A probed "pushf" insn can see the wrong X86_EFLAGS_TF set by
uprobes. Perhaps we should change _disable to update the stack, or
teach arch_uprobe_skip_sstep() to emulate this insn.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/include/asm/processor.h |  2 ++
 arch/x86/kernel/step.c           |  2 +-
 arch/x86/kernel/uprobes.c        | 32 ++++++++++++++++++--------------
 3 files changed, 21 insertions(+), 15 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d048cad9bcad..433d2e5c98a7 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -759,6 +759,8 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
+extern void set_task_blockstep(struct task_struct *task, bool on);
+
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index f89cdc6ccd5b..cd3b2438a980 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -157,7 +157,7 @@ static int enable_single_step(struct task_struct *child)
 	return 1;
 }
 
-static void set_task_blockstep(struct task_struct *task, bool on)
+void set_task_blockstep(struct task_struct *task, bool on)
 {
 	unsigned long debugctl;
 
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 309a0e02b124..3b4aae68efe0 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -683,26 +683,30 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 
 void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
 {
-	struct uprobe_task	*utask		= current->utask;
-	struct arch_uprobe_task	*autask		= &utask->autask;
+	struct task_struct *task = current;
+	struct arch_uprobe_task	*autask	= &task->utask->autask;
+	struct pt_regs *regs = task_pt_regs(task);
 
 	autask->restore_flags = 0;
-	if (!test_tsk_thread_flag(current, TIF_SINGLESTEP) &&
-			!(auprobe->fixups & UPROBE_FIX_SETF))
+	if (!(regs->flags & X86_EFLAGS_TF) &&
+	    !(auprobe->fixups & UPROBE_FIX_SETF))
 		autask->restore_flags |= UPROBE_CLEAR_TF;
-	/*
-	 * The state of TIF_BLOCKSTEP is not saved. With the TF flag set we
-	 * would to examine the opcode and the flags to make it right. Without
-	 * TF block stepping makes no sense.
-	 */
-	user_enable_single_step(current);
+
+	regs->flags |= X86_EFLAGS_TF;
+	if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
+		set_task_blockstep(task, false);
 }
 
 void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
 {
-	struct uprobe_task *utask		= current->utask;
-	struct arch_uprobe_task	*autask		= &utask->autask;
-
+	struct task_struct *task = current;
+	struct arch_uprobe_task	*autask	= &task->utask->autask;
+	struct pt_regs *regs = task_pt_regs(task);
+	/*
+	 * The state of TIF_BLOCKSTEP was not saved so we can get an extra
+	 * SIGTRAP if we do not clear TF. We need to examine the opcode to
+	 * make it right.
+	 */
 	if (autask->restore_flags & UPROBE_CLEAR_TF)
-		user_disable_single_step(current);
+		regs->flags &= ~X86_EFLAGS_TF;
 }
-- 
cgit v1.2.3


From 3a4664aa8362d9fa9110828f55afa9f9fcd7e484 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 3 Sep 2012 16:05:10 +0200
Subject: uprobes/x86: Xol should send SIGTRAP if X86_EFLAGS_TF was set

arch_uprobe_disable_step() correctly preserves X86_EFLAGS_TF and
returns to user-mode. But this means the application gets SIGTRAP
only after the next insn.

This means that UPROBE_CLEAR_TF logic is not really right. _enable
should only record the state of X86_EFLAGS_TF, and _disable should
check it separately from UPROBE_FIX_SETF.

Remove arch_uprobe_task->restore_flags, add ->saved_tf instead, and
change enable/disable accordingly. This assumes that the probed insn
was not trapped, see the next patch.

arch_uprobe_skip_sstep() logic has the same problem, change it to
check X86_EFLAGS_TF and send SIGTRAP as well. We will cleanup this
all after we fold enable/disable_step into pre/post_hol hooks.

Note: send_sig(SIGTRAP) is not actually right, we need send_sigtrap().
But this needs more changes, handle_swbp() does the same and this is
equally wrong.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/include/asm/uprobes.h |  3 +--
 arch/x86/kernel/uprobes.c      | 19 +++++++++++++------
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index cee58624cb30..d561ff5a3d4d 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -46,8 +46,7 @@ struct arch_uprobe_task {
 #ifdef CONFIG_X86_64
 	unsigned long			saved_scratch_register;
 #endif
-#define UPROBE_CLEAR_TF			(1 << 0)
-	unsigned int			restore_flags;
+	unsigned int			saved_tf;
 };
 
 extern int  arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 3b4aae68efe0..7e993d1f1992 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -653,7 +653,7 @@ void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
  * Skip these instructions as per the currently known x86 ISA.
  * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
  */
-bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+static bool __skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 {
 	int i;
 
@@ -681,16 +681,21 @@ bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
 	return false;
 }
 
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+	bool ret = __skip_sstep(auprobe, regs);
+	if (ret && (regs->flags & X86_EFLAGS_TF))
+		send_sig(SIGTRAP, current, 0);
+	return ret;
+}
+
 void arch_uprobe_enable_step(struct arch_uprobe *auprobe)
 {
 	struct task_struct *task = current;
 	struct arch_uprobe_task	*autask	= &task->utask->autask;
 	struct pt_regs *regs = task_pt_regs(task);
 
-	autask->restore_flags = 0;
-	if (!(regs->flags & X86_EFLAGS_TF) &&
-	    !(auprobe->fixups & UPROBE_FIX_SETF))
-		autask->restore_flags |= UPROBE_CLEAR_TF;
+	autask->saved_tf = !!(regs->flags & X86_EFLAGS_TF);
 
 	regs->flags |= X86_EFLAGS_TF;
 	if (test_tsk_thread_flag(task, TIF_BLOCKSTEP))
@@ -707,6 +712,8 @@ void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
 	 * SIGTRAP if we do not clear TF. We need to examine the opcode to
 	 * make it right.
 	 */
-	if (autask->restore_flags & UPROBE_CLEAR_TF)
+	if (autask->saved_tf)
+		send_sig(SIGTRAP, task, 0);
+	else if (!(auprobe->fixups & UPROBE_FIX_SETF))
 		regs->flags &= ~X86_EFLAGS_TF;
 }
-- 
cgit v1.2.3


From d6a00b35e411519d774d978cdf80e4406d01b36b Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Sat, 8 Sep 2012 18:38:15 +0200
Subject: uprobes/x86: Fix arch_uprobe_disable_step() && UTASK_SSTEP_TRAPPED
 interaction

arch_uprobe_disable_step() should also take UTASK_SSTEP_TRAPPED into
account. In this case the probed insn was not executed, we need to
clear X86_EFLAGS_TF if it was set by us and that is all.

Again, this code will look more clean when we move it into
arch_uprobe_post_xol() and arch_uprobe_abort_xol().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/kernel/uprobes.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
index 7e993d1f1992..9538f00827a9 100644
--- a/arch/x86/kernel/uprobes.c
+++ b/arch/x86/kernel/uprobes.c
@@ -706,14 +706,20 @@ void arch_uprobe_disable_step(struct arch_uprobe *auprobe)
 {
 	struct task_struct *task = current;
 	struct arch_uprobe_task	*autask	= &task->utask->autask;
+	bool trapped = (task->utask->state == UTASK_SSTEP_TRAPPED);
 	struct pt_regs *regs = task_pt_regs(task);
 	/*
 	 * The state of TIF_BLOCKSTEP was not saved so we can get an extra
 	 * SIGTRAP if we do not clear TF. We need to examine the opcode to
 	 * make it right.
 	 */
-	if (autask->saved_tf)
-		send_sig(SIGTRAP, task, 0);
-	else if (!(auprobe->fixups & UPROBE_FIX_SETF))
-		regs->flags &= ~X86_EFLAGS_TF;
+	if (unlikely(trapped)) {
+		if (!autask->saved_tf)
+			regs->flags &= ~X86_EFLAGS_TF;
+	} else {
+		if (autask->saved_tf)
+			send_sig(SIGTRAP, task, 0);
+		else if (!(auprobe->fixups & UPROBE_FIX_SETF))
+			regs->flags &= ~X86_EFLAGS_TF;
+	}
 }
-- 
cgit v1.2.3


From baedbf02b1912225d60dd7403acb4b4e003088b5 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 3 Sep 2012 17:02:16 +0200
Subject: uprobes: Make arch_uprobe_task->saved_trap_nr "unsigned int"

Make arch_uprobe_task->saved_trap_nr "unsigned int" and move it down
after ->saved_scratch_register, this changes sizeof() from 24 to 16.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
---
 arch/x86/include/asm/uprobes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
index d561ff5a3d4d..8ff8be7835ab 100644
--- a/arch/x86/include/asm/uprobes.h
+++ b/arch/x86/include/asm/uprobes.h
@@ -42,10 +42,10 @@ struct arch_uprobe {
 };
 
 struct arch_uprobe_task {
-	unsigned long			saved_trap_nr;
 #ifdef CONFIG_X86_64
 	unsigned long			saved_scratch_register;
 #endif
+	unsigned int			saved_trap_nr;
 	unsigned int			saved_tf;
 };
 
-- 
cgit v1.2.3


From 38cb5ef4473c6f510fae3a00bdac3acd550e3796 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 26 Jul 2012 18:00:27 -0400
Subject: X86: Improve GOP detection in the EFI boot stub

We currently use the PCI IO protocol as a proxy for a functional GOP. This
is less than ideal, since some platforms will put the GOP on output devices
rather than the GPU itself. Move to using the conout protocol. This is not
guaranteed per-spec, but is part of the consplitter implementation that
causes this problem in the first place and so should be reliable.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/boot/compressed/eboot.c | 29 ++++++++++++++++-------------
 arch/x86/boot/compressed/eboot.h |  4 ++++
 2 files changed, 20 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index b3e0227df2c9..d5e4044505d3 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -276,8 +276,9 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 	nr_gops = size / sizeof(void *);
 	for (i = 0; i < nr_gops; i++) {
 		struct efi_graphics_output_mode_info *info;
-		efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID;
-		void *pciio;
+		efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID;
+		bool conout_found = false;
+		void *dummy;
 		void *h = gop_handle[i];
 
 		status = efi_call_phys3(sys_table->boottime->handle_protocol,
@@ -285,19 +286,21 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 		if (status != EFI_SUCCESS)
 			continue;
 
-		efi_call_phys3(sys_table->boottime->handle_protocol,
-			       h, &pciio_proto, &pciio);
+		status = efi_call_phys3(sys_table->boottime->handle_protocol,
+					h, &conout_proto, &dummy);
+
+		if (status == EFI_SUCCESS)
+			conout_found = true;
 
 		status = efi_call_phys4(gop->query_mode, gop,
 					gop->mode->mode, &size, &info);
-		if (status == EFI_SUCCESS && (!first_gop || pciio)) {
+		if (status == EFI_SUCCESS && (!first_gop || conout_found)) {
 			/*
-			 * Apple provide GOPs that are not backed by
-			 * real hardware (they're used to handle
-			 * multiple displays). The workaround is to
-			 * search for a GOP implementing the PCIIO
-			 * protocol, and if one isn't found, to just
-			 * fallback to the first GOP.
+			 * Systems that use the UEFI Console Splitter may
+			 * provide multiple GOP devices, not all of which are
+			 * backed by real hardware. The workaround is to search
+			 * for a GOP implementing the ConOut protocol, and if
+			 * one isn't found, to just fall back to the first GOP.
 			 */
 			width = info->horizontal_resolution;
 			height = info->vertical_resolution;
@@ -308,10 +311,10 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 			pixels_per_scan_line = info->pixels_per_scan_line;
 
 			/*
-			 * Once we've found a GOP supporting PCIIO,
+			 * Once we've found a GOP supporting ConOut,
 			 * don't bother looking any further.
 			 */
-			if (pciio)
+			if (conout_found)
 				break;
 
 			first_gop = gop;
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h
index 3b6e15627c55..e5b0a8f91c5f 100644
--- a/arch/x86/boot/compressed/eboot.h
+++ b/arch/x86/boot/compressed/eboot.h
@@ -14,6 +14,10 @@
 #define EFI_PAGE_SIZE		(1UL << EFI_PAGE_SHIFT)
 #define EFI_READ_CHUNK_SIZE	(1024 * 1024)
 
+#define EFI_CONSOLE_OUT_DEVICE_GUID    \
+	EFI_GUID(0xd3b36f2c, 0xd551, 0x11d4, 0x9a, 0x46, 0x0, 0x90, 0x27, \
+		  0x3f, 0xc1, 0x4d)
+
 #define PIXEL_RGB_RESERVED_8BIT_PER_COLOR		0
 #define PIXEL_BGR_RESERVED_8BIT_PER_COLOR		1
 #define PIXEL_BIT_MASK					2
-- 
cgit v1.2.3


From 9dead5bbb825d7c25c0400e61de83075046322d0 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Thu, 26 Jul 2012 18:00:00 -0400
Subject: efi: Build EFI stub with EFI-appropriate options

We can't assume the presence of the red zone while we're still in a boot
services environment, so we should build with -fno-red-zone to avoid
problems. Change the size of wchar at the same time to make string handling
simpler.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/boot/compressed/Makefile | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index e398bb5d63bb..8a84501acb1b 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -28,6 +28,9 @@ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
 	$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
 	$(obj)/piggy.o
 
+$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone
+$(obj)/efi_stub_$(BITS).o: KBUILD_CLFAGS += -fshort-wchar -mno-red-zone
+
 ifeq ($(CONFIG_EFI_STUB), y)
 	VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
 endif
-- 
cgit v1.2.3


From d6cf86d8f23253225fe2a763d627ecf7dfee9dae Mon Sep 17 00:00:00 2001
From: Seiji Aguchi <seiji.aguchi@hds.com>
Date: Tue, 24 Jul 2012 13:27:23 +0000
Subject: efi: initialize efi.runtime_version to make
 query_variable_info/update_capsule workable

A value of efi.runtime_version is checked before calling
update_capsule()/query_variable_info() as follows.
But it isn't initialized anywhere.

<snip>
static efi_status_t virt_efi_query_variable_info(u32 attr,
                                                 u64 *storage_space,
                                                 u64 *remaining_space,
                                                 u64 *max_variable_size)
{
        if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION)
                return EFI_UNSUPPORTED;
<snip>

This patch initializes a value of efi.runtime_version at boot time.

Signed-off-by: Seiji Aguchi <seiji.aguchi@hds.com>
Acked-by: Matthew Garrett <mjg@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/platform/efi/efi.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 92660edaa1e7..f55a4ce6dc49 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -890,6 +890,7 @@ void __init efi_enter_virtual_mode(void)
 	 *
 	 * Call EFI services through wrapper functions.
 	 */
+	efi.runtime_version = efi_systab.fw_revision;
 	efi.get_time = virt_efi_get_time;
 	efi.set_time = virt_efi_set_time;
 	efi.get_wakeup_time = virt_efi_get_wakeup_time;
-- 
cgit v1.2.3


From f462ed939de67c20528bc08f11d2fc4f2d59c0d5 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Fri, 27 Jul 2012 12:58:53 -0400
Subject: efifb: Skip DMI checks if the bootloader knows what it's doing

The majority of the DMI checks in efifb are for cases where the bootloader
has provided invalid information. However, on some machines the overrides
may do more harm than good due to configuration differences between machines
with the same machine identifier. It turns out that it's possible for the
bootloader to get the correct information on GOP-based systems, but we
can't guarantee that the kernel's being booted with one that's been updated
to do so. Add support for a capabilities flag that can be set by the
bootloader, and skip the DMI checks in that case. Additionally, set this
flag in the UEFI stub code.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Acked-by: Peter Jones <pjones@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/boot/compressed/eboot.c | 2 ++
 drivers/video/efifb.c            | 4 +++-
 include/linux/screen_info.h      | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index d5e4044505d3..bbd83b9cb4da 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -379,6 +379,8 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 		si->rsvd_pos = 0;
 	}
 
+	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
+
 free_handle:
 	efi_call_phys1(sys_table->boottime->free_pool, gop_handle);
 	return status;
diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c
index b4a632ada401..932abaa58a89 100644
--- a/drivers/video/efifb.c
+++ b/drivers/video/efifb.c
@@ -553,7 +553,9 @@ static int __init efifb_init(void)
 	int ret;
 	char *option = NULL;
 
-	dmi_check_system(dmi_system_table);
+	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI ||
+	    !(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS))
+		dmi_check_system(dmi_system_table);
 
 	if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
 		return -ENODEV;
diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h
index 899fbb487c94..fb3c5a8fef3d 100644
--- a/include/linux/screen_info.h
+++ b/include/linux/screen_info.h
@@ -68,6 +68,8 @@ struct screen_info {
 
 #define VIDEO_FLAGS_NOCURSOR	(1 << 0) /* The video mode has no cursor set */
 
+#define VIDEO_CAPABILITY_SKIP_QUIRKS	(1 << 0)
+
 #ifdef __KERNEL__
 extern struct screen_info screen_info;
 
-- 
cgit v1.2.3


From e9b10953edbccd3744e039ffc060ab2692f17856 Mon Sep 17 00:00:00 2001
From: Matthew Garrett <mjg@redhat.com>
Date: Fri, 27 Jul 2012 17:20:49 -0400
Subject: x86, EFI: Calculate the EFI framebuffer size instead of trusting the
 firmware

Seth Forshee reported that his system was reporting that the EFI framebuffer
stretched from 0x90010000-0xb0010000 despite the GPU's BAR only covering
0x90000000-0x9ffffff. It's safer to calculate this value from the pixel
stride and screen height (values we already depend on) rather than face
potential problems with resource allocation later on.

Signed-off-by: Matthew Garrett <mjg@redhat.com>
Tested-by: Seth Forshee <seth.forshee@canonical.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
---
 arch/x86/boot/compressed/eboot.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index bbd83b9cb4da..c760e073963e 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -331,7 +331,6 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 	si->lfb_width = width;
 	si->lfb_height = height;
 	si->lfb_base = fb_base;
-	si->lfb_size = fb_size;
 	si->pages = 1;
 
 	if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) {
@@ -379,6 +378,8 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto,
 		si->rsvd_pos = 0;
 	}
 
+	si->lfb_size = si->lfb_linelength * si->lfb_height;
+
 	si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS;
 
 free_handle:
-- 
cgit v1.2.3


From 83287ea420ced7242a704488aab0fcdcf2ced9ab Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 15:10:57 +0300
Subject: KVM: VMX: Make lto-friendly

LTO (link-time optimization) doesn't like local labels to be referred to
from a different function, since the two functions may be built in separate
compilation units.  Use an external variable instead.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d62b4139a292..5faf12ace546 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -127,6 +127,8 @@ module_param(ple_gap, int, S_IRUGO);
 static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
 module_param(ple_window, int, S_IRUGO);
 
+extern const ulong vmx_return;
+
 #define NR_AUTOLOAD_MSRS 8
 #define VMCS02_POOL_SIZE 1
 
@@ -3724,8 +3726,7 @@ static void vmx_set_constant_host_state(void)
 	native_store_idt(&dt);
 	vmcs_writel(HOST_IDTR_BASE, dt.address);   /* 22.2.4 */
 
-	asm("mov $.Lkvm_vmx_return, %0" : "=r"(tmpl));
-	vmcs_writel(HOST_RIP, tmpl); /* 22.2.5 */
+	vmcs_writel(HOST_RIP, vmx_return); /* 22.2.5 */
 
 	rdmsr(MSR_IA32_SYSENTER_CS, low32, high32);
 	vmcs_write32(HOST_IA32_SYSENTER_CS, low32);
@@ -6276,11 +6277,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */
 
 		/* Enter guest mode */
-		"jne .Llaunched \n\t"
+		"jne 1f \n\t"
 		__ex(ASM_VMX_VMLAUNCH) "\n\t"
-		"jmp .Lkvm_vmx_return \n\t"
-		".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t"
-		".Lkvm_vmx_return: "
+		"jmp 2f \n\t"
+		"1: " __ex(ASM_VMX_VMRESUME) "\n\t"
+		"2: "
 		/* Save guest registers, load host registers, keep flags */
 		"mov %0, %c[wordsize](%%"R"sp) \n\t"
 		"pop %0 \n\t"
@@ -6306,6 +6307,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 
 		"pop  %%"R"bp; pop  %%"R"dx \n\t"
 		"setbe %c[fail](%0) \n\t"
+		".pushsection .rodata \n\t"
+		".global vmx_return \n\t"
+		"vmx_return: " _ASM_PTR " 2b \n\t"
+		".popsection"
 	      : : "c"(vmx), "d"((unsigned long)HOST_RSP),
 		[launched]"i"(offsetof(struct vcpu_vmx, __launched)),
 		[fail]"i"(offsetof(struct vcpu_vmx, fail)),
-- 
cgit v1.2.3


From b188c81f2e1a188ddda6a3d353e5b546c30a9b90 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 15:10:58 +0300
Subject: KVM: VMX: Make use of asm.h

Use macros for bitness-insensitive register names, instead of
rolling our own.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 69 ++++++++++++++++++++++++------------------------------
 1 file changed, 30 insertions(+), 39 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5faf12ace546..30bcb953afee 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6184,14 +6184,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
 					msrs[i].host);
 }
 
-#ifdef CONFIG_X86_64
-#define R "r"
-#define Q "q"
-#else
-#define R "e"
-#define Q "l"
-#endif
-
 static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -6240,30 +6232,30 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx->__launched = vmx->loaded_vmcs->launched;
 	asm(
 		/* Store host registers */
-		"push %%"R"dx; push %%"R"bp;"
-		"push %%"R"cx \n\t" /* placeholder for guest rcx */
-		"push %%"R"cx \n\t"
-		"cmp %%"R"sp, %c[host_rsp](%0) \n\t"
+		"push %%" _ASM_DX "; push %%" _ASM_BP ";"
+		"push %%" _ASM_CX " \n\t" /* placeholder for guest rcx */
+		"push %%" _ASM_CX " \n\t"
+		"cmp %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
 		"je 1f \n\t"
-		"mov %%"R"sp, %c[host_rsp](%0) \n\t"
+		"mov %%" _ASM_SP ", %c[host_rsp](%0) \n\t"
 		__ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t"
 		"1: \n\t"
 		/* Reload cr2 if changed */
-		"mov %c[cr2](%0), %%"R"ax \n\t"
-		"mov %%cr2, %%"R"dx \n\t"
-		"cmp %%"R"ax, %%"R"dx \n\t"
+		"mov %c[cr2](%0), %%" _ASM_AX " \n\t"
+		"mov %%cr2, %%" _ASM_DX " \n\t"
+		"cmp %%" _ASM_AX ", %%" _ASM_DX " \n\t"
 		"je 2f \n\t"
-		"mov %%"R"ax, %%cr2 \n\t"
+		"mov %%" _ASM_AX", %%cr2 \n\t"
 		"2: \n\t"
 		/* Check if vmlaunch of vmresume is needed */
 		"cmpl $0, %c[launched](%0) \n\t"
 		/* Load guest registers.  Don't clobber flags. */
-		"mov %c[rax](%0), %%"R"ax \n\t"
-		"mov %c[rbx](%0), %%"R"bx \n\t"
-		"mov %c[rdx](%0), %%"R"dx \n\t"
-		"mov %c[rsi](%0), %%"R"si \n\t"
-		"mov %c[rdi](%0), %%"R"di \n\t"
-		"mov %c[rbp](%0), %%"R"bp \n\t"
+		"mov %c[rax](%0), %%" _ASM_AX " \n\t"
+		"mov %c[rbx](%0), %%" _ASM_BX " \n\t"
+		"mov %c[rdx](%0), %%" _ASM_DX " \n\t"
+		"mov %c[rsi](%0), %%" _ASM_SI " \n\t"
+		"mov %c[rdi](%0), %%" _ASM_DI " \n\t"
+		"mov %c[rbp](%0), %%" _ASM_BP " \n\t"
 #ifdef CONFIG_X86_64
 		"mov %c[r8](%0),  %%r8  \n\t"
 		"mov %c[r9](%0),  %%r9  \n\t"
@@ -6274,7 +6266,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %c[r14](%0), %%r14 \n\t"
 		"mov %c[r15](%0), %%r15 \n\t"
 #endif
-		"mov %c[rcx](%0), %%"R"cx \n\t" /* kills %0 (ecx) */
+		"mov %c[rcx](%0), %%" _ASM_CX " \n\t" /* kills %0 (ecx) */
 
 		/* Enter guest mode */
 		"jne 1f \n\t"
@@ -6283,15 +6275,15 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"1: " __ex(ASM_VMX_VMRESUME) "\n\t"
 		"2: "
 		/* Save guest registers, load host registers, keep flags */
-		"mov %0, %c[wordsize](%%"R"sp) \n\t"
+		"mov %0, %c[wordsize](%%" _ASM_SP ") \n\t"
 		"pop %0 \n\t"
-		"mov %%"R"ax, %c[rax](%0) \n\t"
-		"mov %%"R"bx, %c[rbx](%0) \n\t"
-		"pop"Q" %c[rcx](%0) \n\t"
-		"mov %%"R"dx, %c[rdx](%0) \n\t"
-		"mov %%"R"si, %c[rsi](%0) \n\t"
-		"mov %%"R"di, %c[rdi](%0) \n\t"
-		"mov %%"R"bp, %c[rbp](%0) \n\t"
+		"mov %%" _ASM_AX ", %c[rax](%0) \n\t"
+		"mov %%" _ASM_BX ", %c[rbx](%0) \n\t"
+		__ASM_SIZE(pop) " %c[rcx](%0) \n\t"
+		"mov %%" _ASM_DX ", %c[rdx](%0) \n\t"
+		"mov %%" _ASM_SI ", %c[rsi](%0) \n\t"
+		"mov %%" _ASM_DI ", %c[rdi](%0) \n\t"
+		"mov %%" _ASM_BP ", %c[rbp](%0) \n\t"
 #ifdef CONFIG_X86_64
 		"mov %%r8,  %c[r8](%0) \n\t"
 		"mov %%r9,  %c[r9](%0) \n\t"
@@ -6302,10 +6294,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r14, %c[r14](%0) \n\t"
 		"mov %%r15, %c[r15](%0) \n\t"
 #endif
-		"mov %%cr2, %%"R"ax   \n\t"
-		"mov %%"R"ax, %c[cr2](%0) \n\t"
+		"mov %%cr2, %%" _ASM_AX "   \n\t"
+		"mov %%" _ASM_AX ", %c[cr2](%0) \n\t"
 
-		"pop  %%"R"bp; pop  %%"R"dx \n\t"
+		"pop  %%" _ASM_BP "; pop  %%" _ASM_DX " \n\t"
 		"setbe %c[fail](%0) \n\t"
 		".pushsection .rodata \n\t"
 		".global vmx_return \n\t"
@@ -6335,9 +6327,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 		[cr2]"i"(offsetof(struct vcpu_vmx, vcpu.arch.cr2)),
 		[wordsize]"i"(sizeof(ulong))
 	      : "cc", "memory"
-		, R"ax", R"bx", R"di", R"si"
 #ifdef CONFIG_X86_64
+		, "rax", "rbx", "rdi", "rsi"
 		, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+#else
+		, "eax", "ebx", "edi", "esi"
 #endif
 	      );
 
@@ -6389,9 +6383,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	vmx_complete_interrupts(vmx);
 }
 
-#undef R
-#undef Q
-
 static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
-- 
cgit v1.2.3


From 7454766f7bead388251aedee35a478356a7f4e72 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 15:10:59 +0300
Subject: KVM: SVM: Make use of asm.h

Use macros for bitness-insensitive register names, instead of
rolling our own.

Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/svm.c | 46 ++++++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 26 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 611c72875fb9..818fceb3091e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -3782,12 +3782,6 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu)
 	svm_complete_interrupts(svm);
 }
 
-#ifdef CONFIG_X86_64
-#define R "r"
-#else
-#define R "e"
-#endif
-
 static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -3814,13 +3808,13 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	local_irq_enable();
 
 	asm volatile (
-		"push %%"R"bp; \n\t"
-		"mov %c[rbx](%[svm]), %%"R"bx \n\t"
-		"mov %c[rcx](%[svm]), %%"R"cx \n\t"
-		"mov %c[rdx](%[svm]), %%"R"dx \n\t"
-		"mov %c[rsi](%[svm]), %%"R"si \n\t"
-		"mov %c[rdi](%[svm]), %%"R"di \n\t"
-		"mov %c[rbp](%[svm]), %%"R"bp \n\t"
+		"push %%" _ASM_BP "; \n\t"
+		"mov %c[rbx](%[svm]), %%" _ASM_BX " \n\t"
+		"mov %c[rcx](%[svm]), %%" _ASM_CX " \n\t"
+		"mov %c[rdx](%[svm]), %%" _ASM_DX " \n\t"
+		"mov %c[rsi](%[svm]), %%" _ASM_SI " \n\t"
+		"mov %c[rdi](%[svm]), %%" _ASM_DI " \n\t"
+		"mov %c[rbp](%[svm]), %%" _ASM_BP " \n\t"
 #ifdef CONFIG_X86_64
 		"mov %c[r8](%[svm]),  %%r8  \n\t"
 		"mov %c[r9](%[svm]),  %%r9  \n\t"
@@ -3833,20 +3827,20 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 #endif
 
 		/* Enter guest mode */
-		"push %%"R"ax \n\t"
-		"mov %c[vmcb](%[svm]), %%"R"ax \n\t"
+		"push %%" _ASM_AX " \n\t"
+		"mov %c[vmcb](%[svm]), %%" _ASM_AX " \n\t"
 		__ex(SVM_VMLOAD) "\n\t"
 		__ex(SVM_VMRUN) "\n\t"
 		__ex(SVM_VMSAVE) "\n\t"
-		"pop %%"R"ax \n\t"
+		"pop %%" _ASM_AX " \n\t"
 
 		/* Save guest registers, load host registers */
-		"mov %%"R"bx, %c[rbx](%[svm]) \n\t"
-		"mov %%"R"cx, %c[rcx](%[svm]) \n\t"
-		"mov %%"R"dx, %c[rdx](%[svm]) \n\t"
-		"mov %%"R"si, %c[rsi](%[svm]) \n\t"
-		"mov %%"R"di, %c[rdi](%[svm]) \n\t"
-		"mov %%"R"bp, %c[rbp](%[svm]) \n\t"
+		"mov %%" _ASM_BX ", %c[rbx](%[svm]) \n\t"
+		"mov %%" _ASM_CX ", %c[rcx](%[svm]) \n\t"
+		"mov %%" _ASM_DX ", %c[rdx](%[svm]) \n\t"
+		"mov %%" _ASM_SI ", %c[rsi](%[svm]) \n\t"
+		"mov %%" _ASM_DI ", %c[rdi](%[svm]) \n\t"
+		"mov %%" _ASM_BP ", %c[rbp](%[svm]) \n\t"
 #ifdef CONFIG_X86_64
 		"mov %%r8,  %c[r8](%[svm]) \n\t"
 		"mov %%r9,  %c[r9](%[svm]) \n\t"
@@ -3857,7 +3851,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		"mov %%r14, %c[r14](%[svm]) \n\t"
 		"mov %%r15, %c[r15](%[svm]) \n\t"
 #endif
-		"pop %%"R"bp"
+		"pop %%" _ASM_BP
 		:
 		: [svm]"a"(svm),
 		  [vmcb]"i"(offsetof(struct vcpu_svm, vmcb_pa)),
@@ -3878,9 +3872,11 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		  [r15]"i"(offsetof(struct vcpu_svm, vcpu.arch.regs[VCPU_REGS_R15]))
 #endif
 		: "cc", "memory"
-		, R"bx", R"cx", R"dx", R"si", R"di"
 #ifdef CONFIG_X86_64
+		, "rbx", "rcx", "rdx", "rsi", "rdi"
 		, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
+#else
+		, "ebx", "ecx", "edx", "esi", "edi"
 #endif
 		);
 
@@ -3940,8 +3936,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 	mark_all_clean(svm->vmcb);
 }
 
-#undef R
-
 static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
-- 
cgit v1.2.3


From 314d9f63f385096580e9e2a06eaa0745d92fe4ac Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Mon, 10 Sep 2012 15:53:49 +0800
Subject: perf/x86: Add cpumask for uncore pmu

This patch adds a cpumask file to the uncore pmu sysfs directory.  The
cpumask file contains one active cpu for every socket.

Signed-off-by: "Yan, Zheng" <zheng.z.yan@intel.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Cc: "Yan, Zheng" <zheng.z.yan@intel.com>
Link: http://lkml.kernel.org/r/1347263631-23175-2-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 28 ++++++++++++++++++++++++---
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |  6 ++++--
 2 files changed, 29 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 0a5571080e74..62ec3e6af7ea 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -2341,6 +2341,27 @@ int uncore_pmu_event_init(struct perf_event *event)
 	return ret;
 }
 
+static ssize_t uncore_get_attr_cpumask(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &uncore_cpu_mask);
+
+	buf[n++] = '\n';
+	buf[n] = '\0';
+	return n;
+}
+
+static DEVICE_ATTR(cpumask, S_IRUGO, uncore_get_attr_cpumask, NULL);
+
+static struct attribute *uncore_pmu_attrs[] = {
+	&dev_attr_cpumask.attr,
+	NULL,
+};
+
+static struct attribute_group uncore_pmu_attr_group = {
+	.attrs = uncore_pmu_attrs,
+};
+
 static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu)
 {
 	int ret;
@@ -2378,8 +2399,8 @@ static void __init uncore_type_exit(struct intel_uncore_type *type)
 		free_percpu(type->pmus[i].box);
 	kfree(type->pmus);
 	type->pmus = NULL;
-	kfree(type->attr_groups[1]);
-	type->attr_groups[1] = NULL;
+	kfree(type->events_group);
+	type->events_group = NULL;
 }
 
 static void __init uncore_types_exit(struct intel_uncore_type **types)
@@ -2431,9 +2452,10 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
 		for (j = 0; j < i; j++)
 			attrs[j] = &type->event_descs[j].attr.attr;
 
-		type->attr_groups[1] = events_group;
+		type->events_group = events_group;
 	}
 
+	type->pmu_group = &uncore_pmu_attr_group;
 	type->pmus = pmus;
 	return 0;
 fail:
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 5b81c1856aac..e68a4550e952 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -369,10 +369,12 @@ struct intel_uncore_type {
 	struct intel_uncore_pmu *pmus;
 	struct intel_uncore_ops *ops;
 	struct uncore_event_desc *event_descs;
-	const struct attribute_group *attr_groups[3];
+	const struct attribute_group *attr_groups[4];
 };
 
-#define format_group attr_groups[0]
+#define pmu_group attr_groups[0]
+#define format_group attr_groups[1]
+#define events_group attr_groups[2]
 
 struct intel_uncore_ops {
 	void (*init_box)(struct intel_uncore_box *);
-- 
cgit v1.2.3


From 9fc77441e5e1bf80b794cc546d2243ee9f4afb75 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Sun, 16 Sep 2012 11:50:30 +0300
Subject: KVM: make processes waiting on vcpu mutex killable

vcpu mutex can be held for unlimited time so
taking it with mutex_lock on an ioctl is wrong:
one process could be passed a vcpu fd and
call this ioctl on the vcpu used by another process,
it will then be unkillable until the owner exits.

Call mutex_lock_killable instead and return status.
Note: mutex_lock_interruptible would be even nicer,
but I am not sure all users are prepared to handle EINTR
from these ioctls. They might misinterpret it as an error.

Cleanup paths expect a vcpu that can't be used by
any userspace so this will always succeed - catch bugs
by calling BUG_ON.

Catch callers that don't check return state by adding
__must_check.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/x86.c       | 12 +++++++++---
 include/linux/kvm_host.h |  2 +-
 virt/kvm/kvm_main.c      | 10 +++++++---
 3 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c4d451ed1573..19047eafa38d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6016,7 +6016,9 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 	int r;
 
 	vcpu->arch.mtrr_state.have_fixed = 1;
-	vcpu_load(vcpu);
+	r = vcpu_load(vcpu);
+	if (r)
+		return r;
 	r = kvm_arch_vcpu_reset(vcpu);
 	if (r == 0)
 		r = kvm_mmu_setup(vcpu);
@@ -6027,9 +6029,11 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
+	int r;
 	vcpu->arch.apf.msr_val = 0;
 
-	vcpu_load(vcpu);
+	r = vcpu_load(vcpu);
+	BUG_ON(r);
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
 
@@ -6275,7 +6279,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
-	vcpu_load(vcpu);
+	int r;
+	r = vcpu_load(vcpu);
+	BUG_ON(r);
 	kvm_mmu_unload(vcpu);
 	vcpu_put(vcpu);
 }
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 40791930bc15..80bfc880921e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -408,7 +408,7 @@ static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 
-void vcpu_load(struct kvm_vcpu *vcpu);
+int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
 int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 4fe02d900810..cc3f6dc506e4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -131,11 +131,12 @@ bool kvm_is_mmio_pfn(pfn_t pfn)
 /*
  * Switches to specified vcpu, until a matching vcpu_put()
  */
-void vcpu_load(struct kvm_vcpu *vcpu)
+int vcpu_load(struct kvm_vcpu *vcpu)
 {
 	int cpu;
 
-	mutex_lock(&vcpu->mutex);
+	if (mutex_lock_killable(&vcpu->mutex))
+		return -EINTR;
 	if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
 		/* The thread running this VCPU changed. */
 		struct pid *oldpid = vcpu->pid;
@@ -148,6 +149,7 @@ void vcpu_load(struct kvm_vcpu *vcpu)
 	preempt_notifier_register(&vcpu->preempt_notifier);
 	kvm_arch_vcpu_load(vcpu, cpu);
 	put_cpu();
+	return 0;
 }
 
 void vcpu_put(struct kvm_vcpu *vcpu)
@@ -1891,7 +1893,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
 #endif
 
 
-	vcpu_load(vcpu);
+	r = vcpu_load(vcpu);
+	if (r)
+		return r;
 	switch (ioctl) {
 	case KVM_RUN:
 		r = -EINVAL;
-- 
cgit v1.2.3


From b82776005369899c1c7ca2e4b2414bb64b538d2c Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 23 Aug 2012 14:36:15 -0400
Subject: xen/swiotlb: Use the swiotlb_late_init_with_tbl to init Xen-SWIOTLB
 late when PV PCI is used.

With this patch we provide the functionality to initialize the
Xen-SWIOTLB late in the bootup cycle - specifically for
Xen PCI-frontend. We still will work if the user had
supplied 'iommu=soft' on the Linux command line.

Note: We cannot depend on after_bootmem to automatically
determine whether this is early or not. This is because
when PCI IOMMUs are initialized it is after after_bootmem but
before a lot of "other" subsystems are initialized.

CC: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
[v1: Fix smatch warnings]
[v2: Added check for xen_swiotlb]
[v3: Rebased with new xen-swiotlb changes]
[v4: squashed xen/swiotlb: Depending on after_bootmem is not correct in]
Reviewed-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/include/asm/xen/swiotlb-xen.h |  2 ++
 arch/x86/xen/pci-swiotlb-xen.c         | 24 +++++++++++++++--
 drivers/xen/swiotlb-xen.c              | 48 +++++++++++++++++++++++++++-------
 include/xen/swiotlb-xen.h              |  2 +-
 4 files changed, 63 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/xen/swiotlb-xen.h b/arch/x86/include/asm/xen/swiotlb-xen.h
index 1be1ab7d6a41..ee52fcac6f72 100644
--- a/arch/x86/include/asm/xen/swiotlb-xen.h
+++ b/arch/x86/include/asm/xen/swiotlb-xen.h
@@ -5,10 +5,12 @@
 extern int xen_swiotlb;
 extern int __init pci_xen_swiotlb_detect(void);
 extern void __init pci_xen_swiotlb_init(void);
+extern int pci_xen_swiotlb_init_late(void);
 #else
 #define xen_swiotlb (0)
 static inline int __init pci_xen_swiotlb_detect(void) { return 0; }
 static inline void __init pci_xen_swiotlb_init(void) { }
+static inline int pci_xen_swiotlb_init_late(void) { return -ENXIO; }
 #endif
 
 #endif /* _ASM_X86_SWIOTLB_XEN_H */
diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index 1c1722761eec..b152640d8388 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -12,7 +12,7 @@
 #include <asm/iommu.h>
 #include <asm/dma.h>
 #endif
-
+#include <linux/export.h>
 int xen_swiotlb __read_mostly;
 
 static struct dma_map_ops xen_swiotlb_dma_ops = {
@@ -69,13 +69,33 @@ int __init pci_xen_swiotlb_detect(void)
 void __init pci_xen_swiotlb_init(void)
 {
 	if (xen_swiotlb) {
-		xen_swiotlb_init(1);
+		xen_swiotlb_init(1, true /* early */);
 		dma_ops = &xen_swiotlb_dma_ops;
 
 		/* Make sure ACS will be enabled */
 		pci_request_acs();
 	}
 }
+
+int pci_xen_swiotlb_init_late(void)
+{
+	int rc;
+
+	if (xen_swiotlb)
+		return 0;
+
+	rc = xen_swiotlb_init(1, false /* late */);
+	if (rc)
+		return rc;
+
+	dma_ops = &xen_swiotlb_dma_ops;
+	/* Make sure ACS will be enabled */
+	pci_request_acs();
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
+
 IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
 		  0,
 		  pci_xen_swiotlb_init,
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c
index 701b1035fa6f..7461edb5118e 100644
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -176,9 +176,9 @@ static const char *xen_swiotlb_error(enum xen_swiotlb_err err)
 	}
 	return "";
 }
-void __init xen_swiotlb_init(int verbose)
+int __ref xen_swiotlb_init(int verbose, bool early)
 {
-	unsigned long bytes;
+	unsigned long bytes, order;
 	int rc = -ENOMEM;
 	enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
 	unsigned int repeat = 3;
@@ -186,10 +186,28 @@ void __init xen_swiotlb_init(int verbose)
 	xen_io_tlb_nslabs = swiotlb_nr_tbl();
 retry:
 	bytes = xen_set_nslabs(xen_io_tlb_nslabs);
+	order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT);
 	/*
 	 * Get IO TLB memory from any location.
 	 */
-	xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
+	if (early)
+		xen_io_tlb_start = alloc_bootmem_pages(PAGE_ALIGN(bytes));
+	else {
+#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
+#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+		while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
+			xen_io_tlb_start = (void *)__get_free_pages(__GFP_NOWARN, order);
+			if (xen_io_tlb_start)
+				break;
+			order--;
+		}
+		if (order != get_order(bytes)) {
+			pr_warn("Warning: only able to allocate %ld MB "
+				"for software IO TLB\n", (PAGE_SIZE << order) >> 20);
+			xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
+			bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
+		}
+	}
 	if (!xen_io_tlb_start) {
 		m_ret = XEN_SWIOTLB_ENOMEM;
 		goto error;
@@ -202,14 +220,21 @@ retry:
 			       bytes,
 			       xen_io_tlb_nslabs);
 	if (rc) {
-		free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes));
+		if (early)
+			free_bootmem(__pa(xen_io_tlb_start), PAGE_ALIGN(bytes));
+		else {
+			free_pages((unsigned long)xen_io_tlb_start, order);
+			xen_io_tlb_start = NULL;
+		}
 		m_ret = XEN_SWIOTLB_EFIXUP;
 		goto error;
 	}
 	start_dma_addr = xen_virt_to_bus(xen_io_tlb_start);
-	swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
-
-	return;
+	if (early)
+		swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs, verbose);
+	else
+		rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
+	return rc;
 error:
 	if (repeat--) {
 		xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
@@ -218,10 +243,13 @@ error:
 		      (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
 		goto retry;
 	}
-	xen_raw_printk("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
-	panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
+	pr_err("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
+	if (early)
+		panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
+	else
+		free_pages((unsigned long)xen_io_tlb_start, order);
+	return rc;
 }
-
 void *
 xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 			   dma_addr_t *dma_handle, gfp_t flags,
diff --git a/include/xen/swiotlb-xen.h b/include/xen/swiotlb-xen.h
index 4f4d449f00f6..289ee509bda9 100644
--- a/include/xen/swiotlb-xen.h
+++ b/include/xen/swiotlb-xen.h
@@ -3,7 +3,7 @@
 
 #include <linux/swiotlb.h>
 
-extern void xen_swiotlb_init(int verbose);
+extern int xen_swiotlb_init(int verbose, bool early);
 
 extern void
 *xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-- 
cgit v1.2.3


From 2a3bce8f6afb9118a7ac3c360a5baf7cdaec87bc Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 13 Aug 2012 11:00:08 -0400
Subject: xen/swiotlb: Fix compile warnings when using plain integer instead of
 NULL pointer.

arch/x86/xen/pci-swiotlb-xen.c:96:1: warning: Using plain integer as NULL pointer
arch/x86/xen/pci-swiotlb-xen.c:96:1: warning: Using plain integer as NULL pointer

Acked-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/pci-swiotlb-xen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c
index b152640d8388..1608244756de 100644
--- a/arch/x86/xen/pci-swiotlb-xen.c
+++ b/arch/x86/xen/pci-swiotlb-xen.c
@@ -97,6 +97,6 @@ int pci_xen_swiotlb_init_late(void)
 EXPORT_SYMBOL_GPL(pci_xen_swiotlb_init_late);
 
 IOMMU_INIT_FINISH(pci_xen_swiotlb_detect,
-		  0,
+		  NULL,
 		  pci_xen_swiotlb_init,
-		  0);
+		  NULL);
-- 
cgit v1.2.3


From e57dbaf77f372ac461b5b0b353c65efae9739a00 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Tue, 13 Sep 2011 15:23:21 +0200
Subject: x86, mce: Enable MCA support by default

MCA is the basic support for hardware error logging and reporting, and
it is majorly unwise to run without it so enable machine check software
support by default on x86.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Acked-by: Tony Luck <tony.luck@intel.com>
---
 arch/x86/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4abd..3d2d2ef0e16a 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -871,6 +871,7 @@ config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 
 config X86_MCE
 	bool "Machine Check / overheating reporting"
+	default y
 	---help---
 	  Machine Check support allows the processor to notify the
 	  kernel if it detects a problem (e.g. overheating, data corruption).
-- 
cgit v1.2.3


From 57639bedd2b8268daa791efcfd0367b9031b057d Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Tue, 22 May 2012 18:47:38 +0200
Subject: x86, MCE: Remove unused defines

Those were sitting there unused since the dawn of time, drop them.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
 arch/x86/include/asm/mce.h | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index a3ac52b29cbf..ccaf7c581c8f 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -116,19 +116,9 @@ struct mce_log {
 /* Software defined banks */
 #define MCE_EXTENDED_BANK	128
 #define MCE_THERMAL_BANK	MCE_EXTENDED_BANK + 0
-
-#define K8_MCE_THRESHOLD_BASE      (MCE_EXTENDED_BANK + 1)      /* MCE_AMD */
-#define K8_MCE_THRESHOLD_BANK_0    (MCE_THRESHOLD_BASE + 0 * 9)
-#define K8_MCE_THRESHOLD_BANK_1    (MCE_THRESHOLD_BASE + 1 * 9)
-#define K8_MCE_THRESHOLD_BANK_2    (MCE_THRESHOLD_BASE + 2 * 9)
-#define K8_MCE_THRESHOLD_BANK_3    (MCE_THRESHOLD_BASE + 3 * 9)
-#define K8_MCE_THRESHOLD_BANK_4    (MCE_THRESHOLD_BASE + 4 * 9)
-#define K8_MCE_THRESHOLD_BANK_5    (MCE_THRESHOLD_BASE + 5 * 9)
-#define K8_MCE_THRESHOLD_DRAM_ECC  (MCE_THRESHOLD_BANK_4 + 0)
-
+#define K8_MCE_THRESHOLD_BASE      (MCE_EXTENDED_BANK + 1)
 
 #ifdef __KERNEL__
-
 extern void mce_register_decode_chain(struct notifier_block *nb);
 extern void mce_unregister_decode_chain(struct notifier_block *nb);
 
-- 
cgit v1.2.3


From 3ddbebf878ac8d958bb34e87a742a6b3adc283a3 Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@avionic-design.de>
Date: Mon, 17 Sep 2012 13:22:53 +0200
Subject: PCI: Discard __init annotations for pci_fixup_irqs() and related
 functions

Remove the __init annotations in order to keep pci_fixup_irqs() around
after init (e.g. for hotplug). This requires the same change for the
implementation of pcibios_update_irq() on all architectures. While at
it, all __devinit annotations are removed as well, since they will be
useless now that HOTPLUG is always on.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/alpha/kernel/pci.c      | 2 +-
 arch/arm/kernel/bios32.c     | 2 +-
 arch/ia64/pci/pci.c          | 2 +-
 arch/mips/pci/pci.c          | 2 +-
 arch/sh/drivers/pci/pci.c    | 2 +-
 arch/sparc/kernel/leon_pci.c | 2 +-
 arch/tile/kernel/pci.c       | 2 +-
 arch/tile/kernel/pci_gx.c    | 2 +-
 arch/unicore32/kernel/pci.c  | 2 +-
 arch/x86/pci/visws.c         | 2 +-
 arch/xtensa/kernel/pci.c     | 2 +-
 drivers/pci/setup-irq.c      | 4 ++--
 12 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 9816d5a4d176..920392f61ef2 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -256,7 +256,7 @@ pcibios_fixup_bus(struct pci_bus *bus)
 	}
 }
 
-void __init
+void
 pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 2b2f25e7fef5..0174fe6effef 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -272,7 +272,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8152, pci_fixup_it
 
 
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	if (debug_pci)
 		printk("PCI: Assigning IRQ %02d to %s\n", irq, pci_name(dev));
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 81acc7a57f3e..27db6a8afc44 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -461,7 +461,7 @@ void pcibios_set_master (struct pci_dev *dev)
 	/* No special bus mastering setup handling */
 }
 
-void __devinit
+void
 pcibios_update_irq (struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 690356808f8a..64f0419e5856 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -313,7 +313,7 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 	}
 }
 
-void __init
+void
 pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 40db2d0aef3f..1bd3e089b74f 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -192,7 +192,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return pci_enable_resources(dev, mask);
 }
 
-void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c
index 21dcda75a520..404621b775fc 100644
--- a/arch/sparc/kernel/leon_pci.c
+++ b/arch/sparc/kernel/leon_pci.c
@@ -102,7 +102,7 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return pci_enable_resources(dev, mask);
 }
 
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 #ifdef CONFIG_PCI_DEBUG
 	printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq,
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index 33c10864d2f7..6245bba8b1d6 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -406,7 +406,7 @@ void pcibios_set_master(struct pci_dev *dev)
 /*
  * This is called from the generic Linux layer.
  */
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 0e213e35ffc3..5faad0b1bd21 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -1036,7 +1036,7 @@ char __devinit *pcibios_setup(char *str)
 /*
  * This is called from the generic Linux layer.
  */
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
index 46cb6c9de6c9..c07ecc5baff6 100644
--- a/arch/unicore32/kernel/pci.c
+++ b/arch/unicore32/kernel/pci.c
@@ -154,7 +154,7 @@ void __init puv3_pci_adjust_zones(unsigned long *zone_size,
 	zhole_size[0] = 0;
 }
 
-void __devinit pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	if (debug_pci)
 		printk(KERN_DEBUG "PCI: Assigning IRQ %02d to %s\n",
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 6f2f8eeed171..9d736e7ff642 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -62,7 +62,7 @@ out:
 	return irq;
 }
 
-void __init pcibios_update_irq(struct pci_dev *dev, int irq)
+void pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
 }
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 69759e9cb3ea..6f9b40c47e99 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -212,7 +212,7 @@ void pcibios_set_master(struct pci_dev *dev)
 
 /* the next one is stolen from the alpha port... */
 
-void __init
+void
 pcibios_update_irq(struct pci_dev *dev, int irq)
 {
 	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c
index eb219a1d16f7..270ae7b97120 100644
--- a/drivers/pci/setup-irq.c
+++ b/drivers/pci/setup-irq.c
@@ -18,7 +18,7 @@
 #include <linux/cache.h>
 
 
-static void __init
+static void
 pdev_fixup_irq(struct pci_dev *dev,
 	       u8 (*swizzle)(struct pci_dev *, u8 *),
 	       int (*map_irq)(const struct pci_dev *, u8, u8))
@@ -54,7 +54,7 @@ pdev_fixup_irq(struct pci_dev *dev,
 	pcibios_update_irq(dev, irq);
 }
 
-void __init
+void
 pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *),
 	       int (*map_irq)(const struct pci_dev *, u8, u8))
 {
-- 
cgit v1.2.3


From 8885b7b637fa9aca7e1b00581a0173c6956966d3 Mon Sep 17 00:00:00 2001
From: Thierry Reding <thierry.reding@avionic-design.de>
Date: Mon, 17 Sep 2012 13:22:54 +0200
Subject: PCI: Provide a default pcibios_update_irq()

Most architectures implement this in exactly the same way. Instead of
having each architecture duplicate this function, provide a single
implementation in the core and make it a weak symbol so that it can be
overridden on architectures where it is required.

Signed-off-by: Thierry Reding <thierry.reding@avionic-design.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 arch/alpha/kernel/pci.c      | 6 ------
 arch/arm/kernel/bios32.c     | 9 ---------
 arch/ia64/pci/pci.c          | 8 --------
 arch/m68k/kernel/pcibios.c   | 5 -----
 arch/mips/pci/pci.c          | 6 ------
 arch/sh/drivers/pci/pci.c    | 5 -----
 arch/sparc/kernel/leon_pci.c | 9 ---------
 arch/sparc/kernel/pci.c      | 4 ----
 arch/tile/kernel/pci.c       | 8 --------
 arch/tile/kernel/pci_gx.c    | 8 --------
 arch/unicore32/kernel/pci.c  | 8 --------
 arch/x86/pci/visws.c         | 5 -----
 arch/xtensa/kernel/pci.c     | 8 --------
 drivers/pci/setup-irq.c      | 5 +++++
 14 files changed, 5 insertions(+), 89 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/alpha/kernel/pci.c b/arch/alpha/kernel/pci.c
index 920392f61ef2..ef757147cbf9 100644
--- a/arch/alpha/kernel/pci.c
+++ b/arch/alpha/kernel/pci.c
@@ -256,12 +256,6 @@ pcibios_fixup_bus(struct pci_bus *bus)
 	}
 }
 
-void
-pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 int
 pcibios_enable_device(struct pci_dev *dev, int mask)
 {
diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c
index 0174fe6effef..9cf16b83bbb5 100644
--- a/arch/arm/kernel/bios32.c
+++ b/arch/arm/kernel/bios32.c
@@ -270,15 +270,6 @@ static void __devinit pci_fixup_it8152(struct pci_dev *dev)
 }
 DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8152, pci_fixup_it8152);
 
-
-
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	if (debug_pci)
-		printk("PCI: Assigning IRQ %02d to %s\n", irq, pci_name(dev));
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 /*
  * If the bus contains any of these devices, then we must not turn on
  * parity checking of any kind.  Currently this is CyberPro 20x0 only.
diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c
index 27db6a8afc44..a7ebe9440271 100644
--- a/arch/ia64/pci/pci.c
+++ b/arch/ia64/pci/pci.c
@@ -461,14 +461,6 @@ void pcibios_set_master (struct pci_dev *dev)
 	/* No special bus mastering setup handling */
 }
 
-void
-pcibios_update_irq (struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-
-	/* ??? FIXME -- record old value for shutdown.  */
-}
-
 int
 pcibios_enable_device (struct pci_dev *dev, int mask)
 {
diff --git a/arch/m68k/kernel/pcibios.c b/arch/m68k/kernel/pcibios.c
index b2988aa1840b..73fa0b56a06c 100644
--- a/arch/m68k/kernel/pcibios.c
+++ b/arch/m68k/kernel/pcibios.c
@@ -87,11 +87,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return 0;
 }
 
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 {
 	struct pci_dev *dev;
diff --git a/arch/mips/pci/pci.c b/arch/mips/pci/pci.c
index 64f0419e5856..04e35bcde07c 100644
--- a/arch/mips/pci/pci.c
+++ b/arch/mips/pci/pci.c
@@ -313,12 +313,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus)
 	}
 }
 
-void
-pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 #ifdef CONFIG_HOTPLUG
 EXPORT_SYMBOL(PCIBIOS_MIN_IO);
 EXPORT_SYMBOL(PCIBIOS_MIN_MEM);
diff --git a/arch/sh/drivers/pci/pci.c b/arch/sh/drivers/pci/pci.c
index 1bd3e089b74f..a7e078f2e2e4 100644
--- a/arch/sh/drivers/pci/pci.c
+++ b/arch/sh/drivers/pci/pci.c
@@ -192,11 +192,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return pci_enable_resources(dev, mask);
 }
 
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 static void __init
 pcibios_bus_report_status_early(struct pci_channel *hose,
 				int top_bus, int current_bus,
diff --git a/arch/sparc/kernel/leon_pci.c b/arch/sparc/kernel/leon_pci.c
index 404621b775fc..fc0521161568 100644
--- a/arch/sparc/kernel/leon_pci.c
+++ b/arch/sparc/kernel/leon_pci.c
@@ -102,15 +102,6 @@ int pcibios_enable_device(struct pci_dev *dev, int mask)
 	return pci_enable_resources(dev, mask);
 }
 
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-#ifdef CONFIG_PCI_DEBUG
-	printk(KERN_DEBUG "LEONPCI: Assigning IRQ %02d to %s\n", irq,
-		pci_name(dev));
-#endif
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 /* in/out routines taken from pcic.c
  *
  * This probably belongs here rather than ioport.c because
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index 065b88c4f868..acc8c838ff72 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -622,10 +622,6 @@ void __devinit pcibios_fixup_bus(struct pci_bus *pbus)
 {
 }
 
-void pcibios_update_irq(struct pci_dev *pdev, int irq)
-{
-}
-
 resource_size_t pcibios_align_resource(void *data, const struct resource *res,
 				resource_size_t size, resource_size_t align)
 {
diff --git a/arch/tile/kernel/pci.c b/arch/tile/kernel/pci.c
index 6245bba8b1d6..dbdab34f27cb 100644
--- a/arch/tile/kernel/pci.c
+++ b/arch/tile/kernel/pci.c
@@ -403,14 +403,6 @@ void pcibios_set_master(struct pci_dev *dev)
 	/* No special bus mastering setup handling. */
 }
 
-/*
- * This is called from the generic Linux layer.
- */
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 /*
  * Enable memory and/or address decoding, as appropriate, for the
  * device described by the 'dev' struct.
diff --git a/arch/tile/kernel/pci_gx.c b/arch/tile/kernel/pci_gx.c
index 5faad0b1bd21..2ba6d052f85d 100644
--- a/arch/tile/kernel/pci_gx.c
+++ b/arch/tile/kernel/pci_gx.c
@@ -1033,14 +1033,6 @@ char __devinit *pcibios_setup(char *str)
 	return str;
 }
 
-/*
- * This is called from the generic Linux layer.
- */
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 /*
  * Enable memory address decoding, as appropriate, for the
  * device described by the 'dev' struct. The I/O decoding
diff --git a/arch/unicore32/kernel/pci.c b/arch/unicore32/kernel/pci.c
index c07ecc5baff6..b0056f68d321 100644
--- a/arch/unicore32/kernel/pci.c
+++ b/arch/unicore32/kernel/pci.c
@@ -154,14 +154,6 @@ void __init puv3_pci_adjust_zones(unsigned long *zone_size,
 	zhole_size[0] = 0;
 }
 
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	if (debug_pci)
-		printk(KERN_DEBUG "PCI: Assigning IRQ %02d to %s\n",
-				irq, pci_name(dev));
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 /*
  * If the bus contains any of these devices, then we must not turn on
  * parity checking of any kind.
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c
index 9d736e7ff642..3e6d2a6db866 100644
--- a/arch/x86/pci/visws.c
+++ b/arch/x86/pci/visws.c
@@ -62,11 +62,6 @@ out:
 	return irq;
 }
 
-void pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 int __init pci_visws_init(void)
 {
 	pcibios_enable_irq = &pci_visws_enable_irq;
diff --git a/arch/xtensa/kernel/pci.c b/arch/xtensa/kernel/pci.c
index 6f9b40c47e99..54354de38a70 100644
--- a/arch/xtensa/kernel/pci.c
+++ b/arch/xtensa/kernel/pci.c
@@ -210,14 +210,6 @@ void pcibios_set_master(struct pci_dev *dev)
 	/* No special bus mastering setup handling */
 }
 
-/* the next one is stolen from the alpha port... */
-
-void
-pcibios_update_irq(struct pci_dev *dev, int irq)
-{
-	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
-}
-
 int pcibios_enable_device(struct pci_dev *dev, int mask)
 {
 	u16 cmd, old_cmd;
diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c
index 270ae7b97120..9bd6864ec5d3 100644
--- a/drivers/pci/setup-irq.c
+++ b/drivers/pci/setup-irq.c
@@ -17,6 +17,11 @@
 #include <linux/ioport.h>
 #include <linux/cache.h>
 
+void __weak pcibios_update_irq(struct pci_dev *dev, int irq)
+{
+	dev_dbg(&dev->dev, "assigning IRQ %02d\n", irq);
+	pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq);
+}
 
 static void
 pdev_fixup_irq(struct pci_dev *dev,
-- 
cgit v1.2.3


From 1e6dd8adc78d4a153db253d051fd4ef6c49c9019 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 5 Sep 2012 15:31:26 +0300
Subject: perf: Fix off by one test in perf_reg_value()

The test should be >= ARRAY_SIZE() instead of > ARRAY_SIZE().

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Link: http://lkml.kernel.org/r/20120905123126.GC6128@elgon.mountain
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/perf_regs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index c5a3e5cfe07f..e309cc5c276e 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -57,7 +57,7 @@ static unsigned int pt_regs_offset[PERF_REG_X86_MAX] = {
 
 u64 perf_reg_value(struct pt_regs *regs, int idx)
 {
-	if (WARN_ON_ONCE(idx > ARRAY_SIZE(pt_regs_offset)))
+	if (WARN_ON_ONCE(idx >= ARRAY_SIZE(pt_regs_offset)))
 		return 0;
 
 	return regs_get_register(regs, pt_regs_offset[idx]);
-- 
cgit v1.2.3


From 924e101a7ab6f884047f4344e5f1154a4bcd63a6 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Fri, 14 Sep 2012 18:37:46 +0200
Subject: x86/debug: Dump family, model, stepping of the boot CPU

When acting on a user bug report, we find ourselves constantly
asking for /proc/cpuinfo in order to know the exact family,
model, stepping of the CPU in question.

Instead of having to ask this, add this to dmesg so that it is
visible and no ambiguities can ensue from looking at the
official name string of the CPU coming from CPUID and trying
to map it to f/m/s.

Output then looks like this:

[    0.146041] smpboot: CPU0: AMD FX(tm)-8100 Eight-Core Processor (fam: 15, model: 01, stepping: 02)

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Link: http://lkml.kernel.org/r/1347640666-13638-1-git-send-email-bp@amd64.org
[ tweaked it minimally to add commas. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/common.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index a5fbc3c5fccc..1cc48ff91cb3 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1023,14 +1023,16 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
 		printk(KERN_CONT "%s ", vendor);
 
 	if (c->x86_model_id[0])
-		printk(KERN_CONT "%s", c->x86_model_id);
+		printk(KERN_CONT "%s", strim(c->x86_model_id));
 	else
 		printk(KERN_CONT "%d86", c->x86);
 
+	printk(KERN_CONT " (fam: %02x, model: %02x", c->x86, c->x86_model);
+
 	if (c->x86_mask || c->cpuid_level >= 0)
-		printk(KERN_CONT " stepping %02x\n", c->x86_mask);
+		printk(KERN_CONT ", stepping: %02x)\n", c->x86_mask);
 	else
-		printk(KERN_CONT "\n");
+		printk(KERN_CONT ")\n");
 
 	print_cpu_msr(c);
 }
-- 
cgit v1.2.3


From e26a44a2d618a491d5c6a2a8aaf66ee03a94739f Mon Sep 17 00:00:00 2001
From: Jan Beulich <JBeulich@suse.com>
Date: Tue, 18 Sep 2012 12:16:14 +0100
Subject: x86: Use REP BSF unconditionally

Make "REP BSF" unconditional, as per the suggestion of hpa
and Linus, this removes the insane BSF_PREFIX conditional
and simplifies the logic.

Suggested-by: "H. Peter Anvin" <hpa@zytor.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Link: http://lkml.kernel.org/r/5058741E020000780009C014@nat28.tlf.novell.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/bitops.h | 19 ++-----------------
 1 file changed, 2 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index b2af6645ea7e..6dfd0195bb55 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -347,19 +347,6 @@ static int test_bit(int nr, const volatile unsigned long *addr);
 	 ? constant_test_bit((nr), (addr))	\
 	 : variable_test_bit((nr), (addr)))
 
-#if (defined(CONFIG_X86_GENERIC) || defined(CONFIG_GENERIC_CPU)) \
-    && !defined(CONFIG_CC_OPTIMIZE_FOR_SIZE)
-/*
- * Since BSF and TZCNT have sufficiently similar semantics for the purposes
- * for which we use them here, BMI-capable hardware will decode the prefixed
- * variant as 'tzcnt ...' and may execute that faster than 'bsf ...', while
- * older hardware will ignore the REP prefix and decode it as 'bsf ...'.
- */
-# define BSF_PREFIX "rep;"
-#else
-# define BSF_PREFIX
-#endif
-
 /**
  * __ffs - find first set bit in word
  * @word: The word to search
@@ -368,7 +355,7 @@ static int test_bit(int nr, const volatile unsigned long *addr);
  */
 static inline unsigned long __ffs(unsigned long word)
 {
-	asm(BSF_PREFIX "bsf %1,%0"
+	asm("rep; bsf %1,%0"
 		: "=r" (word)
 		: "rm" (word));
 	return word;
@@ -382,14 +369,12 @@ static inline unsigned long __ffs(unsigned long word)
  */
 static inline unsigned long ffz(unsigned long word)
 {
-	asm(BSF_PREFIX "bsf %1,%0"
+	asm("rep; bsf %1,%0"
 		: "=r" (word)
 		: "r" (~word));
 	return word;
 }
 
-#undef BSF_PREFIX
-
 /*
  * __fls: find last set bit in word
  * @word: The word to search
-- 
cgit v1.2.3


From 20a36e39d59757252edbbdcf9574ae2998733ce9 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Tue, 11 Sep 2012 01:07:01 +0200
Subject: perf/x86: Fix Intel Ivy Bridge support

This patch updates the existing Intel IvyBridge (model 58)
support with proper PEBS event constraints. It cannot reuse
the same as SandyBridge because some events (0xd3) are
specific to IvyBridge.

Also there is no UOPS_DISPATCHED.THREAD on IVB, so do not
populate the PERF_COUNT_HW_STALLED_CYCLES_BACKEND mapping.

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: peterz@infradead.org
Cc: ak@linux.intel.com
Link: http://lkml.kernel.org/r/20120910230701.GA5898@quad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h          |  2 ++
 arch/x86/kernel/cpu/perf_event_intel.c    | 24 +++++++++++++++++++++++-
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 14 ++++++++++++++
 3 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 6605a81ba339..8b6defe7eefc 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -586,6 +586,8 @@ extern struct event_constraint intel_westmere_pebs_event_constraints[];
 
 extern struct event_constraint intel_snb_pebs_event_constraints[];
 
+extern struct event_constraint intel_ivb_pebs_event_constraints[];
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event);
 
 void intel_pmu_pebs_enable(struct perf_event *event);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 0d3d63afa76a..6bca492b8547 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2048,7 +2048,6 @@ __init int intel_pmu_init(void)
 	case 42: /* SandyBridge */
 	case 45: /* SandyBridge, "Romely-EP" */
 		x86_add_quirk(intel_sandybridge_quirk);
-	case 58: /* IvyBridge */
 		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
@@ -2073,6 +2072,29 @@ __init int intel_pmu_init(void)
 
 		pr_cont("SandyBridge events, ");
 		break;
+	case 58: /* IvyBridge */
+		memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, snb_hw_cache_extra_regs,
+		       sizeof(hw_cache_extra_regs));
+
+		intel_pmu_lbr_init_snb();
+
+		x86_pmu.event_constraints = intel_snb_event_constraints;
+		x86_pmu.pebs_constraints = intel_ivb_pebs_event_constraints;
+		x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+		x86_pmu.extra_regs = intel_snb_extra_regs;
+		/* all extra regs are per-cpu when HT is on */
+		x86_pmu.er_flags |= ERF_HAS_RSP_1;
+		x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+		/* UOPS_ISSUED.ANY,c=1,i=1 to count stall cycles */
+		intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] =
+			X86_CONFIG(.event=0x0e, .umask=0x01, .inv=1, .cmask=1);
+
+		pr_cont("IvyBridge events, ");
+		break;
+
 
 	default:
 		switch (x86_pmu.version) {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index e38d97bf4259..826054a4f2ee 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -407,6 +407,20 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
 	EVENT_CONSTRAINT_END
 };
 
+struct event_constraint intel_ivb_pebs_event_constraints[] = {
+        INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+        INTEL_UEVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+        INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+        INTEL_EVENT_CONSTRAINT(0xc4, 0xf),    /* BR_INST_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xc5, 0xf),    /* BR_MISP_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xcd, 0x8),    /* MEM_TRANS_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+        INTEL_EVENT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
+        EVENT_CONSTRAINT_END
+};
+
 struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 {
 	struct event_constraint *c;
-- 
cgit v1.2.3


From 4b8073e467e6a66b6a5a8e799d28bc3b243c0d78 Mon Sep 17 00:00:00 2001
From: Peter Senna Tschudin <peter.senna@gmail.com>
Date: Tue, 18 Sep 2012 18:36:14 +0200
Subject: arch/x86: Remove unecessary semicolons

Found by http://coccinelle.lip6.fr/

Signed-off-by: Peter Senna Tschudin <peter.senna@gmail.com>
Cc: avi@redhat.com
Cc: mtosatti@redhat.com
Cc: a.p.zijlstra@chello.nl
Cc: rusty@rustcorp.com.au
Cc: masami.hiramatsu.pt@hitachi.com
Cc: suresh.b.siddha@intel.com
Cc: joerg.roedel@amd.com
Cc: agordeev@redhat.com
Cc: yinghai@kernel.org
Cc: bhelgaas@google.com
Cc: liuj97@gmail.com
Link: http://lkml.kernel.org/r/1347986174-30287-7-git-send-email-peter.senna@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/alternative.c  | 4 ++--
 arch/x86/kernel/apic/apic.c    | 2 +-
 arch/x86/kvm/vmx.c             | 2 +-
 arch/x86/pci/mmconfig-shared.c | 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index ced4534baed5..3318b1e53e06 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -317,7 +317,7 @@ static void alternatives_smp_lock(const s32 *start, const s32 *end,
 		/* turn DS segment override prefix into lock prefix */
 		if (*ptr == 0x3e)
 			text_poke(ptr, ((unsigned char []){0xf0}), 1);
-	};
+	}
 	mutex_unlock(&text_mutex);
 }
 
@@ -338,7 +338,7 @@ static void alternatives_smp_unlock(const s32 *start, const s32 *end,
 		/* turn lock prefix into DS segment override prefix */
 		if (*ptr == 0xf0)
 			text_poke(ptr, ((unsigned char []){0x3E}), 1);
-	};
+	}
 	mutex_unlock(&text_mutex);
 }
 
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 24deb3082328..b17416e72fbd 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1934,7 +1934,7 @@ void smp_error_interrupt(struct pt_regs *regs)
 			apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]);
 		i++;
 		v1 >>= 1;
-	};
+	}
 
 	apic_printk(APIC_DEBUG, KERN_CONT "\n");
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b1eb202ee76a..b06737d122f4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4543,7 +4543,7 @@ static int handle_cr(struct kvm_vcpu *vcpu)
 				vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
 				return 0;
 			}
-		};
+		}
 		break;
 	case 2: /* clts */
 		handle_clts(vcpu);
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index 937bcece7006..704b9ec043d7 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -585,7 +585,7 @@ static int __init pci_parse_mcfg(struct acpi_table_header *header)
 	while (i >= sizeof(struct acpi_mcfg_allocation)) {
 		entries++;
 		i -= sizeof(struct acpi_mcfg_allocation);
-	};
+	}
 	if (entries == 0) {
 		pr_err(PREFIX "MMCONFIG has no entries\n");
 		return -ENODEV;
-- 
cgit v1.2.3


From 2d297480037e1d9100ca504737820c1bf65db6c0 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 5 Sep 2012 15:30:42 +0300
Subject: x86, microcode, AMD: Fix use after free in free_cache()

list_for_each_entry_reverse() dereferences the iterator, but we already
freed it. I don't see a reason that this has to be done in reverse order
so change it to use list_for_each_entry_safe().

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
 arch/x86/kernel/microcode_amd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c
index 5511216b4434..7720ff5a9ee2 100644
--- a/arch/x86/kernel/microcode_amd.c
+++ b/arch/x86/kernel/microcode_amd.c
@@ -150,9 +150,9 @@ static void update_cache(struct ucode_patch *new_patch)
 
 static void free_cache(void)
 {
-	struct ucode_patch *p;
+	struct ucode_patch *p, *tmp;
 
-	list_for_each_entry_reverse(p, &pcache, plist) {
+	list_for_each_entry_safe(p, tmp, &pcache, plist) {
 		__list_del(p->plist.prev, p->plist.next);
 		kfree(p->data);
 		kfree(p);
-- 
cgit v1.2.3


From bd49940a35ec7d488ae63bd625639893b3385b97 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 19 Sep 2012 08:30:55 -0400
Subject: xen/boot: Disable BIOS SMP MP table search.

As the initial domain we are able to search/map certain regions
of memory to harvest configuration data. For all low-level we
use ACPI tables - for interrupts we use exclusively ACPI _PRT
(so DSDT) and MADT for INT_SRC_OVR.

The SMP MP table is not used at all. As a matter of fact we do
not even support machines that only have SMP MP but no ACPI tables.

Lets follow how Moorestown does it and just disable searching
for BIOS SMP tables.

This also fixes an issue on HP Proliant BL680c G5 and DL380 G6:

9f->100 for 1:1 PTE
Freeing 9f-100 pfn range: 97 pages freed
1-1 mapping on 9f->100
.. snip..
e820: BIOS-provided physical RAM map:
Xen: [mem 0x0000000000000000-0x000000000009efff] usable
Xen: [mem 0x000000000009f400-0x00000000000fffff] reserved
Xen: [mem 0x0000000000100000-0x00000000cfd1dfff] usable
.. snip..
Scan for SMP in [mem 0x00000000-0x000003ff]
Scan for SMP in [mem 0x0009fc00-0x0009ffff]
Scan for SMP in [mem 0x000f0000-0x000fffff]
found SMP MP-table at [mem 0x000f4fa0-0x000f4faf] mapped at [ffff8800000f4fa0]
(XEN) mm.c:908:d0 Error getting mfn 100 (pfn 5555555555555555) from L1 entry 0000000000100461 for l1e_owner=0, pg_owner=0
(XEN) mm.c:4995:d0 ptwr_emulate: could not get_page_from_l1e()
BUG: unable to handle kernel NULL pointer dereference at           (null)
IP: [<ffffffff81ac07e2>] xen_set_pte_init+0x66/0x71
. snip..
Pid: 0, comm: swapper Not tainted 3.6.0-rc6upstream-00188-gb6fb969-dirty #2 HP ProLiant BL680c G5
.. snip..
Call Trace:
 [<ffffffff81ad31c6>] __early_ioremap+0x18a/0x248
 [<ffffffff81624731>] ? printk+0x48/0x4a
 [<ffffffff81ad32ac>] early_ioremap+0x13/0x15
 [<ffffffff81acc140>] get_mpc_size+0x2f/0x67
 [<ffffffff81acc284>] smp_scan_config+0x10c/0x136
 [<ffffffff81acc2e4>] default_find_smp_config+0x36/0x5a
 [<ffffffff81ac3085>] setup_arch+0x5b3/0xb5b
 [<ffffffff81624731>] ? printk+0x48/0x4a
 [<ffffffff81abca7f>] start_kernel+0x90/0x390
 [<ffffffff81abc356>] x86_64_start_reservations+0x131/0x136
 [<ffffffff81abfa83>] xen_start_kernel+0x65f/0x661
(XEN) Domain 0 crashed: 'noreboot' set - not rebooting.

which is that ioremap would end up mapping 0xff using _PAGE_IOMAP
(which is what early_ioremap sticks as a flag) - which meant
we would get MFN 0xFF (pte ff461, which is OK), and then it would
also map 0x100 (b/c ioremap tries to get page aligned request, and
it was trying to map 0xf4fa0 + PAGE_SIZE - so it mapped the next page)
as _PAGE_IOMAP. Since 0x100 is actually a RAM page, and the _PAGE_IOMAP
bypasses the P2M lookup we would happily set the PTE to 1000461.
Xen would deny the request since we do not have access to the
Machine Frame Number (MFN) of 0x100. The P2M[0x100] is for example
0x80140.

CC: stable@vger.kernel.org
Fixes-Oracle-Bugzilla: https://bugzilla.oracle.com/bugzilla/show_bug.cgi?id=13665
Acked-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 9642d4a38602..1fbe75a95f15 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1452,6 +1452,10 @@ asmlinkage void __init xen_start_kernel(void)
 		pci_request_acs();
 
 		xen_acpi_sleep_register();
+
+		/* Avoid searching for BIOS MP tables */
+		x86_init.mpparse.find_smp_config = x86_init_noop;
+		x86_init.mpparse.get_smp_config = x86_init_uint_noop;
 	}
 #ifdef CONFIG_PCI
 	/* PCI BIOS service won't work from a PV guest. */
-- 
cgit v1.2.3


From 8ea667f259e3767fd3ee85a885c14e417835695e Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 13:44:53 +0300
Subject: KVM: MMU: Push clean gpte write protection out of gpte_access()

gpte_access() computes the access permissions of a guest pte and also
write-protects clean gptes.  This is wrong when we are servicing a
write fault (since we'll be setting the dirty bit momentarily) but
correct when instantiating a speculative spte, or when servicing a
read fault (since we'll want to trap a following write in order to
set the dirty bit).

It doesn't seem to hurt in practice, but in order to make the code
readable, push the write protection out of gpte_access() and into
a new protect_clean_gpte() which is called explicitly when needed.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c         | 12 ++++++++++++
 arch/x86/kvm/mmu.h         |  3 ++-
 arch/x86/kvm/paging_tmpl.h | 24 ++++++++++++------------
 3 files changed, 26 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index aa0b469ee07d..54c9cb4fdfa4 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3408,6 +3408,18 @@ static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level)
 	return (gpte & mmu->rsvd_bits_mask[bit7][level-1]) != 0;
 }
 
+static inline void protect_clean_gpte(unsigned *access, unsigned gpte)
+{
+	unsigned mask;
+
+	BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
+
+	mask = (unsigned)~ACC_WRITE_MASK;
+	/* Allow write access to dirty gptes */
+	mask |= (gpte >> (PT_DIRTY_SHIFT - PT_WRITABLE_SHIFT)) & PT_WRITABLE_MASK;
+	*access &= mask;
+}
+
 static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
 			   int *nr_present)
 {
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index e374db9af021..2832081e9b2e 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -18,7 +18,8 @@
 #define PT_PCD_MASK (1ULL << 4)
 #define PT_ACCESSED_SHIFT 5
 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
-#define PT_DIRTY_MASK (1ULL << 6)
+#define PT_DIRTY_SHIFT 6
+#define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT)
 #define PT_PAGE_SIZE_MASK (1ULL << 7)
 #define PT_PAT_MASK (1ULL << 7)
 #define PT_GLOBAL_MASK (1ULL << 8)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bf8c42bf50fe..bf7b4ffafab8 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -101,14 +101,11 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return (ret != orig_pte);
 }
 
-static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte,
-				   bool last)
+static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
 {
 	unsigned access;
 
 	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
-	if (last && !is_dirty_gpte(gpte))
-		access &= ~ACC_WRITE_MASK;
 
 #if PTTYPE == 64
 	if (vcpu->arch.mmu.nx)
@@ -222,8 +219,7 @@ retry_walk:
 
 		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
 		if (last_gpte) {
-			pte_access = pt_access &
-				     FNAME(gpte_access)(vcpu, pte, true);
+			pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
 			/* check if the kernel is fetching from user page */
 			if (unlikely(pte_access & PT_USER_MASK) &&
 			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
@@ -274,7 +270,7 @@ retry_walk:
 			break;
 		}
 
-		pt_access &= FNAME(gpte_access)(vcpu, pte, false);
+		pt_access &= FNAME(gpte_access)(vcpu, pte);
 		--walker->level;
 	}
 
@@ -283,7 +279,9 @@ retry_walk:
 		goto error;
 	}
 
-	if (write_fault && unlikely(!is_dirty_gpte(pte))) {
+	if (!write_fault)
+		protect_clean_gpte(&pte_access, pte);
+	else if (unlikely(!is_dirty_gpte(pte))) {
 		int ret;
 
 		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
@@ -368,7 +366,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		return;
 
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte, true);
+	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+	protect_clean_gpte(&pte_access, gpte);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
 	if (mmu_invalid_pfn(pfn))
 		return;
@@ -441,8 +440,8 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 			continue;
 
-		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte,
-								  true);
+		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		protect_clean_gpte(&pte_access, gpte);
 		gfn = gpte_to_gfn(gpte);
 		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
 				      pte_access & ACC_WRITE_MASK);
@@ -794,7 +793,8 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		gfn = gpte_to_gfn(gpte);
 		pte_access = sp->role.access;
-		pte_access &= FNAME(gpte_access)(vcpu, gpte, true);
+		pte_access &= FNAME(gpte_access)(vcpu, gpte);
+		protect_clean_gpte(&pte_access, gpte);
 
 		if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
 			continue;
-- 
cgit v1.2.3


From edc2ae84eb40a3c062210fe01af1cae1633cc810 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 13:53:08 +0300
Subject: KVM: MMU: Optimize gpte_access() slightly

If nx is disabled, then is gpte[63] is set we will hit a reserved
bit set fault before checking permissions; so we can ignore the
setting of efer.nxe.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index bf7b4ffafab8..064bcb32d84e 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -106,10 +106,8 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
 	unsigned access;
 
 	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
-
 #if PTTYPE == 64
-	if (vcpu->arch.mmu.nx)
-		access &= ~(gpte >> PT64_NX_SHIFT);
+	access &= ~(gpte >> PT64_NX_SHIFT);
 #endif
 	return access;
 }
-- 
cgit v1.2.3


From 3d34adec7081621ff51c195be045b87d75c0c49d Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 14:03:28 +0300
Subject: KVM: MMU: Move gpte_access() out of paging_tmpl.h

We no longer rely on paging_tmpl.h defines; so we can move the function
to mmu.c.

Rely on zero extension to 64 bits to get the correct nx behaviour.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/mmu.c         | 10 ++++++++++
 arch/x86/kvm/paging_tmpl.h | 21 +++++----------------
 2 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 54c9cb4fdfa4..f297a2ccf4f6 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3437,6 +3437,16 @@ static bool sync_mmio_spte(u64 *sptep, gfn_t gfn, unsigned access,
 	return false;
 }
 
+static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte)
+{
+	unsigned access;
+
+	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
+	access &= ~(gpte >> PT64_NX_SHIFT);
+
+	return access;
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 064bcb32d84e..1cbf576852ca 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -101,17 +101,6 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return (ret != orig_pte);
 }
 
-static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte)
-{
-	unsigned access;
-
-	access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK;
-#if PTTYPE == 64
-	access &= ~(gpte >> PT64_NX_SHIFT);
-#endif
-	return access;
-}
-
 static bool FNAME(is_last_gpte)(struct guest_walker *walker,
 				struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				pt_element_t gpte)
@@ -217,7 +206,7 @@ retry_walk:
 
 		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
 		if (last_gpte) {
-			pte_access = pt_access & FNAME(gpte_access)(vcpu, pte);
+			pte_access = pt_access & gpte_access(vcpu, pte);
 			/* check if the kernel is fetching from user page */
 			if (unlikely(pte_access & PT_USER_MASK) &&
 			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
@@ -268,7 +257,7 @@ retry_walk:
 			break;
 		}
 
-		pt_access &= FNAME(gpte_access)(vcpu, pte);
+		pt_access &= gpte_access(vcpu, pte);
 		--walker->level;
 	}
 
@@ -364,7 +353,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 		return;
 
 	pgprintk("%s: gpte %llx spte %p\n", __func__, (u64)gpte, spte);
-	pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+	pte_access = sp->role.access & gpte_access(vcpu, gpte);
 	protect_clean_gpte(&pte_access, gpte);
 	pfn = gfn_to_pfn_atomic(vcpu->kvm, gpte_to_gfn(gpte));
 	if (mmu_invalid_pfn(pfn))
@@ -438,7 +427,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
 		if (FNAME(prefetch_invalid_gpte)(vcpu, sp, spte, gpte))
 			continue;
 
-		pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
+		pte_access = sp->role.access & gpte_access(vcpu, gpte);
 		protect_clean_gpte(&pte_access, gpte);
 		gfn = gpte_to_gfn(gpte);
 		pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
@@ -791,7 +780,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 
 		gfn = gpte_to_gfn(gpte);
 		pte_access = sp->role.access;
-		pte_access &= FNAME(gpte_access)(vcpu, gpte);
+		pte_access &= gpte_access(vcpu, gpte);
 		protect_clean_gpte(&pte_access, gpte);
 
 		if (sync_mmio_spte(&sp->spt[i], gfn, pte_access, &nr_present))
-- 
cgit v1.2.3


From 8cbc70696f149e44753b0fe60162b4ff96c2dd2b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 14:18:51 +0300
Subject: KVM: MMU: Update accessed and dirty bits after guest pagetable walk

While unspecified, the behaviour of Intel processors is to first
perform the page table walk, then, if the walk was successful, to
atomically update the accessed and dirty bits of walked paging elements.

While we are not required to follow this exactly, doing so will allow us
to perform the access permissions check after the walk is complete, rather
than after each walk step.

(the tricky case is SMEP: a zero in any pte's U bit makes the referenced
page a supervisor page, so we can't fault on a one bit during the walk
itself).

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 76 ++++++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 29 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 1cbf576852ca..35a05dd2f69c 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -63,10 +63,12 @@
  */
 struct guest_walker {
 	int level;
+	unsigned max_level;
 	gfn_t table_gfn[PT_MAX_FULL_LEVELS];
 	pt_element_t ptes[PT_MAX_FULL_LEVELS];
 	pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
 	gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
+	pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
 	unsigned pt_access;
 	unsigned pte_access;
 	gfn_t gfn;
@@ -119,6 +121,43 @@ static bool FNAME(is_last_gpte)(struct guest_walker *walker,
 	return false;
 }
 
+static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
+					     struct kvm_mmu *mmu,
+					     struct guest_walker *walker,
+					     int write_fault)
+{
+	unsigned level, index;
+	pt_element_t pte, orig_pte;
+	pt_element_t __user *ptep_user;
+	gfn_t table_gfn;
+	int ret;
+
+	for (level = walker->max_level; level >= walker->level; --level) {
+		pte = orig_pte = walker->ptes[level - 1];
+		table_gfn = walker->table_gfn[level - 1];
+		ptep_user = walker->ptep_user[level - 1];
+		index = offset_in_page(ptep_user) / sizeof(pt_element_t);
+		if (!(pte & PT_ACCESSED_MASK)) {
+			trace_kvm_mmu_set_accessed_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_ACCESSED_MASK;
+		}
+		if (level == walker->level && write_fault && !is_dirty_gpte(pte)) {
+			trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
+			pte |= PT_DIRTY_MASK;
+		}
+		if (pte == orig_pte)
+			continue;
+
+		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
+		if (ret)
+			return ret;
+
+		mark_page_dirty(vcpu->kvm, table_gfn);
+		walker->ptes[level] = pte;
+	}
+	return 0;
+}
+
 /*
  * Fetch a guest pte for a guest virtual address
  */
@@ -126,6 +165,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 				    struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 				    gva_t addr, u32 access)
 {
+	int ret;
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
@@ -153,6 +193,7 @@ retry_walk:
 		--walker->level;
 	}
 #endif
+	walker->max_level = walker->level;
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
 
@@ -183,6 +224,7 @@ retry_walk:
 		ptep_user = (pt_element_t __user *)((void *)host_addr + offset);
 		if (unlikely(__copy_from_user(&pte, ptep_user, sizeof(pte))))
 			goto error;
+		walker->ptep_user[walker->level - 1] = ptep_user;
 
 		trace_kvm_mmu_paging_element(pte, walker->level);
 
@@ -214,21 +256,6 @@ retry_walk:
 					eperm = true;
 		}
 
-		if (!eperm && unlikely(!(pte & PT_ACCESSED_MASK))) {
-			int ret;
-			trace_kvm_mmu_set_accessed_bit(table_gfn, index,
-						       sizeof(pte));
-			ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-						  pte, pte|PT_ACCESSED_MASK);
-			if (unlikely(ret < 0))
-				goto error;
-			else if (ret)
-				goto retry_walk;
-
-			mark_page_dirty(vcpu->kvm, table_gfn);
-			pte |= PT_ACCESSED_MASK;
-		}
-
 		walker->ptes[walker->level - 1] = pte;
 
 		if (last_gpte) {
@@ -268,21 +295,12 @@ retry_walk:
 
 	if (!write_fault)
 		protect_clean_gpte(&pte_access, pte);
-	else if (unlikely(!is_dirty_gpte(pte))) {
-		int ret;
 
-		trace_kvm_mmu_set_dirty_bit(table_gfn, index, sizeof(pte));
-		ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index,
-					  pte, pte|PT_DIRTY_MASK);
-		if (unlikely(ret < 0))
-			goto error;
-		else if (ret)
-			goto retry_walk;
-
-		mark_page_dirty(vcpu->kvm, table_gfn);
-		pte |= PT_DIRTY_MASK;
-		walker->ptes[walker->level - 1] = pte;
-	}
+	ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
+	if (unlikely(ret < 0))
+		goto error;
+	else if (ret)
+		goto retry_walk;
 
 	walker->pt_access = pt_access;
 	walker->pte_access = pte_access;
-- 
cgit v1.2.3


From 97d64b788114be1c4dc4bfe7a8ba2bf9643fe6af Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 14:52:00 +0300
Subject: KVM: MMU: Optimize pte permission checks

walk_addr_generic() permission checks are a maze of branchy code, which is
performed four times per lookup.  It depends on the type of access, efer.nxe,
cr0.wp, cr4.smep, and in the near future, cr4.smap.

Optimize this away by precalculating all variants and storing them in a
bitmap.  The bitmap is recalculated when rarely-changing variables change
(cr0, cr4) and is indexed by the often-changing variables (page fault error
code, pte access permissions).

The permission check is moved to the end of the loop, otherwise an SMEP
fault could be reported as a false positive, when PDE.U=1 but PTE.U=0.
Noted by Xiao Guangrong.

The result is short, branch-free code.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  7 +++++++
 arch/x86/kvm/mmu.c              | 38 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu.h              | 19 ++++++++-----------
 arch/x86/kvm/paging_tmpl.h      | 22 ++++------------------
 arch/x86/kvm/x86.c              | 11 ++++-------
 5 files changed, 61 insertions(+), 36 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 64adb6117e19..3318bde206a5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -287,6 +287,13 @@ struct kvm_mmu {
 	union kvm_mmu_page_role base_role;
 	bool direct_map;
 
+	/*
+	 * Bitmap; bit set = permission fault
+	 * Byte index: page fault error code [4:1]
+	 * Bit index: pte permissions in ACC_* format
+	 */
+	u8 permissions[16];
+
 	u64 *pae_root;
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index f297a2ccf4f6..9c6188931f87 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3516,6 +3516,38 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu,
 	}
 }
 
+static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	unsigned bit, byte, pfec;
+	u8 map;
+	bool fault, x, w, u, wf, uf, ff, smep;
+
+	smep = kvm_read_cr4_bits(vcpu, X86_CR4_SMEP);
+	for (byte = 0; byte < ARRAY_SIZE(mmu->permissions); ++byte) {
+		pfec = byte << 1;
+		map = 0;
+		wf = pfec & PFERR_WRITE_MASK;
+		uf = pfec & PFERR_USER_MASK;
+		ff = pfec & PFERR_FETCH_MASK;
+		for (bit = 0; bit < 8; ++bit) {
+			x = bit & ACC_EXEC_MASK;
+			w = bit & ACC_WRITE_MASK;
+			u = bit & ACC_USER_MASK;
+
+			/* Not really needed: !nx will cause pte.nx to fault */
+			x |= !mmu->nx;
+			/* Allow supervisor writes if !cr0.wp */
+			w |= !is_write_protection(vcpu) && !uf;
+			/* Disallow supervisor fetches of user code if cr4.smep */
+			x &= !(smep && u && !uf);
+
+			fault = (ff && !x) || (uf && !u) || (wf && !w);
+			map |= fault << bit;
+		}
+		mmu->permissions[byte] = map;
+	}
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 					struct kvm_mmu *context,
 					int level)
@@ -3524,6 +3556,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 	context->root_level = level;
 
 	reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
 
 	ASSERT(is_pae(vcpu));
 	context->new_cr3 = paging_new_cr3;
@@ -3552,6 +3585,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
 	context->root_level = PT32_ROOT_LEVEL;
 
 	reset_rsvds_bits_mask(vcpu, context);
+	update_permission_bitmask(vcpu, context);
 
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
@@ -3612,6 +3646,8 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 		context->gva_to_gpa = paging32_gva_to_gpa;
 	}
 
+	update_permission_bitmask(vcpu, context);
+
 	return 0;
 }
 
@@ -3687,6 +3723,8 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 		g_context->gva_to_gpa = paging32_gva_to_gpa_nested;
 	}
 
+	update_permission_bitmask(vcpu, g_context);
+
 	return 0;
 }
 
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 2832081e9b2e..584660775d08 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -89,17 +89,14 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
 	return kvm_read_cr0_bits(vcpu, X86_CR0_WP);
 }
 
-static inline bool check_write_user_access(struct kvm_vcpu *vcpu,
-					   bool write_fault, bool user_fault,
-					   unsigned long pte)
+/*
+ * Will a fault with a given page-fault error code (pfec) cause a permission
+ * fault with the given access (in ACC_* format)?
+ */
+static inline bool permission_fault(struct kvm_mmu *mmu, unsigned pte_access,
+				    unsigned pfec)
 {
-	if (unlikely(write_fault && !is_writable_pte(pte)
-	      && (user_fault || is_write_protection(vcpu))))
-		return false;
-
-	if (unlikely(user_fault && !(pte & PT_USER_MASK)))
-		return false;
-
-	return true;
+	return (mmu->permissions[pfec >> 1] >> pte_access) & 1;
 }
+
 #endif
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 35a05dd2f69c..8f6c59fadbbe 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -169,7 +169,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, uninitialized_var(pte_access);
+	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
 	bool eperm, last_gpte;
 	int offset;
@@ -237,24 +237,9 @@ retry_walk:
 			goto error;
 		}
 
-		if (!check_write_user_access(vcpu, write_fault, user_fault,
-					  pte))
-			eperm = true;
-
-#if PTTYPE == 64
-		if (unlikely(fetch_fault && (pte & PT64_NX_MASK)))
-			eperm = true;
-#endif
+		pte_access = pt_access & gpte_access(vcpu, pte);
 
 		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
-		if (last_gpte) {
-			pte_access = pt_access & gpte_access(vcpu, pte);
-			/* check if the kernel is fetching from user page */
-			if (unlikely(pte_access & PT_USER_MASK) &&
-			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP))
-				if (fetch_fault && !user_fault)
-					eperm = true;
-		}
 
 		walker->ptes[walker->level - 1] = pte;
 
@@ -284,10 +269,11 @@ retry_walk:
 			break;
 		}
 
-		pt_access &= gpte_access(vcpu, pte);
+		pt_access &= pte_access;
 		--walker->level;
 	}
 
+	eperm |= permission_fault(mmu, pte_access, access);
 	if (unlikely(eperm)) {
 		errcode |= PFERR_PRESENT_MASK;
 		goto error;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 19047eafa38d..497226e49d4b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3672,20 +3672,17 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
 				gpa_t *gpa, struct x86_exception *exception,
 				bool write)
 {
-	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+	u32 access = ((kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0)
+		| (write ? PFERR_WRITE_MASK : 0);
 
-	if (vcpu_match_mmio_gva(vcpu, gva) &&
-		  check_write_user_access(vcpu, write, access,
-		  vcpu->arch.access)) {
+	if (vcpu_match_mmio_gva(vcpu, gva)
+	    && !permission_fault(vcpu->arch.walk_mmu, vcpu->arch.access, access)) {
 		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
 					(gva & (PAGE_SIZE - 1));
 		trace_vcpu_match_mmio(gva, *gpa, write, false);
 		return 1;
 	}
 
-	if (write)
-		access |= PFERR_WRITE_MASK;
-
 	*gpa = vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
 
 	if (*gpa == UNMAPPED_GVA)
-- 
cgit v1.2.3


From 13d22b6aebb000aeaf137862c6c0e0c4d138d798 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 15:12:09 +0300
Subject: KVM: MMU: Simplify walk_addr_generic() loop

The page table walk is coded as an infinite loop, with a special
case on the last pte.

Code it as an ordinary loop with a termination condition on the last
pte (large page or walk length exhausted), and put the last pte handling
code after the loop where it belongs.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 60 +++++++++++++++++++---------------------------
 1 file changed, 25 insertions(+), 35 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 8f6c59fadbbe..1b4c14d235a0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -171,12 +171,15 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
-	bool eperm, last_gpte;
+	bool eperm;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault  = access & PFERR_USER_MASK;
 	const int fetch_fault = access & PFERR_FETCH_MASK;
 	u16 errcode = 0;
+	gpa_t real_gpa;
+	gfn_t gfn;
+	u32 ac;
 
 	trace_kvm_mmu_pagetable_walk(addr, access);
 retry_walk:
@@ -197,12 +200,16 @@ retry_walk:
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
 
-	pt_access = ACC_ALL;
+	pt_access = pte_access = ACC_ALL;
+	++walker->level;
 
-	for (;;) {
+	do {
 		gfn_t real_gfn;
 		unsigned long host_addr;
 
+		pt_access &= pte_access;
+		--walker->level;
+
 		index = PT_INDEX(addr, walker->level);
 
 		table_gfn = gpte_to_gfn(pte);
@@ -239,39 +246,8 @@ retry_walk:
 
 		pte_access = pt_access & gpte_access(vcpu, pte);
 
-		last_gpte = FNAME(is_last_gpte)(walker, vcpu, mmu, pte);
-
 		walker->ptes[walker->level - 1] = pte;
-
-		if (last_gpte) {
-			int lvl = walker->level;
-			gpa_t real_gpa;
-			gfn_t gfn;
-			u32 ac;
-
-			gfn = gpte_to_gfn_lvl(pte, lvl);
-			gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT;
-
-			if (PTTYPE == 32 &&
-			    walker->level == PT_DIRECTORY_LEVEL &&
-			    is_cpuid_PSE36())
-				gfn += pse36_gfn_delta(pte);
-
-			ac = write_fault | fetch_fault | user_fault;
-
-			real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn),
-						      ac);
-			if (real_gpa == UNMAPPED_GVA)
-				return 0;
-
-			walker->gfn = real_gpa >> PAGE_SHIFT;
-
-			break;
-		}
-
-		pt_access &= pte_access;
-		--walker->level;
-	}
+	} while (!FNAME(is_last_gpte)(walker, vcpu, mmu, pte));
 
 	eperm |= permission_fault(mmu, pte_access, access);
 	if (unlikely(eperm)) {
@@ -279,6 +255,20 @@ retry_walk:
 		goto error;
 	}
 
+	gfn = gpte_to_gfn_lvl(pte, walker->level);
+	gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT;
+
+	if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
+		gfn += pse36_gfn_delta(pte);
+
+	ac = write_fault | fetch_fault | user_fault;
+
+	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), ac);
+	if (real_gpa == UNMAPPED_GVA)
+		return 0;
+
+	walker->gfn = real_gpa >> PAGE_SHIFT;
+
 	if (!write_fault)
 		protect_clean_gpte(&pte_access, pte);
 
-- 
cgit v1.2.3


From 6fd01b711bee96ce3356f7b6f370ab708e37504b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 12 Sep 2012 20:46:56 +0300
Subject: KVM: MMU: Optimize is_last_gpte()

Instead of branchy code depending on level, gpte.ps, and mmu configuration,
prepare everything in a bitmap during mode changes and look it up during
runtime.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  7 +++++++
 arch/x86/kvm/mmu.c              | 31 +++++++++++++++++++++++++++++++
 arch/x86/kvm/mmu.h              |  3 ++-
 arch/x86/kvm/paging_tmpl.h      | 20 +-------------------
 4 files changed, 41 insertions(+), 20 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3318bde206a5..43aeb9422839 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -298,6 +298,13 @@ struct kvm_mmu {
 	u64 *lm_root;
 	u64 rsvd_bits_mask[2][4];
 
+	/*
+	 * Bitmap: bit set = last pte in walk
+	 * index[0:1]: level (zero-based)
+	 * index[2]: pte.ps
+	 */
+	u8 last_pte_bitmap;
+
 	bool nx;
 
 	u64 pdptrs[4]; /* pae */
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9c6188931f87..d289fee1ffb8 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3447,6 +3447,15 @@ static inline unsigned gpte_access(struct kvm_vcpu *vcpu, u64 gpte)
 	return access;
 }
 
+static inline bool is_last_gpte(struct kvm_mmu *mmu, unsigned level, unsigned gpte)
+{
+	unsigned index;
+
+	index = level - 1;
+	index |= (gpte & PT_PAGE_SIZE_MASK) >> (PT_PAGE_SIZE_SHIFT - 2);
+	return mmu->last_pte_bitmap & (1 << index);
+}
+
 #define PTTYPE 64
 #include "paging_tmpl.h"
 #undef PTTYPE
@@ -3548,6 +3557,24 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu
 	}
 }
 
+static void update_last_pte_bitmap(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
+{
+	u8 map;
+	unsigned level, root_level = mmu->root_level;
+	const unsigned ps_set_index = 1 << 2;  /* bit 2 of index: ps */
+
+	if (root_level == PT32E_ROOT_LEVEL)
+		--root_level;
+	/* PT_PAGE_TABLE_LEVEL always terminates */
+	map = 1 | (1 << ps_set_index);
+	for (level = PT_DIRECTORY_LEVEL; level <= root_level; ++level) {
+		if (level <= PT_PDPE_LEVEL
+		    && (mmu->root_level >= PT32E_ROOT_LEVEL || is_pse(vcpu)))
+			map |= 1 << (ps_set_index | (level - 1));
+	}
+	mmu->last_pte_bitmap = map;
+}
+
 static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 					struct kvm_mmu *context,
 					int level)
@@ -3557,6 +3584,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu,
 
 	reset_rsvds_bits_mask(vcpu, context);
 	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);
 
 	ASSERT(is_pae(vcpu));
 	context->new_cr3 = paging_new_cr3;
@@ -3586,6 +3614,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu,
 
 	reset_rsvds_bits_mask(vcpu, context);
 	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);
 
 	context->new_cr3 = paging_new_cr3;
 	context->page_fault = paging32_page_fault;
@@ -3647,6 +3676,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
 	}
 
 	update_permission_bitmask(vcpu, context);
+	update_last_pte_bitmap(vcpu, context);
 
 	return 0;
 }
@@ -3724,6 +3754,7 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu)
 	}
 
 	update_permission_bitmask(vcpu, g_context);
+	update_last_pte_bitmap(vcpu, g_context);
 
 	return 0;
 }
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 584660775d08..69871080e866 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -20,7 +20,8 @@
 #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT)
 #define PT_DIRTY_SHIFT 6
 #define PT_DIRTY_MASK (1ULL << PT_DIRTY_SHIFT)
-#define PT_PAGE_SIZE_MASK (1ULL << 7)
+#define PT_PAGE_SIZE_SHIFT 7
+#define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT)
 #define PT_PAT_MASK (1ULL << 7)
 #define PT_GLOBAL_MASK (1ULL << 8)
 #define PT64_NX_SHIFT 63
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 1b4c14d235a0..134ea7b1c585 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -103,24 +103,6 @@ static int FNAME(cmpxchg_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	return (ret != orig_pte);
 }
 
-static bool FNAME(is_last_gpte)(struct guest_walker *walker,
-				struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
-				pt_element_t gpte)
-{
-	if (walker->level == PT_PAGE_TABLE_LEVEL)
-		return true;
-
-	if ((walker->level == PT_DIRECTORY_LEVEL) && is_large_pte(gpte) &&
-	    (PTTYPE == 64 || is_pse(vcpu)))
-		return true;
-
-	if ((walker->level == PT_PDPE_LEVEL) && is_large_pte(gpte) &&
-	    (mmu->root_level == PT64_ROOT_LEVEL))
-		return true;
-
-	return false;
-}
-
 static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
 					     struct kvm_mmu *mmu,
 					     struct guest_walker *walker,
@@ -247,7 +229,7 @@ retry_walk:
 		pte_access = pt_access & gpte_access(vcpu, pte);
 
 		walker->ptes[walker->level - 1] = pte;
-	} while (!FNAME(is_last_gpte)(walker, vcpu, mmu, pte));
+	} while (!is_last_gpte(mmu, walker->level, pte));
 
 	eperm |= permission_fault(mmu, pte_access, access);
 	if (unlikely(eperm)) {
-- 
cgit v1.2.3


From 71331a1da1e3a66d14bb3864f99e32d84ab5a76f Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 14:49:15 +0300
Subject: KVM: MMU: Eliminate eperm temporary

'eperm' is no longer used in the walker loop, so we can eliminate it.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 134ea7b1c585..95a64d1dccc7 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -153,7 +153,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access;
 	gpa_t pte_gpa;
-	bool eperm;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
 	const int user_fault  = access & PFERR_USER_MASK;
@@ -165,7 +164,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 
 	trace_kvm_mmu_pagetable_walk(addr, access);
 retry_walk:
-	eperm = false;
 	walker->level = mmu->root_level;
 	pte           = mmu->get_cr3(vcpu);
 
@@ -231,8 +229,7 @@ retry_walk:
 		walker->ptes[walker->level - 1] = pte;
 	} while (!is_last_gpte(mmu, walker->level, pte));
 
-	eperm |= permission_fault(mmu, pte_access, access);
-	if (unlikely(eperm)) {
+	if (unlikely(permission_fault(mmu, pte_access, access))) {
 		errcode |= PFERR_PRESENT_MASK;
 		goto error;
 	}
-- 
cgit v1.2.3


From b514c30f7729b66af481fde3c1225e832e339d5b Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Sun, 16 Sep 2012 15:03:02 +0300
Subject: KVM: MMU: Avoid access/dirty update loop if all is well

Keep track of accessed/dirty bits; if they are all set, do not
enter the accessed/dirty update loop.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 95a64d1dccc7..810c1da2ee44 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -151,7 +151,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t pte;
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
-	unsigned index, pt_access, pte_access;
+	unsigned index, pt_access, pte_access, accessed_dirty, shift;
 	gpa_t pte_gpa;
 	int offset;
 	const int write_fault = access & PFERR_WRITE_MASK;
@@ -180,6 +180,7 @@ retry_walk:
 	ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
 	       (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0);
 
+	accessed_dirty = PT_ACCESSED_MASK;
 	pt_access = pte_access = ACC_ALL;
 	++walker->level;
 
@@ -224,6 +225,7 @@ retry_walk:
 			goto error;
 		}
 
+		accessed_dirty &= pte;
 		pte_access = pt_access & gpte_access(vcpu, pte);
 
 		walker->ptes[walker->level - 1] = pte;
@@ -251,11 +253,23 @@ retry_walk:
 	if (!write_fault)
 		protect_clean_gpte(&pte_access, pte);
 
-	ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
-	if (unlikely(ret < 0))
-		goto error;
-	else if (ret)
-		goto retry_walk;
+	/*
+	 * On a write fault, fold the dirty bit into accessed_dirty by shifting it one
+	 * place right.
+	 *
+	 * On a read fault, do nothing.
+	 */
+	shift = write_fault >> ilog2(PFERR_WRITE_MASK);
+	shift *= PT_DIRTY_SHIFT - PT_ACCESSED_SHIFT;
+	accessed_dirty &= pte >> shift;
+
+	if (unlikely(!accessed_dirty)) {
+		ret = FNAME(update_accessed_dirty_bits)(vcpu, mmu, walker, write_fault);
+		if (unlikely(ret < 0))
+			goto error;
+		else if (ret)
+			goto retry_walk;
+	}
 
 	walker->pt_access = pt_access;
 	walker->pte_access = pte_access;
-- 
cgit v1.2.3


From c5421519f30bd5ed77857a78de6dc8414385e602 Mon Sep 17 00:00:00 2001
From: Avi Kivity <avi@redhat.com>
Date: Wed, 19 Sep 2012 19:33:48 +0300
Subject: KVM: MMU: Eliminate pointless temporary 'ac'

'ac' essentially reconstructs the 'access' variable we already
have, except for the PFERR_PRESENT_MASK and PFERR_RSVD_MASK.  As
these are not used by callees, just use 'access' directly.

Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/kvm/paging_tmpl.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 810c1da2ee44..714e2c01a6fe 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -160,7 +160,6 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	u16 errcode = 0;
 	gpa_t real_gpa;
 	gfn_t gfn;
-	u32 ac;
 
 	trace_kvm_mmu_pagetable_walk(addr, access);
 retry_walk:
@@ -242,9 +241,7 @@ retry_walk:
 	if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36())
 		gfn += pse36_gfn_delta(pte);
 
-	ac = write_fault | fetch_fault | user_fault;
-
-	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), ac);
+	real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access);
 	if (real_gpa == UNMAPPED_GVA)
 		return 0;
 
-- 
cgit v1.2.3


From 1e08ec4a130e2745d96df169e67c58df98a07311 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Thu, 13 Sep 2012 17:19:24 +0300
Subject: KVM: optimize apic interrupt delivery

Most interrupt are delivered to only one vcpu. Use pre-build tables to
find interrupt destination instead of looping through all vcpus. In case
of logical mode loop only through vcpus in a logical cluster irq is sent
to.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  12 +++
 arch/x86/kvm/lapic.c            | 188 +++++++++++++++++++++++++++++++++++++---
 arch/x86/kvm/lapic.h            |   3 +
 arch/x86/kvm/x86.c              |   2 +
 virt/kvm/irq_comm.c             |   7 +-
 5 files changed, 199 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 43aeb9422839..0b902c98f279 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -525,6 +525,16 @@ struct kvm_arch_memory_slot {
 	struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1];
 };
 
+struct kvm_apic_map {
+	struct rcu_head rcu;
+	u8 ldr_bits;
+	/* fields bellow are used to decode ldr values in different modes */
+	u32 cid_shift, cid_mask, lid_mask;
+	struct kvm_lapic *phys_map[256];
+	/* first index is cluster id second is cpu id in a cluster */
+	struct kvm_lapic *logical_map[16][16];
+};
+
 struct kvm_arch {
 	unsigned int n_used_mmu_pages;
 	unsigned int n_requested_mmu_pages;
@@ -542,6 +552,8 @@ struct kvm_arch {
 	struct kvm_ioapic *vioapic;
 	struct kvm_pit *vpit;
 	int vapics_in_nmi_mode;
+	struct mutex apic_map_lock;
+	struct kvm_apic_map *apic_map;
 
 	unsigned int tss_addr;
 	struct page *apic_access_page;
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 6f9fd633c888..c6e6b721b6ee 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -140,11 +140,110 @@ static inline int apic_enabled(struct kvm_lapic *apic)
 	(LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
 	 APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
 
+static inline int apic_x2apic_mode(struct kvm_lapic *apic)
+{
+	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
+}
+
 static inline int kvm_apic_id(struct kvm_lapic *apic)
 {
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
+static inline u16 apic_cluster_id(struct kvm_apic_map *map, u32 ldr)
+{
+	u16 cid;
+	ldr >>= 32 - map->ldr_bits;
+	cid = (ldr >> map->cid_shift) & map->cid_mask;
+
+	BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
+
+	return cid;
+}
+
+static inline u16 apic_logical_id(struct kvm_apic_map *map, u32 ldr)
+{
+	ldr >>= (32 - map->ldr_bits);
+	return ldr & map->lid_mask;
+}
+
+static void recalculate_apic_map(struct kvm *kvm)
+{
+	struct kvm_apic_map *new, *old = NULL;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	new = kzalloc(sizeof(struct kvm_apic_map), GFP_KERNEL);
+
+	mutex_lock(&kvm->arch.apic_map_lock);
+
+	if (!new)
+		goto out;
+
+	new->ldr_bits = 8;
+	/* flat mode is default */
+	new->cid_shift = 8;
+	new->cid_mask = 0;
+	new->lid_mask = 0xff;
+
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvm_lapic *apic = vcpu->arch.apic;
+		u16 cid, lid;
+		u32 ldr;
+
+		if (!kvm_apic_present(vcpu))
+			continue;
+
+		/*
+		 * All APICs have to be configured in the same mode by an OS.
+		 * We take advatage of this while building logical id loockup
+		 * table. After reset APICs are in xapic/flat mode, so if we
+		 * find apic with different setting we assume this is the mode
+		 * OS wants all apics to be in; build lookup table accordingly.
+		 */
+		if (apic_x2apic_mode(apic)) {
+			new->ldr_bits = 32;
+			new->cid_shift = 16;
+			new->cid_mask = new->lid_mask = 0xffff;
+		} else if (kvm_apic_sw_enabled(apic) &&
+				!new->cid_mask /* flat mode */ &&
+				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
+			new->cid_shift = 4;
+			new->cid_mask = 0xf;
+			new->lid_mask = 0xf;
+		}
+
+		new->phys_map[kvm_apic_id(apic)] = apic;
+
+		ldr = kvm_apic_get_reg(apic, APIC_LDR);
+		cid = apic_cluster_id(new, ldr);
+		lid = apic_logical_id(new, ldr);
+
+		if (lid)
+			new->logical_map[cid][ffs(lid) - 1] = apic;
+	}
+out:
+	old = rcu_dereference_protected(kvm->arch.apic_map,
+			lockdep_is_held(&kvm->arch.apic_map_lock));
+	rcu_assign_pointer(kvm->arch.apic_map, new);
+	mutex_unlock(&kvm->arch.apic_map_lock);
+
+	if (old)
+		kfree_rcu(old, rcu);
+}
+
+static inline void kvm_apic_set_id(struct kvm_lapic *apic, u8 id)
+{
+	apic_set_reg(apic, APIC_ID, id << 24);
+	recalculate_apic_map(apic->vcpu->kvm);
+}
+
+static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
+{
+	apic_set_reg(apic, APIC_LDR, id);
+	recalculate_apic_map(apic->vcpu->kvm);
+}
+
 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
 {
 	return !(kvm_apic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
@@ -194,11 +293,6 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
 	apic_set_reg(apic, APIC_LVR, v);
 }
 
-static inline int apic_x2apic_mode(struct kvm_lapic *apic)
-{
-	return apic->vcpu->arch.apic_base & X2APIC_ENABLE;
-}
-
 static const unsigned int apic_lvt_mask[APIC_LVT_NUM] = {
 	LVT_MASK ,      /* part LVTT mask, timer mode mask added at runtime */
 	LVT_MASK | APIC_MODE_MASK,	/* LVTTHMR */
@@ -483,6 +577,72 @@ int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	return result;
 }
 
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, int *r)
+{
+	struct kvm_apic_map *map;
+	unsigned long bitmap = 1;
+	struct kvm_lapic **dst;
+	int i;
+	bool ret = false;
+
+	*r = -1;
+
+	if (irq->shorthand == APIC_DEST_SELF) {
+		*r = kvm_apic_set_irq(src->vcpu, irq);
+		return true;
+	}
+
+	if (irq->shorthand)
+		return false;
+
+	rcu_read_lock();
+	map = rcu_dereference(kvm->arch.apic_map);
+
+	if (!map)
+		goto out;
+
+	if (irq->dest_mode == 0) { /* physical mode */
+		if (irq->delivery_mode == APIC_DM_LOWEST ||
+				irq->dest_id == 0xff)
+			goto out;
+		dst = &map->phys_map[irq->dest_id & 0xff];
+	} else {
+		u32 mda = irq->dest_id << (32 - map->ldr_bits);
+
+		dst = map->logical_map[apic_cluster_id(map, mda)];
+
+		bitmap = apic_logical_id(map, mda);
+
+		if (irq->delivery_mode == APIC_DM_LOWEST) {
+			int l = -1;
+			for_each_set_bit(i, &bitmap, 16) {
+				if (!dst[i])
+					continue;
+				if (l < 0)
+					l = i;
+				else if (kvm_apic_compare_prio(dst[i]->vcpu, dst[l]->vcpu) < 0)
+					l = i;
+			}
+
+			bitmap = (l >= 0) ? 1 << l : 0;
+		}
+	}
+
+	for_each_set_bit(i, &bitmap, 16) {
+		if (!dst[i])
+			continue;
+		if (*r < 0)
+			*r = 0;
+		*r += kvm_apic_set_irq(dst[i]->vcpu, irq);
+	}
+
+	ret = true;
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
 /*
  * Add a pending IRQ into lapic.
  * Return 1 if successfully added and 0 if discarded.
@@ -886,7 +1046,7 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	switch (reg) {
 	case APIC_ID:		/* Local APIC ID */
 		if (!apic_x2apic_mode(apic))
-			apic_set_reg(apic, APIC_ID, val);
+			kvm_apic_set_id(apic, val >> 24);
 		else
 			ret = 1;
 		break;
@@ -902,15 +1062,16 @@ static int apic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 
 	case APIC_LDR:
 		if (!apic_x2apic_mode(apic))
-			apic_set_reg(apic, APIC_LDR, val & APIC_LDR_MASK);
+			kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
 		else
 			ret = 1;
 		break;
 
 	case APIC_DFR:
-		if (!apic_x2apic_mode(apic))
+		if (!apic_x2apic_mode(apic)) {
 			apic_set_reg(apic, APIC_DFR, val | 0x0FFFFFFF);
-		else
+			recalculate_apic_map(apic->vcpu->kvm);
+		} else
 			ret = 1;
 		break;
 
@@ -1141,6 +1302,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 			static_key_slow_dec_deferred(&apic_hw_disabled);
 		else
 			static_key_slow_inc(&apic_hw_disabled.key);
+		recalculate_apic_map(vcpu->kvm);
 	}
 
 	if (!kvm_vcpu_is_bsp(apic->vcpu))
@@ -1150,7 +1312,7 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	if (apic_x2apic_mode(apic)) {
 		u32 id = kvm_apic_id(apic);
 		u32 ldr = ((id & ~0xf) << 16) | (1 << (id & 0xf));
-		apic_set_reg(apic, APIC_LDR, ldr);
+		kvm_apic_set_ldr(apic, ldr);
 	}
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
@@ -1175,7 +1337,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->lapic_timer.timer);
 
-	apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24);
+	kvm_apic_set_id(apic, vcpu->vcpu_id);
 	kvm_apic_set_version(apic->vcpu);
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
@@ -1186,7 +1348,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
 	apic_set_spiv(apic, 0xff);
 	apic_set_reg(apic, APIC_TASKPRI, 0);
-	apic_set_reg(apic, APIC_LDR, 0);
+	kvm_apic_set_ldr(apic, 0);
 	apic_set_reg(apic, APIC_ESR, 0);
 	apic_set_reg(apic, APIC_ICR, 0);
 	apic_set_reg(apic, APIC_ICR2, 0);
@@ -1404,6 +1566,8 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
 	/* set SPIV separately to get count of SW disabled APICs right */
 	apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof *s);
+	/* call kvm_apic_set_id() to put apic into apic_map */
+	kvm_apic_set_id(apic, kvm_apic_id(apic));
 	kvm_apic_set_version(vcpu);
 
 	apic_update_ppr(apic);
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 615a8b030168..e5ebf9f3571f 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -52,6 +52,9 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
 
+bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
+		struct kvm_lapic_irq *irq, int *r);
+
 u64 kvm_get_apic_base(struct kvm_vcpu *vcpu);
 void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data);
 void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 497226e49d4b..fc2a0a132e4b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6270,6 +6270,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
+	mutex_init(&kvm->arch.apic_map_lock);
 
 	return 0;
 }
@@ -6322,6 +6323,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 		put_page(kvm->arch.apic_access_page);
 	if (kvm->arch.ept_identity_pagetable)
 		put_page(kvm->arch.ept_identity_pagetable);
+	kfree(rcu_dereference_check(kvm->arch.apic_map, 1));
 }
 
 void kvm_arch_free_memslot(struct kvm_memory_slot *free,
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 7118be0f2f2c..3ca89c451d6b 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -68,8 +68,13 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
 	struct kvm_vcpu *vcpu, *lowest = NULL;
 
 	if (irq->dest_mode == 0 && irq->dest_id == 0xff &&
-			kvm_is_dm_lowest_prio(irq))
+			kvm_is_dm_lowest_prio(irq)) {
 		printk(KERN_INFO "kvm: apic: phys broadcast and lowest prio\n");
+		irq->delivery_mode = APIC_DM_FIXED;
+	}
+
+	if (kvm_irq_delivery_to_apic_fast(kvm, src, irq, &r))
+		return r;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		if (!kvm_apic_present(vcpu))
-- 
cgit v1.2.3


From 50a011f6409e888d5f41343024d24885281f048c Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Thu, 20 Sep 2012 14:43:54 +0200
Subject: kprobes/x86: Move skip_singlestep up
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

I get this warning:

  arch/x86/kernel/kprobes.c:544:23: warning: ‘skip_singlestep’ declared ‘static’ but never defined

on tip/auto-latest.

Put the skip_singlestep function declaration up, in
KPROBES_CAN_USE_FTRACE and drop the superfluous forward
declaration.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1348145034-16603-1-git-send-email-bp@amd64.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/kprobes.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b7c2a85d1926..57916c0d3cf6 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -541,8 +541,23 @@ reenter_kprobe(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb
 	return 1;
 }
 
+#ifdef KPROBES_CAN_USE_FTRACE
 static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-				      struct kprobe_ctlblk *kcb);
+				      struct kprobe_ctlblk *kcb)
+{
+	/*
+	 * Emulate singlestep (and also recover regs->ip)
+	 * as if there is a 5byte nop
+	 */
+	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
+	if (unlikely(p->post_handler)) {
+		kcb->kprobe_status = KPROBE_HIT_SSDONE;
+		p->post_handler(p, regs, 0);
+	}
+	__this_cpu_write(current_kprobe, NULL);
+}
+#endif
+
 /*
  * Interrupts are disabled on entry as trap3 is an interrupt gate and they
  * remain disabled throughout this function.
@@ -1061,21 +1076,6 @@ int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 }
 
 #ifdef KPROBES_CAN_USE_FTRACE
-static void __kprobes skip_singlestep(struct kprobe *p, struct pt_regs *regs,
-				      struct kprobe_ctlblk *kcb)
-{
-	/*
-	 * Emulate singlestep (and also recover regs->ip)
-	 * as if there is a 5byte nop
-	 */
-	regs->ip = (unsigned long)p->addr + MCOUNT_INSN_SIZE;
-	if (unlikely(p->post_handler)) {
-		kcb->kprobe_status = KPROBE_HIT_SSDONE;
-		p->post_handler(p, regs, 0);
-	}
-	__this_cpu_write(current_kprobe, NULL);
-}
-
 /* Ftrace callback handler for kprobes */
 void __kprobes kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
 				     struct ftrace_ops *ops, struct pt_regs *regs)
-- 
cgit v1.2.3


From e76623d69408d0bd66a296c6ee5eae1b17a6adfc Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 2 Aug 2012 22:12:06 +0400
Subject: x86: get rid of TIF_IRET hackery

TIF_NOTIFY_RESUME will work in precisely the same way; all that
is achieved by TIF_IRET is appearing that there's some work to be
done, so we end up on the iret exit path.  Just use NOTIFY_RESUME.
And for execve() do that in 32bit start_thread(), not sys_execve()
itself.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/include/asm/thread_info.h | 2 --
 arch/x86/kernel/process.c          | 8 --------
 arch/x86/kernel/process_32.c       | 5 +++++
 arch/x86/kernel/signal.c           | 4 ----
 arch/x86/kernel/vm86_32.c          | 6 +++---
 5 files changed, 8 insertions(+), 17 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89f794f007ec..c509d07bdbd7 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -79,7 +79,6 @@ struct thread_info {
 #define TIF_SIGPENDING		2	/* signal pending */
 #define TIF_NEED_RESCHED	3	/* rescheduling necessary */
 #define TIF_SINGLESTEP		4	/* reenable singlestep on user return*/
-#define TIF_IRET		5	/* force IRET */
 #define TIF_SYSCALL_EMU		6	/* syscall emulation active */
 #define TIF_SYSCALL_AUDIT	7	/* syscall auditing active */
 #define TIF_SECCOMP		8	/* secure computing */
@@ -104,7 +103,6 @@ struct thread_info {
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
 #define _TIF_SINGLESTEP		(1 << TIF_SINGLESTEP)
 #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
-#define _TIF_IRET		(1 << TIF_IRET)
 #define _TIF_SYSCALL_EMU	(1 << TIF_SYSCALL_EMU)
 #define _TIF_SYSCALL_AUDIT	(1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP		(1 << TIF_SECCOMP)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ef6a8456f719..7162e9c1f598 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -351,14 +351,6 @@ long sys_execve(const char __user *name,
 	if (IS_ERR(filename))
 		return error;
 	error = do_execve(filename, argv, envp, regs);
-
-#ifdef CONFIG_X86_32
-	if (error == 0) {
-		/* Make sure we don't return using sysenter.. */
-                set_thread_flag(TIF_IRET);
-        }
-#endif
-
 	putname(filename);
 	return error;
 }
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 516fa186121b..75fcad146def 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -194,6 +194,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	 * Free the old FP and other extended state
 	 */
 	free_thread_xstate(current);
+	/*
+	 * force it to the iret return path by making it look as if there was
+	 * some work pending.
+	 */
+	set_thread_flag(TIF_NOTIFY_RESUME);
 }
 EXPORT_SYMBOL_GPL(start_thread);
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b280908a376e..c648fc529872 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -800,10 +800,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	}
 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
-
-#ifdef CONFIG_X86_32
-	clear_thread_flag(TIF_IRET);
-#endif /* CONFIG_X86_32 */
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 54abcc0baf23..5c9687b1bde6 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -561,9 +561,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
 		if ((trapno == 3) || (trapno == 1)) {
 			KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
 			/* setting this flag forces the code in entry_32.S to
-			   call save_v86_state() and change the stack pointer
-			   to KVM86->regs32 */
-			set_thread_flag(TIF_IRET);
+			   the path where we call save_v86_state() and change
+			   the stack pointer to KVM86->regs32 */
+			set_thread_flag(TIF_NOTIFY_RESUME);
 			return 0;
 		}
 		do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
-- 
cgit v1.2.3


From 8e2c85aa6c7a158d967db75931db7f13d20d31f4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 6 Sep 2012 13:39:47 -0400
Subject: um: let signal_delivered() do SIGTRAP on singlestepping into handler

... rather than duplicating that in sigframe setup code (and doing that
inconsistently, at that)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/kernel/signal.c | 6 +++++-
 arch/x86/um/signal.c    | 6 ------
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 7362d58efc29..cc9c2350e417 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -22,9 +22,13 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 			 struct k_sigaction *ka, siginfo_t *info)
 {
 	sigset_t *oldset = sigmask_to_save();
+	int singlestep = 0;
 	unsigned long sp;
 	int err;
 
+	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
+		singlestep = 1;
+
 	/* Did we come from a system call? */
 	if (PT_REGS_SYSCALL_NR(regs) >= 0) {
 		/* If so, check system call restarting.. */
@@ -61,7 +65,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 	if (err)
 		force_sigsegv(signr, current);
 	else
-		signal_delivered(signr, info, ka, regs, 0);
+		signal_delivered(signr, info, ka, regs, singlestep);
 }
 
 static int kern_do_signal(struct pt_regs *regs)
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index a508cea13503..ba7363ecf896 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 	PT_REGS_AX(regs) = (unsigned long) sig;
 	PT_REGS_DX(regs) = (unsigned long) 0;
 	PT_REGS_CX(regs) = (unsigned long) 0;
-
-	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
-		ptrace_notify(SIGTRAP);
 	return 0;
 }
 
@@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	PT_REGS_AX(regs) = (unsigned long) sig;
 	PT_REGS_DX(regs) = (unsigned long) &frame->info;
 	PT_REGS_CX(regs) = (unsigned long) &frame->uc;
-
-	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
-		ptrace_notify(SIGTRAP);
 	return 0;
 }
 
-- 
cgit v1.2.3


From a4d94ff8aa864c05b33c2de1f8c5d0176d7a4b63 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 20 Sep 2012 09:28:25 -0400
Subject: um: kill thread->forking

we only use that to tell copy_thread() done by syscall from that
done by kernel_thread().  However, it's easier to do simply by
checking PF_KTHREAD in thread flags.

Merge sys_clone() guts for 32bit and 64bit, while we are at it...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/include/asm/processor-generic.h |  9 ---------
 arch/um/kernel/process.c                |  8 ++++----
 arch/um/kernel/syscall.c                | 24 ++++++++++++------------
 arch/x86/um/shared/sysdep/syscalls.h    |  2 ++
 arch/x86/um/sys_call_table_32.c         |  2 +-
 arch/x86/um/syscalls_32.c               | 27 +++++++--------------------
 arch/x86/um/syscalls_64.c               | 23 +++--------------------
 7 files changed, 29 insertions(+), 66 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 69f1c57a8d0d..33a6a2423bd2 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -20,14 +20,6 @@ struct mm_struct;
 
 struct thread_struct {
 	struct task_struct *saved_task;
-	/*
-	 * This flag is set to 1 before calling do_fork (and analyzed in
-	 * copy_thread) to mark that we are begin called from userspace (fork /
-	 * vfork / clone), and reset to 0 after. It is left to 0 when called
-	 * from kernelspace (i.e. kernel_thread() or fork_idle(),
-	 * as of 2.6.11).
-	 */
-	int forking;
 	struct pt_regs regs;
 	int singlestep_syscall;
 	void *fault_addr;
@@ -58,7 +50,6 @@ struct thread_struct {
 
 #define INIT_THREAD \
 { \
-	.forking		= 0, \
 	.regs		   	= EMPTY_REGS,	\
 	.fault_addr		= NULL, \
 	.prev_sched		= NULL, \
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 57fc7028714a..c5f5afa50745 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -181,11 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		struct pt_regs *regs)
 {
 	void (*handler)(void);
+	int kthread = current->flags & PF_KTHREAD;
 	int ret = 0;
 
 	p->thread = (struct thread_struct) INIT_THREAD;
 
-	if (current->thread.forking) {
+	if (!kthread) {
 	  	memcpy(&p->thread.regs.regs, &regs->regs,
 		       sizeof(p->thread.regs.regs));
 		PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0);
@@ -195,8 +196,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		handler = fork_handler;
 
 		arch_copy_thread(&current->thread.arch, &p->thread.arch);
-	}
-	else {
+	} else {
 		get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
 		p->thread.request.u.thread = current->thread.request.u.thread;
 		handler = new_thread_handler;
@@ -204,7 +204,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	new_thread(task_stack_page(p), &p->thread.switch_buf, handler);
 
-	if (current->thread.forking) {
+	if (!kthread) {
 		clear_flushed_tls(p);
 
 		/*
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index f958cb876ee3..a4c6d8eee74c 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -17,25 +17,25 @@
 
 long sys_fork(void)
 {
-	long ret;
-
-	current->thread.forking = 1;
-	ret = do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
+	return do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
 		      &current->thread.regs, 0, NULL, NULL);
-	current->thread.forking = 0;
-	return ret;
 }
 
 long sys_vfork(void)
 {
-	long ret;
-
-	current->thread.forking = 1;
-	ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
 		      UPT_SP(&current->thread.regs.regs),
 		      &current->thread.regs, 0, NULL, NULL);
-	current->thread.forking = 0;
-	return ret;
+}
+
+long sys_clone(unsigned long clone_flags, unsigned long newsp,
+	       void __user *parent_tid, void __user *child_tid)
+{
+	if (!newsp)
+		newsp = UPT_SP(&current->thread.regs.regs);
+
+	return do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
+		      child_tid);
 }
 
 long old_mmap(unsigned long addr, unsigned long len,
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
index bd9a89b67e41..ca255a805ed9 100644
--- a/arch/x86/um/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls.h
@@ -1,3 +1,5 @@
+extern long sys_clone(unsigned long clone_flags, unsigned long newsp,
+	       void __user *parent_tid, void __user *child_tid);
 #ifdef __i386__
 #include "syscalls_32.h"
 #else
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 68d1dc91b37b..b5408cecac6c 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -28,7 +28,7 @@
 #define ptregs_execve sys_execve
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
-#define ptregs_clone sys_clone
+#define ptregs_clone i386_clone
 #define ptregs_vm86 sys_vm86
 #define ptregs_sigaltstack sys_sigaltstack
 #define ptregs_vfork sys_vfork
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
index b853e8600b9d..db444c7218fe 100644
--- a/arch/x86/um/syscalls_32.c
+++ b/arch/x86/um/syscalls_32.c
@@ -3,37 +3,24 @@
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
-#include "linux/shm.h"
-#include "linux/ipc.h"
-#include "linux/syscalls.h"
-#include "asm/mman.h"
-#include "asm/uaccess.h"
-#include "asm/unistd.h"
+#include <linux/syscalls.h>
+#include <sysdep/syscalls.h>
 
 /*
  * The prototype on i386 is:
  *
- *     int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr)
+ *     int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls
  *
  * and the "newtls" arg. on i386 is read by copy_thread directly from the
  * register saved on the stack.
  */
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       int __user *parent_tid, void *newtls, int __user *child_tid)
+long i386_clone(unsigned long clone_flags, unsigned long newsp,
+		int __user *parent_tid, void *newtls, int __user *child_tid)
 {
-	long ret;
-
-	if (!newsp)
-		newsp = UPT_SP(&current->thread.regs.regs);
-
-	current->thread.forking = 1;
-	ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
-		      child_tid);
-	current->thread.forking = 0;
-	return ret;
+	return sys_clone(clone_flags, newsp, parent_tid, child_tid);
 }
 
+
 long sys_sigaction(int sig, const struct old_sigaction __user *act,
 			 struct old_sigaction __user *oact)
 {
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index f3d82bb6e15a..adb08eb5c22a 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -5,12 +5,9 @@
  * Licensed under the GPL
  */
 
-#include "linux/linkage.h"
-#include "linux/personality.h"
-#include "linux/utsname.h"
-#include "asm/prctl.h" /* XXX This should get the constants from libc */
-#include "asm/uaccess.h"
-#include "os.h"
+#include <linux/sched.h>
+#include <asm/prctl.h> /* XXX This should get the constants from libc */
+#include <os.h>
 
 long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
 {
@@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr)
 	return arch_prctl(current, code, (unsigned long __user *) addr);
 }
 
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       void __user *parent_tid, void __user *child_tid)
-{
-	long ret;
-
-	if (!newsp)
-		newsp = UPT_SP(&current->thread.regs.regs);
-	current->thread.forking = 1;
-	ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
-		      child_tid);
-	current->thread.forking = 0;
-	return ret;
-}
-
 void arch_switch_to(struct task_struct *to)
 {
 	if ((to->thread.arch.fs == 0) || (to->mm == NULL))
-- 
cgit v1.2.3


From 24cc7fb69a5b5edfdff1d38c6a213d6a33648829 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.de>
Date: Thu, 20 Sep 2012 10:28:45 -0400
Subject: x86/kbuild: archscripts depends on scripts_basic

While building the SUSE kernel packages, which build the scripts,
make clean, and then build everything, we have been running into spurious
build failures. We tracked them down to a simple dependency issue:

$ make mrproper
  CLEAN   arch/x86/tools
  CLEAN   scripts/basic
$ cp patches/config/x86_64/desktop .config
$ make archscripts
  HOSTCC  arch/x86/tools/relocs
/bin/sh: scripts/basic/fixdep: No such file or directory
make[3]: *** [arch/x86/tools/relocs] Error 1
make[2]: *** [archscripts] Error 2
make[1]: *** [sub-make] Error 2
make: *** [all] Error 2

This was introduced by commit
6520fe55 (x86, realmode: 16-bit real-mode code support for relocs),
which added the archscripts dependency to archprepare.

This patch adds the scripts_basic dependency to the x86 archscripts.

Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 arch/x86/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b0c5276861ec..c098ca4671de 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -138,7 +138,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
 KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
 
-archscripts:
+archscripts: scripts_basic
 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
 ###
-- 
cgit v1.2.3


From 26bf264e871a4b9a8ac09c21a2b518e7f23830d5 Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Date: Mon, 17 Sep 2012 16:31:13 +0800
Subject: KVM: x86: Export svm/vmx exit code and vector code to userspace

Exporting KVM exit information to userspace to be consumed by perf.

Signed-off-by: Dong Hao <haodong@linux.vnet.ibm.com>
[ Dong Hao <haodong@linux.vnet.ibm.com>: rebase it on acme's git tree ]
Signed-off-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Acked-by: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: kvm@vger.kernel.org
Cc: Runzhen Wang <runzhen@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1347870675-31495-2-git-send-email-haodong@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 arch/x86/include/asm/kvm.h      |  16 ++++
 arch/x86/include/asm/kvm_host.h |  16 ----
 arch/x86/include/asm/svm.h      | 205 +++++++++++++++++++++++++---------------
 arch/x86/include/asm/vmx.h      | 127 ++++++++++++++++---------
 arch/x86/kvm/trace.h            |  89 -----------------
 5 files changed, 230 insertions(+), 223 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h
index 246617efd67f..41e08cb6a092 100644
--- a/arch/x86/include/asm/kvm.h
+++ b/arch/x86/include/asm/kvm.h
@@ -9,6 +9,22 @@
 #include <linux/types.h>
 #include <linux/ioctl.h>
 
+#define DE_VECTOR 0
+#define DB_VECTOR 1
+#define BP_VECTOR 3
+#define OF_VECTOR 4
+#define BR_VECTOR 5
+#define UD_VECTOR 6
+#define NM_VECTOR 7
+#define DF_VECTOR 8
+#define TS_VECTOR 10
+#define NP_VECTOR 11
+#define SS_VECTOR 12
+#define GP_VECTOR 13
+#define PF_VECTOR 14
+#define MF_VECTOR 16
+#define MC_VECTOR 18
+
 /* Select x86 specific features in <linux/kvm.h> */
 #define __KVM_HAVE_PIT
 #define __KVM_HAVE_IOAPIC
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 09155d64cf7e..1eaa6b056670 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -75,22 +75,6 @@
 #define KVM_HPAGE_MASK(x)	(~(KVM_HPAGE_SIZE(x) - 1))
 #define KVM_PAGES_PER_HPAGE(x)	(KVM_HPAGE_SIZE(x) / PAGE_SIZE)
 
-#define DE_VECTOR 0
-#define DB_VECTOR 1
-#define BP_VECTOR 3
-#define OF_VECTOR 4
-#define BR_VECTOR 5
-#define UD_VECTOR 6
-#define NM_VECTOR 7
-#define DF_VECTOR 8
-#define TS_VECTOR 10
-#define NP_VECTOR 11
-#define SS_VECTOR 12
-#define GP_VECTOR 13
-#define PF_VECTOR 14
-#define MF_VECTOR 16
-#define MC_VECTOR 18
-
 #define SELECTOR_TI_MASK (1 << 2)
 #define SELECTOR_RPL_MASK 0x03
 
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index f2b83bc7d784..cdf5674dd23a 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -1,6 +1,135 @@
 #ifndef __SVM_H
 #define __SVM_H
 
+#define SVM_EXIT_READ_CR0      0x000
+#define SVM_EXIT_READ_CR3      0x003
+#define SVM_EXIT_READ_CR4      0x004
+#define SVM_EXIT_READ_CR8      0x008
+#define SVM_EXIT_WRITE_CR0     0x010
+#define SVM_EXIT_WRITE_CR3     0x013
+#define SVM_EXIT_WRITE_CR4     0x014
+#define SVM_EXIT_WRITE_CR8     0x018
+#define SVM_EXIT_READ_DR0      0x020
+#define SVM_EXIT_READ_DR1      0x021
+#define SVM_EXIT_READ_DR2      0x022
+#define SVM_EXIT_READ_DR3      0x023
+#define SVM_EXIT_READ_DR4      0x024
+#define SVM_EXIT_READ_DR5      0x025
+#define SVM_EXIT_READ_DR6      0x026
+#define SVM_EXIT_READ_DR7      0x027
+#define SVM_EXIT_WRITE_DR0     0x030
+#define SVM_EXIT_WRITE_DR1     0x031
+#define SVM_EXIT_WRITE_DR2     0x032
+#define SVM_EXIT_WRITE_DR3     0x033
+#define SVM_EXIT_WRITE_DR4     0x034
+#define SVM_EXIT_WRITE_DR5     0x035
+#define SVM_EXIT_WRITE_DR6     0x036
+#define SVM_EXIT_WRITE_DR7     0x037
+#define SVM_EXIT_EXCP_BASE     0x040
+#define SVM_EXIT_INTR          0x060
+#define SVM_EXIT_NMI           0x061
+#define SVM_EXIT_SMI           0x062
+#define SVM_EXIT_INIT          0x063
+#define SVM_EXIT_VINTR         0x064
+#define SVM_EXIT_CR0_SEL_WRITE 0x065
+#define SVM_EXIT_IDTR_READ     0x066
+#define SVM_EXIT_GDTR_READ     0x067
+#define SVM_EXIT_LDTR_READ     0x068
+#define SVM_EXIT_TR_READ       0x069
+#define SVM_EXIT_IDTR_WRITE    0x06a
+#define SVM_EXIT_GDTR_WRITE    0x06b
+#define SVM_EXIT_LDTR_WRITE    0x06c
+#define SVM_EXIT_TR_WRITE      0x06d
+#define SVM_EXIT_RDTSC         0x06e
+#define SVM_EXIT_RDPMC         0x06f
+#define SVM_EXIT_PUSHF         0x070
+#define SVM_EXIT_POPF          0x071
+#define SVM_EXIT_CPUID         0x072
+#define SVM_EXIT_RSM           0x073
+#define SVM_EXIT_IRET          0x074
+#define SVM_EXIT_SWINT         0x075
+#define SVM_EXIT_INVD          0x076
+#define SVM_EXIT_PAUSE         0x077
+#define SVM_EXIT_HLT           0x078
+#define SVM_EXIT_INVLPG        0x079
+#define SVM_EXIT_INVLPGA       0x07a
+#define SVM_EXIT_IOIO          0x07b
+#define SVM_EXIT_MSR           0x07c
+#define SVM_EXIT_TASK_SWITCH   0x07d
+#define SVM_EXIT_FERR_FREEZE   0x07e
+#define SVM_EXIT_SHUTDOWN      0x07f
+#define SVM_EXIT_VMRUN         0x080
+#define SVM_EXIT_VMMCALL       0x081
+#define SVM_EXIT_VMLOAD        0x082
+#define SVM_EXIT_VMSAVE        0x083
+#define SVM_EXIT_STGI          0x084
+#define SVM_EXIT_CLGI          0x085
+#define SVM_EXIT_SKINIT        0x086
+#define SVM_EXIT_RDTSCP        0x087
+#define SVM_EXIT_ICEBP         0x088
+#define SVM_EXIT_WBINVD        0x089
+#define SVM_EXIT_MONITOR       0x08a
+#define SVM_EXIT_MWAIT         0x08b
+#define SVM_EXIT_MWAIT_COND    0x08c
+#define SVM_EXIT_XSETBV        0x08d
+#define SVM_EXIT_NPF           0x400
+
+#define SVM_EXIT_ERR           -1
+
+#define SVM_EXIT_REASONS \
+	{ SVM_EXIT_READ_CR0,    "read_cr0" }, \
+	{ SVM_EXIT_READ_CR3,    "read_cr3" }, \
+	{ SVM_EXIT_READ_CR4,    "read_cr4" }, \
+	{ SVM_EXIT_READ_CR8,    "read_cr8" }, \
+	{ SVM_EXIT_WRITE_CR0,   "write_cr0" }, \
+	{ SVM_EXIT_WRITE_CR3,   "write_cr3" }, \
+	{ SVM_EXIT_WRITE_CR4,   "write_cr4" }, \
+	{ SVM_EXIT_WRITE_CR8,   "write_cr8" }, \
+	{ SVM_EXIT_READ_DR0,    "read_dr0" }, \
+	{ SVM_EXIT_READ_DR1,    "read_dr1" }, \
+	{ SVM_EXIT_READ_DR2,    "read_dr2" }, \
+	{ SVM_EXIT_READ_DR3,    "read_dr3" }, \
+	{ SVM_EXIT_WRITE_DR0,   "write_dr0" }, \
+	{ SVM_EXIT_WRITE_DR1,   "write_dr1" }, \
+	{ SVM_EXIT_WRITE_DR2,   "write_dr2" }, \
+	{ SVM_EXIT_WRITE_DR3,   "write_dr3" }, \
+	{ SVM_EXIT_WRITE_DR5,   "write_dr5" }, \
+	{ SVM_EXIT_WRITE_DR7,   "write_dr7" }, \
+	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,       "DB excp" }, \
+	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,       "BP excp" }, \
+	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,       "UD excp" }, \
+	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,       "PF excp" }, \
+	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,       "NM excp" }, \
+	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,       "MC excp" }, \
+	{ SVM_EXIT_INTR,        "interrupt" }, \
+	{ SVM_EXIT_NMI,         "nmi" }, \
+	{ SVM_EXIT_SMI,         "smi" }, \
+	{ SVM_EXIT_INIT,        "init" }, \
+	{ SVM_EXIT_VINTR,       "vintr" }, \
+	{ SVM_EXIT_CPUID,       "cpuid" }, \
+	{ SVM_EXIT_INVD,        "invd" }, \
+	{ SVM_EXIT_HLT,         "hlt" }, \
+	{ SVM_EXIT_INVLPG,      "invlpg" }, \
+	{ SVM_EXIT_INVLPGA,     "invlpga" }, \
+	{ SVM_EXIT_IOIO,        "io" }, \
+	{ SVM_EXIT_MSR,         "msr" }, \
+	{ SVM_EXIT_TASK_SWITCH, "task_switch" }, \
+	{ SVM_EXIT_SHUTDOWN,    "shutdown" }, \
+	{ SVM_EXIT_VMRUN,       "vmrun" }, \
+	{ SVM_EXIT_VMMCALL,     "hypercall" }, \
+	{ SVM_EXIT_VMLOAD,      "vmload" }, \
+	{ SVM_EXIT_VMSAVE,      "vmsave" }, \
+	{ SVM_EXIT_STGI,        "stgi" }, \
+	{ SVM_EXIT_CLGI,        "clgi" }, \
+	{ SVM_EXIT_SKINIT,      "skinit" }, \
+	{ SVM_EXIT_WBINVD,      "wbinvd" }, \
+	{ SVM_EXIT_MONITOR,     "monitor" }, \
+	{ SVM_EXIT_MWAIT,       "mwait" }, \
+	{ SVM_EXIT_XSETBV,      "xsetbv" }, \
+	{ SVM_EXIT_NPF,         "npf" }
+
+#ifdef __KERNEL__
+
 enum {
 	INTERCEPT_INTR,
 	INTERCEPT_NMI,
@@ -264,81 +393,6 @@ struct __attribute__ ((__packed__)) vmcb {
 
 #define SVM_EXITINFO_REG_MASK 0x0F
 
-#define	SVM_EXIT_READ_CR0 	0x000
-#define	SVM_EXIT_READ_CR3 	0x003
-#define	SVM_EXIT_READ_CR4 	0x004
-#define	SVM_EXIT_READ_CR8 	0x008
-#define	SVM_EXIT_WRITE_CR0 	0x010
-#define	SVM_EXIT_WRITE_CR3 	0x013
-#define	SVM_EXIT_WRITE_CR4 	0x014
-#define	SVM_EXIT_WRITE_CR8 	0x018
-#define	SVM_EXIT_READ_DR0 	0x020
-#define	SVM_EXIT_READ_DR1 	0x021
-#define	SVM_EXIT_READ_DR2 	0x022
-#define	SVM_EXIT_READ_DR3 	0x023
-#define	SVM_EXIT_READ_DR4 	0x024
-#define	SVM_EXIT_READ_DR5 	0x025
-#define	SVM_EXIT_READ_DR6 	0x026
-#define	SVM_EXIT_READ_DR7 	0x027
-#define	SVM_EXIT_WRITE_DR0 	0x030
-#define	SVM_EXIT_WRITE_DR1 	0x031
-#define	SVM_EXIT_WRITE_DR2 	0x032
-#define	SVM_EXIT_WRITE_DR3 	0x033
-#define	SVM_EXIT_WRITE_DR4 	0x034
-#define	SVM_EXIT_WRITE_DR5 	0x035
-#define	SVM_EXIT_WRITE_DR6 	0x036
-#define	SVM_EXIT_WRITE_DR7 	0x037
-#define SVM_EXIT_EXCP_BASE      0x040
-#define SVM_EXIT_INTR		0x060
-#define SVM_EXIT_NMI		0x061
-#define SVM_EXIT_SMI		0x062
-#define SVM_EXIT_INIT		0x063
-#define SVM_EXIT_VINTR		0x064
-#define SVM_EXIT_CR0_SEL_WRITE	0x065
-#define SVM_EXIT_IDTR_READ	0x066
-#define SVM_EXIT_GDTR_READ	0x067
-#define SVM_EXIT_LDTR_READ	0x068
-#define SVM_EXIT_TR_READ	0x069
-#define SVM_EXIT_IDTR_WRITE	0x06a
-#define SVM_EXIT_GDTR_WRITE	0x06b
-#define SVM_EXIT_LDTR_WRITE	0x06c
-#define SVM_EXIT_TR_WRITE	0x06d
-#define SVM_EXIT_RDTSC		0x06e
-#define SVM_EXIT_RDPMC		0x06f
-#define SVM_EXIT_PUSHF		0x070
-#define SVM_EXIT_POPF		0x071
-#define SVM_EXIT_CPUID		0x072
-#define SVM_EXIT_RSM		0x073
-#define SVM_EXIT_IRET		0x074
-#define SVM_EXIT_SWINT		0x075
-#define SVM_EXIT_INVD		0x076
-#define SVM_EXIT_PAUSE		0x077
-#define SVM_EXIT_HLT		0x078
-#define SVM_EXIT_INVLPG		0x079
-#define SVM_EXIT_INVLPGA	0x07a
-#define SVM_EXIT_IOIO		0x07b
-#define SVM_EXIT_MSR		0x07c
-#define SVM_EXIT_TASK_SWITCH	0x07d
-#define SVM_EXIT_FERR_FREEZE	0x07e
-#define SVM_EXIT_SHUTDOWN	0x07f
-#define SVM_EXIT_VMRUN		0x080
-#define SVM_EXIT_VMMCALL	0x081
-#define SVM_EXIT_VMLOAD		0x082
-#define SVM_EXIT_VMSAVE		0x083
-#define SVM_EXIT_STGI		0x084
-#define SVM_EXIT_CLGI		0x085
-#define SVM_EXIT_SKINIT		0x086
-#define SVM_EXIT_RDTSCP		0x087
-#define SVM_EXIT_ICEBP		0x088
-#define SVM_EXIT_WBINVD		0x089
-#define SVM_EXIT_MONITOR	0x08a
-#define SVM_EXIT_MWAIT		0x08b
-#define SVM_EXIT_MWAIT_COND	0x08c
-#define SVM_EXIT_XSETBV		0x08d
-#define SVM_EXIT_NPF  		0x400
-
-#define SVM_EXIT_ERR		-1
-
 #define SVM_CR0_SELECTIVE_MASK (X86_CR0_TS | X86_CR0_MP)
 
 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
@@ -350,3 +404,4 @@ struct __attribute__ ((__packed__)) vmcb {
 
 #endif
 
+#endif
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 74fcb963595b..36ec21c36d68 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -25,6 +25,88 @@
  *
  */
 
+#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+
+#define EXIT_REASON_EXCEPTION_NMI       0
+#define EXIT_REASON_EXTERNAL_INTERRUPT  1
+#define EXIT_REASON_TRIPLE_FAULT        2
+
+#define EXIT_REASON_PENDING_INTERRUPT   7
+#define EXIT_REASON_NMI_WINDOW          8
+#define EXIT_REASON_TASK_SWITCH         9
+#define EXIT_REASON_CPUID               10
+#define EXIT_REASON_HLT                 12
+#define EXIT_REASON_INVD                13
+#define EXIT_REASON_INVLPG              14
+#define EXIT_REASON_RDPMC               15
+#define EXIT_REASON_RDTSC               16
+#define EXIT_REASON_VMCALL              18
+#define EXIT_REASON_VMCLEAR             19
+#define EXIT_REASON_VMLAUNCH            20
+#define EXIT_REASON_VMPTRLD             21
+#define EXIT_REASON_VMPTRST             22
+#define EXIT_REASON_VMREAD              23
+#define EXIT_REASON_VMRESUME            24
+#define EXIT_REASON_VMWRITE             25
+#define EXIT_REASON_VMOFF               26
+#define EXIT_REASON_VMON                27
+#define EXIT_REASON_CR_ACCESS           28
+#define EXIT_REASON_DR_ACCESS           29
+#define EXIT_REASON_IO_INSTRUCTION      30
+#define EXIT_REASON_MSR_READ            31
+#define EXIT_REASON_MSR_WRITE           32
+#define EXIT_REASON_INVALID_STATE       33
+#define EXIT_REASON_MWAIT_INSTRUCTION   36
+#define EXIT_REASON_MONITOR_INSTRUCTION 39
+#define EXIT_REASON_PAUSE_INSTRUCTION   40
+#define EXIT_REASON_MCE_DURING_VMENTRY  41
+#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+#define EXIT_REASON_APIC_ACCESS         44
+#define EXIT_REASON_EPT_VIOLATION       48
+#define EXIT_REASON_EPT_MISCONFIG       49
+#define EXIT_REASON_WBINVD              54
+#define EXIT_REASON_XSETBV              55
+#define EXIT_REASON_INVPCID             58
+
+#define VMX_EXIT_REASONS \
+	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
+	{ EXIT_REASON_EXTERNAL_INTERRUPT,    "EXTERNAL_INTERRUPT" }, \
+	{ EXIT_REASON_TRIPLE_FAULT,          "TRIPLE_FAULT" }, \
+	{ EXIT_REASON_PENDING_INTERRUPT,     "PENDING_INTERRUPT" }, \
+	{ EXIT_REASON_NMI_WINDOW,            "NMI_WINDOW" }, \
+	{ EXIT_REASON_TASK_SWITCH,           "TASK_SWITCH" }, \
+	{ EXIT_REASON_CPUID,                 "CPUID" }, \
+	{ EXIT_REASON_HLT,                   "HLT" }, \
+	{ EXIT_REASON_INVLPG,                "INVLPG" }, \
+	{ EXIT_REASON_RDPMC,                 "RDPMC" }, \
+	{ EXIT_REASON_RDTSC,                 "RDTSC" }, \
+	{ EXIT_REASON_VMCALL,                "VMCALL" }, \
+	{ EXIT_REASON_VMCLEAR,               "VMCLEAR" }, \
+	{ EXIT_REASON_VMLAUNCH,              "VMLAUNCH" }, \
+	{ EXIT_REASON_VMPTRLD,               "VMPTRLD" }, \
+	{ EXIT_REASON_VMPTRST,               "VMPTRST" }, \
+	{ EXIT_REASON_VMREAD,                "VMREAD" }, \
+	{ EXIT_REASON_VMRESUME,              "VMRESUME" }, \
+	{ EXIT_REASON_VMWRITE,               "VMWRITE" }, \
+	{ EXIT_REASON_VMOFF,                 "VMOFF" }, \
+	{ EXIT_REASON_VMON,                  "VMON" }, \
+	{ EXIT_REASON_CR_ACCESS,             "CR_ACCESS" }, \
+	{ EXIT_REASON_DR_ACCESS,             "DR_ACCESS" }, \
+	{ EXIT_REASON_IO_INSTRUCTION,        "IO_INSTRUCTION" }, \
+	{ EXIT_REASON_MSR_READ,              "MSR_READ" }, \
+	{ EXIT_REASON_MSR_WRITE,             "MSR_WRITE" }, \
+	{ EXIT_REASON_MWAIT_INSTRUCTION,     "MWAIT_INSTRUCTION" }, \
+	{ EXIT_REASON_MONITOR_INSTRUCTION,   "MONITOR_INSTRUCTION" }, \
+	{ EXIT_REASON_PAUSE_INSTRUCTION,     "PAUSE_INSTRUCTION" }, \
+	{ EXIT_REASON_MCE_DURING_VMENTRY,    "MCE_DURING_VMENTRY" }, \
+	{ EXIT_REASON_TPR_BELOW_THRESHOLD,   "TPR_BELOW_THRESHOLD" }, \
+	{ EXIT_REASON_APIC_ACCESS,           "APIC_ACCESS" }, \
+	{ EXIT_REASON_EPT_VIOLATION,         "EPT_VIOLATION" }, \
+	{ EXIT_REASON_EPT_MISCONFIG,         "EPT_MISCONFIG" }, \
+	{ EXIT_REASON_WBINVD,                "WBINVD" }
+
+#ifdef __KERNEL__
+
 #include <linux/types.h>
 
 /*
@@ -241,49 +323,6 @@ enum vmcs_field {
 	HOST_RIP                        = 0x00006c16,
 };
 
-#define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
-
-#define EXIT_REASON_EXCEPTION_NMI       0
-#define EXIT_REASON_EXTERNAL_INTERRUPT  1
-#define EXIT_REASON_TRIPLE_FAULT        2
-
-#define EXIT_REASON_PENDING_INTERRUPT   7
-#define EXIT_REASON_NMI_WINDOW		8
-#define EXIT_REASON_TASK_SWITCH         9
-#define EXIT_REASON_CPUID               10
-#define EXIT_REASON_HLT                 12
-#define EXIT_REASON_INVD                13
-#define EXIT_REASON_INVLPG              14
-#define EXIT_REASON_RDPMC               15
-#define EXIT_REASON_RDTSC               16
-#define EXIT_REASON_VMCALL              18
-#define EXIT_REASON_VMCLEAR             19
-#define EXIT_REASON_VMLAUNCH            20
-#define EXIT_REASON_VMPTRLD             21
-#define EXIT_REASON_VMPTRST             22
-#define EXIT_REASON_VMREAD              23
-#define EXIT_REASON_VMRESUME            24
-#define EXIT_REASON_VMWRITE             25
-#define EXIT_REASON_VMOFF               26
-#define EXIT_REASON_VMON                27
-#define EXIT_REASON_CR_ACCESS           28
-#define EXIT_REASON_DR_ACCESS           29
-#define EXIT_REASON_IO_INSTRUCTION      30
-#define EXIT_REASON_MSR_READ            31
-#define EXIT_REASON_MSR_WRITE           32
-#define EXIT_REASON_INVALID_STATE	33
-#define EXIT_REASON_MWAIT_INSTRUCTION   36
-#define EXIT_REASON_MONITOR_INSTRUCTION 39
-#define EXIT_REASON_PAUSE_INSTRUCTION   40
-#define EXIT_REASON_MCE_DURING_VMENTRY	 41
-#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
-#define EXIT_REASON_APIC_ACCESS         44
-#define EXIT_REASON_EPT_VIOLATION       48
-#define EXIT_REASON_EPT_MISCONFIG       49
-#define EXIT_REASON_WBINVD		54
-#define EXIT_REASON_XSETBV		55
-#define EXIT_REASON_INVPCID		58
-
 /*
  * Interruption-information format
  */
@@ -488,3 +527,5 @@ enum vm_instruction_error_number {
 };
 
 #endif
+
+#endif
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index a71faf727ff3..bca63f04dccb 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -183,95 +183,6 @@ TRACE_EVENT(kvm_apic,
 #define KVM_ISA_VMX   1
 #define KVM_ISA_SVM   2
 
-#define VMX_EXIT_REASONS \
-	{ EXIT_REASON_EXCEPTION_NMI,		"EXCEPTION_NMI" }, \
-	{ EXIT_REASON_EXTERNAL_INTERRUPT,	"EXTERNAL_INTERRUPT" }, \
-	{ EXIT_REASON_TRIPLE_FAULT,		"TRIPLE_FAULT" }, \
-	{ EXIT_REASON_PENDING_INTERRUPT,	"PENDING_INTERRUPT" }, \
-	{ EXIT_REASON_NMI_WINDOW,		"NMI_WINDOW" }, \
-	{ EXIT_REASON_TASK_SWITCH,		"TASK_SWITCH" }, \
-	{ EXIT_REASON_CPUID,			"CPUID" }, \
-	{ EXIT_REASON_HLT,			"HLT" }, \
-	{ EXIT_REASON_INVLPG,			"INVLPG" }, \
-	{ EXIT_REASON_RDPMC,			"RDPMC" }, \
-	{ EXIT_REASON_RDTSC,			"RDTSC" }, \
-	{ EXIT_REASON_VMCALL,			"VMCALL" }, \
-	{ EXIT_REASON_VMCLEAR,			"VMCLEAR" }, \
-	{ EXIT_REASON_VMLAUNCH,			"VMLAUNCH" }, \
-	{ EXIT_REASON_VMPTRLD,			"VMPTRLD" }, \
-	{ EXIT_REASON_VMPTRST,			"VMPTRST" }, \
-	{ EXIT_REASON_VMREAD,			"VMREAD" }, \
-	{ EXIT_REASON_VMRESUME,			"VMRESUME" }, \
-	{ EXIT_REASON_VMWRITE,			"VMWRITE" }, \
-	{ EXIT_REASON_VMOFF,			"VMOFF" }, \
-	{ EXIT_REASON_VMON,			"VMON" }, \
-	{ EXIT_REASON_CR_ACCESS,		"CR_ACCESS" }, \
-	{ EXIT_REASON_DR_ACCESS,		"DR_ACCESS" }, \
-	{ EXIT_REASON_IO_INSTRUCTION,		"IO_INSTRUCTION" }, \
-	{ EXIT_REASON_MSR_READ,			"MSR_READ" }, \
-	{ EXIT_REASON_MSR_WRITE,		"MSR_WRITE" }, \
-	{ EXIT_REASON_MWAIT_INSTRUCTION,	"MWAIT_INSTRUCTION" }, \
-	{ EXIT_REASON_MONITOR_INSTRUCTION,	"MONITOR_INSTRUCTION" }, \
-	{ EXIT_REASON_PAUSE_INSTRUCTION,	"PAUSE_INSTRUCTION" }, \
-	{ EXIT_REASON_MCE_DURING_VMENTRY,	"MCE_DURING_VMENTRY" }, \
-	{ EXIT_REASON_TPR_BELOW_THRESHOLD,	"TPR_BELOW_THRESHOLD" },	\
-	{ EXIT_REASON_APIC_ACCESS,		"APIC_ACCESS" }, \
-	{ EXIT_REASON_EPT_VIOLATION,		"EPT_VIOLATION" }, \
-	{ EXIT_REASON_EPT_MISCONFIG,		"EPT_MISCONFIG" }, \
-	{ EXIT_REASON_WBINVD,			"WBINVD" }
-
-#define SVM_EXIT_REASONS \
-	{ SVM_EXIT_READ_CR0,			"read_cr0" }, \
-	{ SVM_EXIT_READ_CR3,			"read_cr3" }, \
-	{ SVM_EXIT_READ_CR4,			"read_cr4" }, \
-	{ SVM_EXIT_READ_CR8,			"read_cr8" }, \
-	{ SVM_EXIT_WRITE_CR0,			"write_cr0" }, \
-	{ SVM_EXIT_WRITE_CR3,			"write_cr3" }, \
-	{ SVM_EXIT_WRITE_CR4,			"write_cr4" }, \
-	{ SVM_EXIT_WRITE_CR8,			"write_cr8" }, \
-	{ SVM_EXIT_READ_DR0,			"read_dr0" }, \
-	{ SVM_EXIT_READ_DR1,			"read_dr1" }, \
-	{ SVM_EXIT_READ_DR2,			"read_dr2" }, \
-	{ SVM_EXIT_READ_DR3,			"read_dr3" }, \
-	{ SVM_EXIT_WRITE_DR0,			"write_dr0" }, \
-	{ SVM_EXIT_WRITE_DR1,			"write_dr1" }, \
-	{ SVM_EXIT_WRITE_DR2,			"write_dr2" }, \
-	{ SVM_EXIT_WRITE_DR3,			"write_dr3" }, \
-	{ SVM_EXIT_WRITE_DR5,			"write_dr5" }, \
-	{ SVM_EXIT_WRITE_DR7,			"write_dr7" }, \
-	{ SVM_EXIT_EXCP_BASE + DB_VECTOR,	"DB excp" }, \
-	{ SVM_EXIT_EXCP_BASE + BP_VECTOR,	"BP excp" }, \
-	{ SVM_EXIT_EXCP_BASE + UD_VECTOR,	"UD excp" }, \
-	{ SVM_EXIT_EXCP_BASE + PF_VECTOR,	"PF excp" }, \
-	{ SVM_EXIT_EXCP_BASE + NM_VECTOR,	"NM excp" }, \
-	{ SVM_EXIT_EXCP_BASE + MC_VECTOR,	"MC excp" }, \
-	{ SVM_EXIT_INTR,			"interrupt" }, \
-	{ SVM_EXIT_NMI,				"nmi" }, \
-	{ SVM_EXIT_SMI,				"smi" }, \
-	{ SVM_EXIT_INIT,			"init" }, \
-	{ SVM_EXIT_VINTR,			"vintr" }, \
-	{ SVM_EXIT_CPUID,			"cpuid" }, \
-	{ SVM_EXIT_INVD,			"invd" }, \
-	{ SVM_EXIT_HLT,				"hlt" }, \
-	{ SVM_EXIT_INVLPG,			"invlpg" }, \
-	{ SVM_EXIT_INVLPGA,			"invlpga" }, \
-	{ SVM_EXIT_IOIO,			"io" }, \
-	{ SVM_EXIT_MSR,				"msr" }, \
-	{ SVM_EXIT_TASK_SWITCH,			"task_switch" }, \
-	{ SVM_EXIT_SHUTDOWN,			"shutdown" }, \
-	{ SVM_EXIT_VMRUN,			"vmrun" }, \
-	{ SVM_EXIT_VMMCALL,			"hypercall" }, \
-	{ SVM_EXIT_VMLOAD,			"vmload" }, \
-	{ SVM_EXIT_VMSAVE,			"vmsave" }, \
-	{ SVM_EXIT_STGI,			"stgi" }, \
-	{ SVM_EXIT_CLGI,			"clgi" }, \
-	{ SVM_EXIT_SKINIT,			"skinit" }, \
-	{ SVM_EXIT_WBINVD,			"wbinvd" }, \
-	{ SVM_EXIT_MONITOR,			"monitor" }, \
-	{ SVM_EXIT_MWAIT,			"mwait" }, \
-	{ SVM_EXIT_XSETBV,			"xsetbv" }, \
-	{ SVM_EXIT_NPF,				"npf" }
-
 /*
  * Tracepoint for kvm guest exit:
  */
-- 
cgit v1.2.3


From 4cd8daf05c7071ac80008c8d4368860110fa6466 Mon Sep 17 00:00:00 2001
From: Yinghai Lu <yinghai@kernel.org>
Date: Wed, 19 Sep 2012 10:49:00 -0700
Subject: x86/PCI: Clear host bridge aperture struct resource

Use kzalloc() so the struct resource doesn't contain garbage in
fields we don't initialize.

[bhelgaas: changelog]
Signed-off-by: Yinghai Lu <yinghai@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: x86@kernel.org
---
 arch/x86/pci/acpi.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 505acdd6d600..192397c98606 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -305,7 +305,6 @@ setup_resource(struct acpi_resource *acpi_res, void *data)
 	res->flags = flags;
 	res->start = start;
 	res->end = end;
-	res->child = NULL;
 
 	if (!pci_use_crs) {
 		dev_printk(KERN_DEBUG, &info->bridge->dev,
@@ -434,7 +433,7 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device,
 
 	size = sizeof(*info->res) * info->res_num;
 	info->res_num = 0;
-	info->res = kmalloc(size, GFP_KERNEL);
+	info->res = kzalloc(size, GFP_KERNEL);
 	if (!info->res)
 		return;
 
-- 
cgit v1.2.3


From 7a84428af7ca6a847f058c9ff244a18a2664fd1b Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 21 Sep 2012 11:58:03 -0600
Subject: KVM: Add resampling irqfds for level triggered interrupts

To emulate level triggered interrupts, add a resample option to
KVM_IRQFD.  When specified, a new resamplefd is provided that notifies
the user when the irqchip has been resampled by the VM.  This may, for
instance, indicate an EOI.  Also in this mode, posting of an interrupt
through an irqfd only asserts the interrupt.  On resampling, the
interrupt is automatically de-asserted prior to user notification.
This enables level triggered interrupts to be posted and re-enabled
from vfio with no userspace intervention.

All resampling irqfds can make use of a single irq source ID, so we
reserve a new one for this interface.

Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  13 ++++
 arch/x86/kvm/x86.c                |   4 +
 include/linux/kvm.h               |  12 ++-
 include/linux/kvm_host.h          |   5 +-
 virt/kvm/eventfd.c                | 150 +++++++++++++++++++++++++++++++++++++-
 virt/kvm/irq_comm.c               |   6 ++
 6 files changed, 184 insertions(+), 6 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 36befa775fdb..f6ec3a92e621 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1950,6 +1950,19 @@ the guest using the specified gsi pin.  The irqfd is removed using
 the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
 and kvm_irqfd.gsi.
 
+With KVM_CAP_IRQFD_RESAMPLE, KVM_IRQFD supports a de-assert and notify
+mechanism allowing emulation of level-triggered, irqfd-based
+interrupts.  When KVM_IRQFD_FLAG_RESAMPLE is set the user must pass an
+additional eventfd in the kvm_irqfd.resamplefd field.  When operating
+in resample mode, posting of an interrupt through kvm_irq.fd asserts
+the specified gsi in the irqchip.  When the irqchip is resampled, such
+as from an EOI, the gsi is de-asserted and the user is notifed via
+kvm_irqfd.resamplefd.  It is the user's responsibility to re-queue
+the interrupt if the device making use of it still requires service.
+Note that closing the resamplefd is not sufficient to disable the
+irqfd.  The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
+and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
+
 4.76 KVM_PPC_ALLOCATE_HTAB
 
 Capability: KVM_CAP_PPC_ALLOC_HTAB
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index fc2a0a132e4b..7d44204c6041 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2176,6 +2176,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_PCI_2_3:
 	case KVM_CAP_KVMCLOCK_CTRL:
 	case KVM_CAP_READONLY_MEM:
+	case KVM_CAP_IRQFD_RESAMPLE:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
@@ -6268,6 +6269,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
 	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
+	/* Reserve bit 1 of irq_sources_bitmap for irqfd-resampler */
+	set_bit(KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+		&kvm->arch.irq_sources_bitmap);
 
 	raw_spin_lock_init(&kvm->arch.tsc_write_lock);
 	mutex_init(&kvm->arch.apic_map_lock);
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index d808694673f9..0a6d6ba44c85 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -625,6 +625,7 @@ struct kvm_ppc_smmu_info {
 #ifdef __KVM_HAVE_READONLY_MEM
 #define KVM_CAP_READONLY_MEM 81
 #endif
+#define KVM_CAP_IRQFD_RESAMPLE 82
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -690,12 +691,21 @@ struct kvm_xen_hvm_config {
 #endif
 
 #define KVM_IRQFD_FLAG_DEASSIGN (1 << 0)
+/*
+ * Available with KVM_CAP_IRQFD_RESAMPLE
+ *
+ * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
+ * the irqfd to operate in resampling mode for level triggered interrupt
+ * emlation.  See Documentation/virtual/kvm/api.txt.
+ */
+#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
 
 struct kvm_irqfd {
 	__u32 fd;
 	__u32 gsi;
 	__u32 flags;
-	__u8  pad[20];
+	__u32 resamplefd;
+	__u8  pad[16];
 };
 
 struct kvm_clock_data {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 80bfc880921e..2850656e2e96 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -119,7 +119,8 @@ static inline bool is_error_page(struct page *page)
 #define KVM_REQ_PMU               16
 #define KVM_REQ_PMI               17
 
-#define KVM_USERSPACE_IRQ_SOURCE_ID	0
+#define KVM_USERSPACE_IRQ_SOURCE_ID		0
+#define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID	1
 
 struct kvm;
 struct kvm_vcpu;
@@ -343,6 +344,8 @@ struct kvm {
 	struct {
 		spinlock_t        lock;
 		struct list_head  items;
+		struct list_head  resampler_list;
+		struct mutex      resampler_lock;
 	} irqfds;
 	struct list_head ioeventfds;
 #endif
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 7d7e2aaffece..356965c9d107 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -43,6 +43,31 @@
  * --------------------------------------------------------------------
  */
 
+/*
+ * Resampling irqfds are a special variety of irqfds used to emulate
+ * level triggered interrupts.  The interrupt is asserted on eventfd
+ * trigger.  On acknowledgement through the irq ack notifier, the
+ * interrupt is de-asserted and userspace is notified through the
+ * resamplefd.  All resamplers on the same gsi are de-asserted
+ * together, so we don't need to track the state of each individual
+ * user.  We can also therefore share the same irq source ID.
+ */
+struct _irqfd_resampler {
+	struct kvm *kvm;
+	/*
+	 * List of resampling struct _irqfd objects sharing this gsi.
+	 * RCU list modified under kvm->irqfds.resampler_lock
+	 */
+	struct list_head list;
+	struct kvm_irq_ack_notifier notifier;
+	/*
+	 * Entry in list of kvm->irqfd.resampler_list.  Use for sharing
+	 * resamplers among irqfds on the same gsi.
+	 * Accessed and modified under kvm->irqfds.resampler_lock
+	 */
+	struct list_head link;
+};
+
 struct _irqfd {
 	/* Used for MSI fast-path */
 	struct kvm *kvm;
@@ -52,6 +77,12 @@ struct _irqfd {
 	/* Used for level IRQ fast-path */
 	int gsi;
 	struct work_struct inject;
+	/* The resampler used by this irqfd (resampler-only) */
+	struct _irqfd_resampler *resampler;
+	/* Eventfd notified on resample (resampler-only) */
+	struct eventfd_ctx *resamplefd;
+	/* Entry in list of irqfds for a resampler (resampler-only) */
+	struct list_head resampler_link;
 	/* Used for setup/shutdown */
 	struct eventfd_ctx *eventfd;
 	struct list_head list;
@@ -67,8 +98,58 @@ irqfd_inject(struct work_struct *work)
 	struct _irqfd *irqfd = container_of(work, struct _irqfd, inject);
 	struct kvm *kvm = irqfd->kvm;
 
-	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
-	kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
+	if (!irqfd->resampler) {
+		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 1);
+		kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, irqfd->gsi, 0);
+	} else
+		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+			    irqfd->gsi, 1);
+}
+
+/*
+ * Since resampler irqfds share an IRQ source ID, we de-assert once
+ * then notify all of the resampler irqfds using this GSI.  We can't
+ * do multiple de-asserts or we risk racing with incoming re-asserts.
+ */
+static void
+irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
+{
+	struct _irqfd_resampler *resampler;
+	struct _irqfd *irqfd;
+
+	resampler = container_of(kian, struct _irqfd_resampler, notifier);
+
+	kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+		    resampler->notifier.gsi, 0);
+
+	rcu_read_lock();
+
+	list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
+		eventfd_signal(irqfd->resamplefd, 1);
+
+	rcu_read_unlock();
+}
+
+static void
+irqfd_resampler_shutdown(struct _irqfd *irqfd)
+{
+	struct _irqfd_resampler *resampler = irqfd->resampler;
+	struct kvm *kvm = resampler->kvm;
+
+	mutex_lock(&kvm->irqfds.resampler_lock);
+
+	list_del_rcu(&irqfd->resampler_link);
+	synchronize_rcu();
+
+	if (list_empty(&resampler->list)) {
+		list_del(&resampler->link);
+		kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier);
+		kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+			    resampler->notifier.gsi, 0);
+		kfree(resampler);
+	}
+
+	mutex_unlock(&kvm->irqfds.resampler_lock);
 }
 
 /*
@@ -92,6 +173,11 @@ irqfd_shutdown(struct work_struct *work)
 	 */
 	flush_work_sync(&irqfd->inject);
 
+	if (irqfd->resampler) {
+		irqfd_resampler_shutdown(irqfd);
+		eventfd_ctx_put(irqfd->resamplefd);
+	}
+
 	/*
 	 * It is now safe to release the object's resources
 	 */
@@ -203,7 +289,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	struct kvm_irq_routing_table *irq_rt;
 	struct _irqfd *irqfd, *tmp;
 	struct file *file = NULL;
-	struct eventfd_ctx *eventfd = NULL;
+	struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL;
 	int ret;
 	unsigned int events;
 
@@ -231,6 +317,54 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 
 	irqfd->eventfd = eventfd;
 
+	if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) {
+		struct _irqfd_resampler *resampler;
+
+		resamplefd = eventfd_ctx_fdget(args->resamplefd);
+		if (IS_ERR(resamplefd)) {
+			ret = PTR_ERR(resamplefd);
+			goto fail;
+		}
+
+		irqfd->resamplefd = resamplefd;
+		INIT_LIST_HEAD(&irqfd->resampler_link);
+
+		mutex_lock(&kvm->irqfds.resampler_lock);
+
+		list_for_each_entry(resampler,
+				    &kvm->irqfds.resampler_list, list) {
+			if (resampler->notifier.gsi == irqfd->gsi) {
+				irqfd->resampler = resampler;
+				break;
+			}
+		}
+
+		if (!irqfd->resampler) {
+			resampler = kzalloc(sizeof(*resampler), GFP_KERNEL);
+			if (!resampler) {
+				ret = -ENOMEM;
+				mutex_unlock(&kvm->irqfds.resampler_lock);
+				goto fail;
+			}
+
+			resampler->kvm = kvm;
+			INIT_LIST_HEAD(&resampler->list);
+			resampler->notifier.gsi = irqfd->gsi;
+			resampler->notifier.irq_acked = irqfd_resampler_ack;
+			INIT_LIST_HEAD(&resampler->link);
+
+			list_add(&resampler->link, &kvm->irqfds.resampler_list);
+			kvm_register_irq_ack_notifier(kvm,
+						      &resampler->notifier);
+			irqfd->resampler = resampler;
+		}
+
+		list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
+		synchronize_rcu();
+
+		mutex_unlock(&kvm->irqfds.resampler_lock);
+	}
+
 	/*
 	 * Install our own custom wake-up handling so we are notified via
 	 * a callback whenever someone signals the underlying eventfd
@@ -276,6 +410,12 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
 	return 0;
 
 fail:
+	if (irqfd->resampler)
+		irqfd_resampler_shutdown(irqfd);
+
+	if (resamplefd && !IS_ERR(resamplefd))
+		eventfd_ctx_put(resamplefd);
+
 	if (eventfd && !IS_ERR(eventfd))
 		eventfd_ctx_put(eventfd);
 
@@ -291,6 +431,8 @@ kvm_eventfd_init(struct kvm *kvm)
 {
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
+	INIT_LIST_HEAD(&kvm->irqfds.resampler_list);
+	mutex_init(&kvm->irqfds.resampler_lock);
 	INIT_LIST_HEAD(&kvm->ioeventfds);
 }
 
@@ -340,7 +482,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
 int
 kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args)
 {
-	if (args->flags & ~KVM_IRQFD_FLAG_DEASSIGN)
+	if (args->flags & ~(KVM_IRQFD_FLAG_DEASSIGN | KVM_IRQFD_FLAG_RESAMPLE))
 		return -EINVAL;
 
 	if (args->flags & KVM_IRQFD_FLAG_DEASSIGN)
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 3ca89c451d6b..2eb58af7ee99 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -228,6 +228,9 @@ int kvm_request_irq_source_id(struct kvm *kvm)
 	}
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+#ifdef CONFIG_X86
+	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+#endif
 	set_bit(irq_source_id, bitmap);
 unlock:
 	mutex_unlock(&kvm->irq_lock);
@@ -238,6 +241,9 @@ unlock:
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 {
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
+#ifdef CONFIG_X86
+	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
+#endif
 
 	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
-- 
cgit v1.2.3


From c863901075a42d50678616d8ee4b96ef13080498 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Fri, 21 Sep 2012 05:42:55 +0200
Subject: KVM: x86: Fix guest debug across vcpu INIT reset

If we reset a vcpu on INIT, we so far overwrote dr7 as provided by
KVM_SET_GUEST_DEBUG, and we also cleared switch_db_regs unconditionally.

Fix this by saving the dr7 used for guest debugging and calculating the
effective register value as well as switch_db_regs on any potential
change. This will change to focus of the set_guest_debug vendor op to
update_dp_bp_intercept.

Found while trying to stop on start_secondary.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  4 ++--
 arch/x86/kvm/svm.c              | 23 ++++-------------------
 arch/x86/kvm/vmx.c              | 14 +-------------
 arch/x86/kvm/x86.c              | 26 +++++++++++++++++---------
 4 files changed, 24 insertions(+), 43 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0b902c98f279..c9a91368fc5e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -471,6 +471,7 @@ struct kvm_vcpu_arch {
 	unsigned long dr6;
 	unsigned long dr7;
 	unsigned long eff_db[KVM_NR_DB_REGS];
+	unsigned long guest_debug_dr7;
 
 	u64 mcg_cap;
 	u64 mcg_status;
@@ -647,8 +648,7 @@ struct kvm_x86_ops {
 	void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
 	void (*vcpu_put)(struct kvm_vcpu *vcpu);
 
-	void (*set_guest_debug)(struct kvm_vcpu *vcpu,
-				struct kvm_guest_debug *dbg);
+	void (*update_db_bp_intercept)(struct kvm_vcpu *vcpu);
 	int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
 	int (*set_msr)(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
 	u64 (*get_segment_base)(struct kvm_vcpu *vcpu, int seg);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 818fceb3091e..d017df3899ef 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1146,7 +1146,6 @@ static void init_vmcb(struct vcpu_svm *svm)
 
 	svm_set_efer(&svm->vcpu, 0);
 	save->dr6 = 0xffff0ff0;
-	save->dr7 = 0x400;
 	kvm_set_rflags(&svm->vcpu, 2);
 	save->rip = 0x0000fff0;
 	svm->vcpu.arch.regs[VCPU_REGS_RIP] = save->rip;
@@ -1643,7 +1642,7 @@ static void svm_set_segment(struct kvm_vcpu *vcpu,
 	mark_dirty(svm->vmcb, VMCB_SEG);
 }
 
-static void update_db_intercept(struct kvm_vcpu *vcpu)
+static void update_db_bp_intercept(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -1663,20 +1662,6 @@ static void update_db_intercept(struct kvm_vcpu *vcpu)
 		vcpu->guest_debug = 0;
 }
 
-static void svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
-{
-	struct vcpu_svm *svm = to_svm(vcpu);
-
-	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-		svm->vmcb->save.dr7 = dbg->arch.debugreg[7];
-	else
-		svm->vmcb->save.dr7 = vcpu->arch.dr7;
-
-	mark_dirty(svm->vmcb, VMCB_DR);
-
-	update_db_intercept(vcpu);
-}
-
 static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
 {
 	if (sd->next_asid > sd->max_asid) {
@@ -1748,7 +1733,7 @@ static int db_interception(struct vcpu_svm *svm)
 		if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP))
 			svm->vmcb->save.rflags &=
 				~(X86_EFLAGS_TF | X86_EFLAGS_RF);
-		update_db_intercept(&svm->vcpu);
+		update_db_bp_intercept(&svm->vcpu);
 	}
 
 	if (svm->vcpu.guest_debug &
@@ -3659,7 +3644,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
 	 */
 	svm->nmi_singlestep = true;
 	svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF);
-	update_db_intercept(vcpu);
+	update_db_bp_intercept(vcpu);
 }
 
 static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
@@ -4253,7 +4238,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.vcpu_load = svm_vcpu_load,
 	.vcpu_put = svm_vcpu_put,
 
-	.set_guest_debug = svm_guest_debug,
+	.update_db_bp_intercept = update_db_bp_intercept,
 	.get_msr = svm_get_msr,
 	.set_msr = svm_set_msr,
 	.get_segment_base = svm_get_segment_base,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 30bcb953afee..5d46c905e06f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2288,16 +2288,6 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 	}
 }
 
-static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
-{
-	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
-		vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
-	else
-		vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
-
-	update_exception_bitmap(vcpu);
-}
-
 static __init int cpu_has_kvm_support(void)
 {
 	return cpu_has_vmx();
@@ -3960,8 +3950,6 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
 		kvm_rip_write(vcpu, 0);
 	kvm_register_write(vcpu, VCPU_REGS_RSP, 0);
 
-	vmcs_writel(GUEST_DR7, 0x400);
-
 	vmcs_writel(GUEST_GDTR_BASE, 0);
 	vmcs_write32(GUEST_GDTR_LIMIT, 0xffff);
 
@@ -7237,7 +7225,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.vcpu_load = vmx_vcpu_load,
 	.vcpu_put = vmx_vcpu_put,
 
-	.set_guest_debug = set_guest_debug,
+	.update_db_bp_intercept = update_exception_bitmap,
 	.get_msr = vmx_get_msr,
 	.set_msr = vmx_set_msr,
 	.get_segment_base = vmx_get_segment_base,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7d44204c6041..b16d4a5bfa41 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -692,6 +692,18 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static void kvm_update_dr7(struct kvm_vcpu *vcpu)
+{
+	unsigned long dr7;
+
+	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+		dr7 = vcpu->arch.guest_debug_dr7;
+	else
+		dr7 = vcpu->arch.dr7;
+	kvm_x86_ops->set_dr7(vcpu, dr7);
+	vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK);
+}
+
 static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 {
 	switch (dr) {
@@ -717,10 +729,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 		if (val & 0xffffffff00000000ULL)
 			return -1; /* #GP */
 		vcpu->arch.dr7 = (val & DR7_VOLATILE) | DR7_FIXED_1;
-		if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) {
-			kvm_x86_ops->set_dr7(vcpu, vcpu->arch.dr7);
-			vcpu->arch.switch_db_regs = (val & DR7_BP_EN_MASK);
-		}
+		kvm_update_dr7(vcpu);
 		break;
 	}
 
@@ -5851,13 +5860,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 	if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
 		for (i = 0; i < KVM_NR_DB_REGS; ++i)
 			vcpu->arch.eff_db[i] = dbg->arch.debugreg[i];
-		vcpu->arch.switch_db_regs =
-			(dbg->arch.debugreg[7] & DR7_BP_EN_MASK);
+		vcpu->arch.guest_debug_dr7 = dbg->arch.debugreg[7];
 	} else {
 		for (i = 0; i < KVM_NR_DB_REGS; i++)
 			vcpu->arch.eff_db[i] = vcpu->arch.db[i];
-		vcpu->arch.switch_db_regs = (vcpu->arch.dr7 & DR7_BP_EN_MASK);
 	}
+	kvm_update_dr7(vcpu);
 
 	if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
 		vcpu->arch.singlestep_rip = kvm_rip_read(vcpu) +
@@ -5869,7 +5877,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 	 */
 	kvm_set_rflags(vcpu, rflags);
 
-	kvm_x86_ops->set_guest_debug(vcpu, dbg);
+	kvm_x86_ops->update_db_bp_intercept(vcpu);
 
 	r = 0;
 
@@ -6045,10 +6053,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 	vcpu->arch.nmi_pending = 0;
 	vcpu->arch.nmi_injected = false;
 
-	vcpu->arch.switch_db_regs = 0;
 	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
 	vcpu->arch.dr6 = DR6_FIXED_1;
 	vcpu->arch.dr7 = DR7_FIXED_1;
+	kvm_update_dr7(vcpu);
 
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	vcpu->arch.apf.msr_val = 0;
-- 
cgit v1.2.3


From a2db672aa305a045404615e5222ba681bab6cf58 Mon Sep 17 00:00:00 2001
From: Silas Boyd-Wickizer <sbw@mit.edu>
Date: Fri, 3 Aug 2012 12:33:27 -0700
Subject: Use get_online_cpus to avoid races involving CPU hotplug

If arch/x86/kernel/msr.c is a module, a CPU might offline or online
between the for_each_online_cpu(i) loop and the call to
register_hotcpu_notifier in msr_init or the call to
unregister_hotcpu_notifier in msr_exit. The potential races can lead
to leaks/duplicates, attempts to destroy non-existant devices, or
random pointer dereferences.

For example, in msr_init if:

        for_each_online_cpu(i) {
                err = msr_device_create(i);
                if (err != 0)
                        goto out_class;
        }
        <----- CPU offlines
        register_hotcpu_notifier(&msr_class_cpu_notifier);

and the CPU never onlines before msr_exit, then the module will never
call msr_device_destroy for the associated CPU.

This fix surrounds for_each_online_cpu and register_hotcpu_notifier or
unregister_hotcpu_notifier with get_online_cpus+put_online_cpus.

Tested on a VM.

Signed-off-by: Silas Boyd-Wickizer <sbw@mit.edu>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 arch/x86/kernel/msr.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index eb113693f043..a7c5661f8496 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -257,12 +257,14 @@ static int __init msr_init(void)
 		goto out_chrdev;
 	}
 	msr_class->devnode = msr_devnode;
+	get_online_cpus();
 	for_each_online_cpu(i) {
 		err = msr_device_create(i);
 		if (err != 0)
 			goto out_class;
 	}
 	register_hotcpu_notifier(&msr_class_cpu_notifier);
+	put_online_cpus();
 
 	err = 0;
 	goto out;
@@ -271,6 +273,7 @@ out_class:
 	i = 0;
 	for_each_online_cpu(i)
 		msr_device_destroy(i);
+	put_online_cpus();
 	class_destroy(msr_class);
 out_chrdev:
 	__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
@@ -281,11 +284,13 @@ out:
 static void __exit msr_exit(void)
 {
 	int cpu = 0;
+	get_online_cpus();
 	for_each_online_cpu(cpu)
 		msr_device_destroy(cpu);
 	class_destroy(msr_class);
 	__unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr");
 	unregister_hotcpu_notifier(&msr_class_cpu_notifier);
+	put_online_cpus();
 }
 
 module_init(msr_init);
-- 
cgit v1.2.3


From 429227bbe55647aa42f8f63cac61e4544e248629 Mon Sep 17 00:00:00 2001
From: Silas Boyd-Wickizer <sbw@mit.edu>
Date: Fri, 3 Aug 2012 12:34:50 -0700
Subject: Use get_online_cpus to avoid races involving CPU hotplug

If arch/x86/kernel/cpuid.c is a module, a CPU might offline or online
between the for_each_online_cpu() loop and the call to
register_hotcpu_notifier in cpuid_init or the call to
unregister_hotcpu_notifier in cpuid_exit.  The potential races can
lead to leaks/duplicates, attempts to destroy non-existant devices, or
random pointer dereferences.

For example, in cpuid_exit if:

        for_each_online_cpu(cpu)
                cpuid_device_destroy(cpu);
        class_destroy(cpuid_class);
        __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
        <----- CPU onlines
        unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);

the hotcpu notifier will attempt to create a device for the
cpuid_class, which the module already destroyed.

This fix surrounds for_each_online_cpu and register_hotcpu_notifier or
unregister_hotcpu_notifier with get_online_cpus+put_online_cpus.

Tested on a VM.

Signed-off-by: Silas Boyd-Wickizer <sbw@mit.edu>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 arch/x86/kernel/cpuid.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 39472dd2323f..60c78917190c 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -199,12 +199,14 @@ static int __init cpuid_init(void)
 		goto out_chrdev;
 	}
 	cpuid_class->devnode = cpuid_devnode;
+	get_online_cpus();
 	for_each_online_cpu(i) {
 		err = cpuid_device_create(i);
 		if (err != 0)
 			goto out_class;
 	}
 	register_hotcpu_notifier(&cpuid_class_cpu_notifier);
+	put_online_cpus();
 
 	err = 0;
 	goto out;
@@ -214,6 +216,7 @@ out_class:
 	for_each_online_cpu(i) {
 		cpuid_device_destroy(i);
 	}
+	put_online_cpus();
 	class_destroy(cpuid_class);
 out_chrdev:
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
@@ -225,11 +228,13 @@ static void __exit cpuid_exit(void)
 {
 	int cpu = 0;
 
+	get_online_cpus();
 	for_each_online_cpu(cpu)
 		cpuid_device_destroy(cpu);
 	class_destroy(cpuid_class);
 	__unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid");
 	unregister_hotcpu_notifier(&cpuid_class_cpu_notifier);
+	put_online_cpus();
 }
 
 module_init(cpuid_init);
-- 
cgit v1.2.3


From 8d54db795dfb1049d45dc34f0dddbc5347ec5642 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 17 Aug 2012 10:22:37 -0400
Subject: xen/boot: Disable NUMA for PV guests.

The hypervisor is in charge of allocating the proper "NUMA" memory
and dealing with the CPU scheduler to keep them bound to the proper
NUMA node. The PV guests (and PVHVM) have no inkling of where they
run and do not need to know that right now. In the future we will
need to inject NUMA configuration data (if a guest spans two or more
NUMA nodes) so that the kernel can make the right choices. But those
patches are not yet present.

In the meantime, disable the NUMA capability in the PV guest, which
also fixes a bootup issue. Andre says:

"we see Dom0 crashes due to the kernel detecting the NUMA topology not
by ACPI, but directly from the northbridge (CONFIG_AMD_NUMA).

This will detect the actual NUMA config of the physical machine, but
will crash about the mismatch with Dom0's virtual memory. Variation of
the theme: Dom0 sees what it's not supposed to see.

This happens with the said config option enabled and on a machine where
this scanning is still enabled (K8 and Fam10h, not Bulldozer class)

We have this dump then:
NUMA: Warning: node ids are out of bound, from=-1 to=-1 distance=10
Scanning NUMA topology in Northbridge 24
Number of physical nodes 4
Node 0 MemBase 0000000000000000 Limit 0000000040000000
Node 1 MemBase 0000000040000000 Limit 0000000138000000
Node 2 MemBase 0000000138000000 Limit 00000001f8000000
Node 3 MemBase 00000001f8000000 Limit 0000000238000000
Initmem setup node 0 0000000000000000-0000000040000000
  NODE_DATA [000000003ffd9000 - 000000003fffffff]
Initmem setup node 1 0000000040000000-0000000138000000
  NODE_DATA [0000000137fd9000 - 0000000137ffffff]
Initmem setup node 2 0000000138000000-00000001f8000000
  NODE_DATA [00000001f095e000 - 00000001f0984fff]
Initmem setup node 3 00000001f8000000-0000000238000000
Cannot find 159744 bytes in node 3
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<ffffffff81d220e6>] __alloc_bootmem_node+0x43/0x96
Pid: 0, comm: swapper Not tainted 3.3.6 #1 AMD Dinar/Dinar
RIP: e030:[<ffffffff81d220e6>]  [<ffffffff81d220e6>] __alloc_bootmem_node+0x43/0x96
.. snip..
  [<ffffffff81d23024>] sparse_early_usemaps_alloc_node+0x64/0x178
  [<ffffffff81d23348>] sparse_init+0xe4/0x25a
  [<ffffffff81d16840>] paging_init+0x13/0x22
  [<ffffffff81d07fbb>] setup_arch+0x9c6/0xa9b
  [<ffffffff81683954>] ? printk+0x3c/0x3e
  [<ffffffff81d01a38>] start_kernel+0xe5/0x468
  [<ffffffff81d012cf>] x86_64_start_reservations+0xba/0xc1
  [<ffffffff81007153>] ? xen_setup_runstate_info+0x2c/0x36
  [<ffffffff81d050ee>] xen_start_kernel+0x565/0x56c
"

so we just disable NUMA scanning by setting numa_off=1.

CC: stable@vger.kernel.org
Reported-and-Tested-by: Andre Przywara <andre.przywara@amd.com>
Acked-by: Andre Przywara <andre.przywara@amd.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/setup.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c
index d11ca11d14fc..e2d62d697b5d 100644
--- a/arch/x86/xen/setup.c
+++ b/arch/x86/xen/setup.c
@@ -17,6 +17,7 @@
 #include <asm/e820.h>
 #include <asm/setup.h>
 #include <asm/acpi.h>
+#include <asm/numa.h>
 #include <asm/xen/hypervisor.h>
 #include <asm/xen/hypercall.h>
 
@@ -544,4 +545,7 @@ void __init xen_arch_setup(void)
 	disable_cpufreq();
 	WARN_ON(set_pm_idle_to_default());
 	fiddle_vdso();
+#ifdef CONFIG_NUMA
+	numa_off = 1;
+#endif
 }
-- 
cgit v1.2.3


From ffb8b233c2261b7978dc3bd759aaa19bd1a7fadf Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Fri, 21 Sep 2012 12:30:35 -0400
Subject: xen/x86: retrieve keyboard shift status flags from hypervisor.

The xen c/s 25873 allows the hypervisor to retrieve the NUMLOCK flag.
With this patch, the Linux kernel can get the state according to the
data in the BIOS.

Acked-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c         | 8 ++++++++
 include/xen/interface/platform.h | 3 +++
 2 files changed, 11 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 47b3acdc2ac5..67897152237c 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1440,11 +1440,19 @@ asmlinkage void __init xen_start_kernel(void)
 		const struct dom0_vga_console_info *info =
 			(void *)((char *)xen_start_info +
 				 xen_start_info->console.dom0.info_off);
+		struct xen_platform_op op = {
+			.cmd = XENPF_firmware_info,
+			.interface_version = XENPF_INTERFACE_VERSION,
+			.u.firmware_info.type = XEN_FW_KBD_SHIFT_FLAGS,
+		};
 
 		xen_init_vga(info, xen_start_info->console.dom0.info_size);
 		xen_start_info->console.domU.mfn = 0;
 		xen_start_info->console.domU.evtchn = 0;
 
+		if (HYPERVISOR_dom0_op(&op) == 0)
+			boot_params.kbd_status = op.u.firmware_info.u.kbd_shift_flags;
+
 		xen_init_apic();
 
 		/* Make sure ACS will be enabled */
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index a3275a850e54..54ad6f9e4725 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -112,6 +112,7 @@ DEFINE_GUEST_HANDLE_STRUCT(xenpf_platform_quirk_t);
 #define XEN_FW_DISK_INFO          1 /* from int 13 AH=08/41/48 */
 #define XEN_FW_DISK_MBR_SIGNATURE 2 /* from MBR offset 0x1b8 */
 #define XEN_FW_VBEDDC_INFO        3 /* from int 10 AX=4f15 */
+#define XEN_FW_KBD_SHIFT_FLAGS    5 /* Int16, Fn02: Get keyboard shift flags. */
 struct xenpf_firmware_info {
 	/* IN variables. */
 	uint32_t type;
@@ -142,6 +143,8 @@ struct xenpf_firmware_info {
 			/* must refer to 128-byte buffer */
 			GUEST_HANDLE(uchar) edid;
 		} vbeddc_info; /* XEN_FW_VBEDDC_INFO */
+
+		uint8_t kbd_shift_flags; /* XEN_FW_KBD_SHIFT_FLAGS */
 	} u;
 };
 DEFINE_GUEST_HANDLE_STRUCT(xenpf_firmware_info_t);
-- 
cgit v1.2.3


From aa387d630cfed1a694a9c8c61fba3877ba8d4f07 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@suse.com>
Date: Thu, 9 Feb 2012 11:33:51 +0800
Subject: xen/vga: add the xen EFI video mode support

In order to add xen EFI frambebuffer video support, it is required to add
xen-efi's new video type (XEN_VGATYPE_EFI_LFB) case and handle it in the
function xen_init_vga and set the video type to VIDEO_TYPE_EFI to enable
efi video mode.

The original patch from which this was broken out from:
 http://marc.info/?i=4E099AA6020000780004A4C6@nat28.tlf.novell.com

Signed-off-by: Jan Beulich <JBeulich@novell.com>
Signed-off-by: Tang Liang <liang.tang@oracle.com>
[v2: The original author is Jan Beulich and Liang Tang ported it to upstream]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/vga.c          | 7 +++++++
 include/xen/interface/xen.h | 1 +
 2 files changed, 8 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/vga.c b/arch/x86/xen/vga.c
index 1cd7f4d11e29..6722e3733f02 100644
--- a/arch/x86/xen/vga.c
+++ b/arch/x86/xen/vga.c
@@ -35,6 +35,7 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
 			info->u.text_mode_3.font_height;
 		break;
 
+	case XEN_VGATYPE_EFI_LFB:
 	case XEN_VGATYPE_VESA_LFB:
 		if (size < offsetof(struct dom0_vga_console_info,
 				    u.vesa_lfb.gbl_caps))
@@ -54,6 +55,12 @@ void __init xen_init_vga(const struct dom0_vga_console_info *info, size_t size)
 		screen_info->blue_pos = info->u.vesa_lfb.blue_pos;
 		screen_info->rsvd_size = info->u.vesa_lfb.rsvd_size;
 		screen_info->rsvd_pos = info->u.vesa_lfb.rsvd_pos;
+
+		if (info->video_type == XEN_VGATYPE_EFI_LFB) {
+			screen_info->orig_video_isVGA = VIDEO_TYPE_EFI;
+			break;
+		}
+
 		if (size >= offsetof(struct dom0_vga_console_info,
 				     u.vesa_lfb.gbl_caps)
 		    + sizeof(info->u.vesa_lfb.gbl_caps))
diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h
index 1e0df6b7d3b3..886a5d80a18f 100644
--- a/include/xen/interface/xen.h
+++ b/include/xen/interface/xen.h
@@ -454,6 +454,7 @@ struct dom0_vga_console_info {
 	uint8_t video_type;
 #define XEN_VGATYPE_TEXT_MODE_3 0x03
 #define XEN_VGATYPE_VESA_LFB    0x23
+#define XEN_VGATYPE_EFI_LFB     0x70
 
 	union {
 		struct {
-- 
cgit v1.2.3


From b3c869d35b9b014f63ac0beacd31c57372084d01 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 4 Sep 2012 12:42:27 -0400
Subject: jiffies: Remove compile time assumptions about CLOCK_TICK_RATE

CLOCK_TICK_RATE is used to accurately caclulate exactly how
a tick will be at a given HZ.

This is useful, because while we'd expect NSEC_PER_SEC/HZ,
the underlying hardware will have some granularity limit,
so we won't be able to have exactly HZ ticks per second.

This slight error can cause timekeeping quality problems
when using the jiffies or other jiffies driven clocksources.
Thus we currently use compile time CLOCK_TICK_RATE value to
generate SHIFTED_HZ and NSEC_PER_JIFFIES, which we then use
to adjust the jiffies clocksource to correct this error.

Unfortunately though, since CLOCK_TICK_RATE is a compile
time value, and the jiffies clocksource is registered very
early during boot, there are a number of cases where there
are different possible hardware timers that have different
tick rates. This causes problems in cases like ARM where
there are numerous different types of hardware, each having
their own compile-time CLOCK_TICK_RATE, making it hard to
accurately support different hardware with a single kernel.

For the most part, this doesn't matter all that much, as not
too many systems actually utilize the jiffies or jiffies driven
clocksource. Usually there are other highres clocksources
who's granularity error is negligable.

Even so, we have some complicated calcualtions that we do
everywhere to handle these edge cases.

This patch removes the compile time SHIFTED_HZ value, and
introduces a register_refined_jiffies() function. This results
in the default jiffies clock as being assumed a perfect HZ
freq, and allows archtectures that care about jiffies accuracy
to call register_refined_jiffies() with the tick rate, specified
dynamically at boot.

This allows us, where necessary, to not have a compile time
CLOCK_TICK_RATE constant, simplifies the jiffies code, and
still provides a way to have an accurate jiffies clock.

NOTE: Since this patch does not add register_refinied_jiffies()
calls for every arch, it may cause time quality regressions
in some cases. Its likely these will not be noticable, but
if they are an issue, adding the following to the end of
setup_arch() should resolve the regression:
	register_refinied_jiffies(CLOCK_TICK_RATE)

Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/x86/kernel/setup.c |  3 +++
 include/linux/jiffies.h | 15 ++-------------
 kernel/time/jiffies.c   | 32 +++++++++++++++++++++++++++++++-
 3 files changed, 36 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index f4b9b80e1b95..4062f15bfbdd 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -68,6 +68,7 @@
 #include <linux/percpu.h>
 #include <linux/crash_dump.h>
 #include <linux/tboot.h>
+#include <linux/jiffies.h>
 
 #include <video/edid.h>
 
@@ -1034,6 +1035,8 @@ void __init setup_arch(char **cmdline_p)
 	mcheck_init();
 
 	arch_init_ideal_nops();
+
+	register_refined_jiffies(CLOCK_TICK_RATE);
 }
 
 #ifdef CONFIG_X86_32
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index 4a7e3864e80d..388edb54bfb6 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -51,21 +51,10 @@
 #define SH_DIV(NOM,DEN,LSH) (   (((NOM) / (DEN)) << (LSH))              \
                              + ((((NOM) % (DEN)) << (LSH)) + (DEN) / 2) / (DEN))
 
-#ifdef CLOCK_TICK_RATE
-/* LATCH is used in the interval timer and ftape setup. */
-# define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ)	/* For divider */
-
-/*
- * HZ is the requested value. However the CLOCK_TICK_RATE may not allow
- * for exactly HZ. So SHIFTED_HZ is high res HZ ("<< 8" is for accuracy)
- */
-# define SHIFTED_HZ (SH_DIV(CLOCK_TICK_RATE, LATCH, 8))
-#else
-# define SHIFTED_HZ (HZ << 8)
-#endif
+extern int register_refined_jiffies(long clock_tick_rate);
 
 /* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */
-#define TICK_NSEC (SH_DIV(1000000UL * 1000, SHIFTED_HZ, 8))
+#define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ)
 
 /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */
 #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ)
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 46da0537c10b..6629bf7b5285 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -37,7 +37,7 @@
  * requested HZ value. It is also not recommended
  * for "tick-less" systems.
  */
-#define NSEC_PER_JIFFY	((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ))
+#define NSEC_PER_JIFFY	((NSEC_PER_SEC+HZ/2)/HZ)
 
 /* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
  * conversion, the .shift value could be zero. However
@@ -95,3 +95,33 @@ struct clocksource * __init __weak clocksource_default_clock(void)
 {
 	return &clocksource_jiffies;
 }
+
+struct clocksource refined_jiffies;
+
+int register_refined_jiffies(long cycles_per_second)
+{
+	u64 nsec_per_tick, shift_hz;
+	long cycles_per_tick;
+
+
+
+	refined_jiffies = clocksource_jiffies;
+	refined_jiffies.name = "refined-jiffies";
+	refined_jiffies.rating++;
+
+	/* Calc cycles per tick */
+	cycles_per_tick = (cycles_per_second + HZ/2)/HZ;
+	/* shift_hz stores hz<<8 for extra accuracy */
+	shift_hz = (u64)cycles_per_second << 8;
+	shift_hz += cycles_per_tick/2;
+	do_div(shift_hz, cycles_per_tick);
+	/* Calculate nsec_per_tick using shift_hz */
+	nsec_per_tick = (u64)NSEC_PER_SEC << 8;
+	nsec_per_tick += (u32)shift_hz/2;
+	do_div(nsec_per_tick, (u32)shift_hz);
+
+	refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
+
+	clocksource_register(&refined_jiffies);
+	return 0;
+}
-- 
cgit v1.2.3


From 189374aed657e2228ad6b39ece438c9cdafc8dae Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 4 Sep 2012 15:27:48 -0400
Subject: time: Move update_vsyscall definitions to timekeeper_internal.h

Since users will need to include timekeeper_internal.h, move
update_vsyscall definitions to timekeeper_internal.h.

Cc: Tony Luck <tony.luck@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/ia64/kernel/time.c             |  2 +-
 arch/powerpc/kernel/time.c          |  2 +-
 arch/s390/kernel/time.c             |  2 +-
 arch/x86/kernel/vsyscall_64.c       |  2 +-
 include/linux/clocksource.h         | 16 ----------------
 include/linux/timekeeper_internal.h | 17 +++++++++++++++++
 kernel/time.c                       |  2 +-
 7 files changed, 22 insertions(+), 21 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index ecc904b33c5f..acb688fe2354 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -19,7 +19,7 @@
 #include <linux/interrupt.h>
 #include <linux/efi.h>
 #include <linux/timex.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
 #include <linux/platform_device.h>
 
 #include <asm/machvec.h>
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e49e93191b69..613a830d9d50 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -73,7 +73,7 @@
 /* powerpc clocksource/clockevent code */
 
 #include <linux/clockchips.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
 
 static cycle_t rtc_read(struct clocksource *);
 static struct clocksource clocksource_rtc = {
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index dcec960fc724..bfb62ad312a6 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -34,7 +34,7 @@
 #include <linux/profile.h>
 #include <linux/timex.h>
 #include <linux/notifier.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
 #include <linux/clockchips.h>
 #include <linux/gfp.h>
 #include <linux/kprobes.h>
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 8d141b309046..6ec8411a4710 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -28,7 +28,7 @@
 #include <linux/jiffies.h>
 #include <linux/sysctl.h>
 #include <linux/topology.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
 #include <linux/getcpu.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index fbe89e17124e..4dceaf8ae152 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -319,22 +319,6 @@ static inline void __clocksource_updatefreq_khz(struct clocksource *cs, u32 khz)
 	__clocksource_updatefreq_scale(cs, 1000, khz);
 }
 
-#ifdef CONFIG_GENERIC_TIME_VSYSCALL
-extern void
-update_vsyscall(struct timespec *ts, struct timespec *wtm,
-			struct clocksource *c, u32 mult);
-extern void update_vsyscall_tz(void);
-#else
-static inline void
-update_vsyscall(struct timespec *ts, struct timespec *wtm,
-			struct clocksource *c, u32 mult)
-{
-}
-
-static inline void update_vsyscall_tz(void)
-{
-}
-#endif
 
 extern void timekeeping_notify(struct clocksource *clock);
 
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 8ba43fa8c7e6..9c1c2cf413a6 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -65,4 +65,21 @@ struct timekeeper {
 	/* Seqlock for all timekeeper values */
 	seqlock_t		lock;
 };
+
+
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL
+extern void
+update_vsyscall(struct timespec *ts, struct timespec *wtm,
+			struct clocksource *c, u32 mult);
+extern void update_vsyscall_tz(void);
+#else
+static inline void update_vsyscall(struct timespec *ts, struct timespec *wtm,
+					struct clocksource *c, u32 mult)
+{
+}
+static inline void update_vsyscall_tz(void)
+{
+}
+#endif
+
 #endif /* _LINUX_TIMEKEEPER_INTERNAL_H */
diff --git a/kernel/time.c b/kernel/time.c
index ba744cf80696..d226c6a3fd28 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -30,7 +30,7 @@
 #include <linux/export.h>
 #include <linux/timex.h>
 #include <linux/capability.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
 #include <linux/errno.h>
 #include <linux/syscalls.h>
 #include <linux/security.h>
-- 
cgit v1.2.3


From 706394211648117762edfaeffd6fc04bf3b1a75d Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 4 Sep 2012 15:34:21 -0400
Subject: time: Convert CONFIG_GENERIC_TIME_VSYSCALL to
 CONFIG_GENERIC_TIME_VSYSCALL_OLD

To help migrate archtectures over to the new update_vsyscall method,
redfine CONFIG_GENERIC_TIME_VSYSCALL as CONFIG_GENERIC_TIME_VSYSCALL_OLD

Cc: Tony Luck <tony.luck@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/ia64/Kconfig                   | 2 +-
 arch/ia64/kernel/time.c             | 2 +-
 arch/powerpc/Kconfig                | 2 +-
 arch/powerpc/kernel/time.c          | 2 +-
 arch/s390/Kconfig                   | 2 +-
 arch/s390/kernel/time.c             | 2 +-
 arch/x86/Kconfig                    | 2 +-
 arch/x86/kernel/vsyscall_64.c       | 2 +-
 include/linux/timekeeper_internal.h | 7 ++++---
 kernel/time/Kconfig                 | 2 +-
 kernel/time/timekeeping.c           | 2 +-
 11 files changed, 14 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 310cf5781fad..f9e673c252a7 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -38,7 +38,7 @@ config IA64
 	select ARCH_TASK_STRUCT_ALLOCATOR
 	select ARCH_THREAD_INFO_ALLOCATOR
 	select ARCH_CLOCKSOURCE_DATA
-	select GENERIC_TIME_VSYSCALL
+	select GENERIC_TIME_VSYSCALL_OLD
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
index acb688fe2354..d2f4e260db40 100644
--- a/arch/ia64/kernel/time.c
+++ b/arch/ia64/kernel/time.c
@@ -454,7 +454,7 @@ void update_vsyscall_tz(void)
 {
 }
 
-void update_vsyscall(struct timespec *wall, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall, struct timespec *wtm,
 			struct clocksource *c, u32 mult)
 {
 	write_seqcount_begin(&fsyscall_gtod_data.seq);
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 352f416269ce..0881660c9efa 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -135,7 +135,7 @@ config PPC
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_CMOS_UPDATE
-	select GENERIC_TIME_VSYSCALL
+	select GENERIC_TIME_VSYSCALL_OLD
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index 613a830d9d50..c825809b92e7 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -712,7 +712,7 @@ static cycle_t timebase_read(struct clocksource *cs)
 	return (cycle_t)get_tb();
 }
 
-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
 			struct clocksource *clock, u32 mult)
 {
 	u64 new_tb_to_xs, new_stamp_xsec;
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 107610e01a29..ba488aa500de 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -121,7 +121,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_SMP_IDLE_THREAD
-	select GENERIC_TIME_VSYSCALL
+	select GENERIC_TIME_VSYSCALL_OLD
 	select GENERIC_CLOCKEVENTS
 	select KTIME_SCALAR if 32BIT
 	select HAVE_ARCH_SECCOMP_FILTER
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index bfb62ad312a6..c5430bf5b5e6 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -219,7 +219,7 @@ struct clocksource * __init clocksource_default_clock(void)
 	return &clocksource_tod;
 }
 
-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
 			struct clocksource *clock, u32 mult)
 {
 	if (clock != &clocksource_tod)
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4abd..e3f3c1a5a744 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,7 +93,7 @@ config X86
 	select GENERIC_CLOCKEVENTS
 	select ARCH_CLOCKSOURCE_DATA if X86_64
 	select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
-	select GENERIC_TIME_VSYSCALL if X86_64
+	select GENERIC_TIME_VSYSCALL_OLD if X86_64
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 6ec8411a4710..77dde2953c64 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -82,7 +82,7 @@ void update_vsyscall_tz(void)
 	vsyscall_gtod_data.sys_tz = sys_tz;
 }
 
-void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
+void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
 			struct clocksource *clock, u32 mult)
 {
 	struct timespec monotonic;
diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h
index 9c1c2cf413a6..a904d76a5faa 100644
--- a/include/linux/timekeeper_internal.h
+++ b/include/linux/timekeeper_internal.h
@@ -67,13 +67,14 @@ struct timekeeper {
 };
 
 
-#ifdef CONFIG_GENERIC_TIME_VSYSCALL
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
 extern void
-update_vsyscall(struct timespec *ts, struct timespec *wtm,
+update_vsyscall_old(struct timespec *ts, struct timespec *wtm,
 			struct clocksource *c, u32 mult);
 extern void update_vsyscall_tz(void);
 #else
-static inline void update_vsyscall(struct timespec *ts, struct timespec *wtm,
+static inline void
+update_vsyscall_old(struct timespec *ts, struct timespec *wtm,
 					struct clocksource *c, u32 mult)
 {
 }
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index fd42bd452b75..489c86154d1f 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -13,7 +13,7 @@ config ARCH_CLOCKSOURCE_DATA
 	bool
 
 # Timekeeping vsyscall support
-config GENERIC_TIME_VSYSCALL
+config GENERIC_TIME_VSYSCALL_OLD
 	bool
 
 # ktime_t scalar 64bit nsec representation
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 02c19d3d8e0d..7c2851384c46 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -199,7 +199,7 @@ static void timekeeping_update(struct timekeeper *tk, bool clearntp)
 		ntp_clear();
 	}
 	xt = tk_xtime(tk);
-	update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
+	update_vsyscall_old(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
 }
 
 /**
-- 
cgit v1.2.3


From 650ea02475106e8d6bdf561896d2ffe0d1c0ebb4 Mon Sep 17 00:00:00 2001
From: John Stultz <john.stultz@linaro.org>
Date: Tue, 4 Sep 2012 16:14:46 -0400
Subject: time: Convert x86_64 to using new update_vsyscall

Switch x86_64 to using sub-ns precise vsyscall

Cc: Tony Luck <tony.luck@intel.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Paul Turner <pjt@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Prarit Bhargava <prarit@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: John Stultz <john.stultz@linaro.org>
---
 arch/x86/Kconfig               |  2 +-
 arch/x86/include/asm/vgtod.h   |  4 ++--
 arch/x86/kernel/vsyscall_64.c  | 47 +++++++++++++++++++++++++-----------------
 arch/x86/vdso/vclock_gettime.c | 22 +++++++++++++-------
 4 files changed, 45 insertions(+), 30 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e3f3c1a5a744..8ec3a1aa4abd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,7 +93,7 @@ config X86
 	select GENERIC_CLOCKEVENTS
 	select ARCH_CLOCKSOURCE_DATA if X86_64
 	select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
-	select GENERIC_TIME_VSYSCALL_OLD if X86_64
+	select GENERIC_TIME_VSYSCALL if X86_64
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h
index 8b38be2de9e1..46e24d36b7da 100644
--- a/arch/x86/include/asm/vgtod.h
+++ b/arch/x86/include/asm/vgtod.h
@@ -17,8 +17,8 @@ struct vsyscall_gtod_data {
 
 	/* open coded 'struct timespec' */
 	time_t		wall_time_sec;
-	u32		wall_time_nsec;
-	u32		monotonic_time_nsec;
+	u64		wall_time_snsec;
+	u64		monotonic_time_snsec;
 	time_t		monotonic_time_sec;
 
 	struct timezone sys_tz;
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 77dde2953c64..3a3e8c9e280d 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -82,32 +82,41 @@ void update_vsyscall_tz(void)
 	vsyscall_gtod_data.sys_tz = sys_tz;
 }
 
-void update_vsyscall_old(struct timespec *wall_time, struct timespec *wtm,
-			struct clocksource *clock, u32 mult)
+void update_vsyscall(struct timekeeper *tk)
 {
-	struct timespec monotonic;
+	struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data;
 
-	write_seqcount_begin(&vsyscall_gtod_data.seq);
+	write_seqcount_begin(&vdata->seq);
 
 	/* copy vsyscall data */
-	vsyscall_gtod_data.clock.vclock_mode	= clock->archdata.vclock_mode;
-	vsyscall_gtod_data.clock.cycle_last	= clock->cycle_last;
-	vsyscall_gtod_data.clock.mask		= clock->mask;
-	vsyscall_gtod_data.clock.mult		= mult;
-	vsyscall_gtod_data.clock.shift		= clock->shift;
-
-	vsyscall_gtod_data.wall_time_sec	= wall_time->tv_sec;
-	vsyscall_gtod_data.wall_time_nsec	= wall_time->tv_nsec;
+	vdata->clock.vclock_mode	= tk->clock->archdata.vclock_mode;
+	vdata->clock.cycle_last		= tk->clock->cycle_last;
+	vdata->clock.mask		= tk->clock->mask;
+	vdata->clock.mult		= tk->mult;
+	vdata->clock.shift		= tk->shift;
+
+	vdata->wall_time_sec		= tk->xtime_sec;
+	vdata->wall_time_snsec		= tk->xtime_nsec;
+
+	vdata->monotonic_time_sec	= tk->xtime_sec
+					+ tk->wall_to_monotonic.tv_sec;
+	vdata->monotonic_time_snsec	= tk->xtime_nsec
+					+ (tk->wall_to_monotonic.tv_nsec
+						<< tk->shift);
+	while (vdata->monotonic_time_snsec >=
+					(((u64)NSEC_PER_SEC) << tk->shift)) {
+		vdata->monotonic_time_snsec -=
+					((u64)NSEC_PER_SEC) << tk->shift;
+		vdata->monotonic_time_sec++;
+	}
 
-	monotonic = timespec_add(*wall_time, *wtm);
-	vsyscall_gtod_data.monotonic_time_sec	= monotonic.tv_sec;
-	vsyscall_gtod_data.monotonic_time_nsec	= monotonic.tv_nsec;
+	vdata->wall_time_coarse.tv_sec	= tk->xtime_sec;
+	vdata->wall_time_coarse.tv_nsec	= (long)(tk->xtime_nsec >> tk->shift);
 
-	vsyscall_gtod_data.wall_time_coarse	= __current_kernel_time();
-	vsyscall_gtod_data.monotonic_time_coarse =
-		timespec_add(vsyscall_gtod_data.wall_time_coarse, *wtm);
+	vdata->monotonic_time_coarse	= timespec_add(vdata->wall_time_coarse,
+							tk->wall_to_monotonic);
 
-	write_seqcount_end(&vsyscall_gtod_data.seq);
+	write_seqcount_end(&vdata->seq);
 }
 
 static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c
index 885eff49d6ab..4df6c373421a 100644
--- a/arch/x86/vdso/vclock_gettime.c
+++ b/arch/x86/vdso/vclock_gettime.c
@@ -80,7 +80,7 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz)
 }
 
 
-notrace static inline long vgetns(void)
+notrace static inline u64 vgetsns(void)
 {
 	long v;
 	cycles_t cycles;
@@ -91,21 +91,24 @@ notrace static inline long vgetns(void)
 	else
 		return 0;
 	v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask;
-	return (v * gtod->clock.mult) >> gtod->clock.shift;
+	return v * gtod->clock.mult;
 }
 
 /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */
 notrace static int __always_inline do_realtime(struct timespec *ts)
 {
-	unsigned long seq, ns;
+	unsigned long seq;
+	u64 ns;
 	int mode;
 
+	ts->tv_nsec = 0;
 	do {
 		seq = read_seqcount_begin(&gtod->seq);
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->wall_time_sec;
-		ts->tv_nsec = gtod->wall_time_nsec;
-		ns = vgetns();
+		ns = gtod->wall_time_snsec;
+		ns += vgetsns();
+		ns >>= gtod->clock.shift;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 
 	timespec_add_ns(ts, ns);
@@ -114,15 +117,18 @@ notrace static int __always_inline do_realtime(struct timespec *ts)
 
 notrace static int do_monotonic(struct timespec *ts)
 {
-	unsigned long seq, ns;
+	unsigned long seq;
+	u64 ns;
 	int mode;
 
+	ts->tv_nsec = 0;
 	do {
 		seq = read_seqcount_begin(&gtod->seq);
 		mode = gtod->clock.vclock_mode;
 		ts->tv_sec = gtod->monotonic_time_sec;
-		ts->tv_nsec = gtod->monotonic_time_nsec;
-		ns = vgetns();
+		ns = gtod->monotonic_time_snsec;
+		ns += vgetsns();
+		ns >>= gtod->clock.shift;
 	} while (unlikely(read_seqcount_retry(&gtod->seq, seq)));
 	timespec_add_ns(ts, ns);
 
-- 
cgit v1.2.3


From 82c93fcc2e1737fede2752520f1bf8f4de6304d8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <dxchgb@gmail.com>
Date: Mon, 24 Sep 2012 07:34:51 +0000
Subject: x86: bpf_jit_comp: add XOR instruction for BPF JIT

This patch is a follow-up for patch "filter: add XOR instruction for use
with X/K" that implements BPF x86 JIT parts for the BPF XOR operation.

Signed-off-by: Daniel Borkmann <daniel.borkmann@tik.ee.ethz.ch>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86/net/bpf_jit_comp.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 106c57829120..520d2bd0b9c5 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -335,9 +335,18 @@ void bpf_jit_compile(struct sk_filter *fp)
 					EMIT1_off32(0x0d, K);	/* or imm32,%eax */
 				break;
 			case BPF_S_ANC_ALU_XOR_X: /* A ^= X; */
+			case BPF_S_ALU_XOR_X:
 				seen |= SEEN_XREG;
 				EMIT2(0x31, 0xd8);		/* xor %ebx,%eax */
 				break;
+			case BPF_S_ALU_XOR_K: /* A ^= K; */
+				if (K == 0)
+					break;
+				if (is_imm8(K))
+					EMIT3(0x83, 0xf0, K);	/* xor imm8,%eax */
+				else
+					EMIT1_off32(0x35, K);	/* xor imm32,%eax */
+				break;
 			case BPF_S_ALU_LSH_X: /* A <<= X; */
 				seen |= SEEN_XREG;
 				EMIT4(0x89, 0xd9, 0xd3, 0xe0);	/* mov %ebx,%ecx; shl %cl,%eax */
-- 
cgit v1.2.3


From fdf9c356502ae02238efcdf90cefd7b473a63fd4 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 9 Sep 2012 14:56:31 +0200
Subject: cputime: Make finegrained irqtime accounting generally available

There is no known reason for this option to be unavailable on other
archs than x86. They just need to call enable_sched_clock_irqtime()
if they have a sufficiently finegrained clock to make it working.

Move it to the general option and let the user choose between
it and pure tick based or virtual cputime accounting.

Note that virtual cputime accounting already performs a finegrained
irqtime accounting. CONFIG_IRQ_TIME_ACCOUNTING is a kind of middle ground
between tick and virtual based accounting. So CONFIG_IRQ_TIME_ACCOUNTING
and CONFIG_VIRT_CPU_ACCOUNTING are mutually exclusive choices.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 arch/Kconfig     |  6 ++++++
 arch/x86/Kconfig | 12 +-----------
 init/Kconfig     | 30 +++++++++++++++++++++++++++++-
 3 files changed, 36 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/Kconfig b/arch/Kconfig
index f78de57487ae..101c31a4744b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -284,4 +284,10 @@ config SECCOMP_FILTER
 config HAVE_VIRT_CPU_ACCOUNTING
 	bool
 
+config HAVE_IRQ_TIME_ACCOUNTING
+	bool
+	help
+	  Archs need to ensure they use a high enough resolution clock to
+	  support irq time accounting and then call enable_sched_clock_irqtime().
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4abd..b86833aed83c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,7 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_IRQ_TIME_ACCOUNTING
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
@@ -796,17 +797,6 @@ config SCHED_MC
 	  making when dealing with multi-core CPU chips at a cost of slightly
 	  increased overhead in some places. If unsure say N here.
 
-config IRQ_TIME_ACCOUNTING
-	bool "Fine granularity task level IRQ time accounting"
-	default n
-	---help---
-	  Select this option to enable fine granularity task irq time
-	  accounting. This is done by reading a timestamp on each
-	  transitions between softirq and hardirq state, so there can be a
-	  small performance impact.
-
-	  If in doubt, say N here.
-
 source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
diff --git a/init/Kconfig b/init/Kconfig
index 2c5aa3407d6b..1862c6893078 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -326,10 +326,25 @@ source "kernel/time/Kconfig"
 
 menu "CPU/Task time and stats accounting"
 
+choice
+	prompt "Cputime accounting"
+	default TICK_CPU_ACCOUNTING if !PPC64
+	default VIRT_CPU_ACCOUNTING if PPC64
+
+# Kind of a stub config for the pure tick based cputime accounting
+config TICK_CPU_ACCOUNTING
+	bool "Simple tick based cputime accounting"
+	depends on !S390
+	help
+	  This is the basic tick based cputime accounting that maintains
+	  statistics about user, system and idle time spent on per jiffies
+	  granularity.
+
+	  If unsure, say Y.
+
 config VIRT_CPU_ACCOUNTING
 	bool "Deterministic task and CPU time accounting"
 	depends on HAVE_VIRT_CPU_ACCOUNTING
-	default y if PPC64
 	help
 	  Select this option to enable more accurate task and CPU time
 	  accounting.  This is done by reading a CPU counter on each
@@ -339,6 +354,19 @@ config VIRT_CPU_ACCOUNTING
 	  this also enables accounting of stolen time on logically-partitioned
 	  systems.
 
+config IRQ_TIME_ACCOUNTING
+	bool "Fine granularity task level IRQ time accounting"
+	depends on HAVE_IRQ_TIME_ACCOUNTING
+	help
+	  Select this option to enable fine granularity task irq time
+	  accounting. This is done by reading a timestamp on each
+	  transitions between softirq and hardirq state, so there can be a
+	  small performance impact.
+
+	  If in doubt, say N here.
+
+endchoice
+
 config BSD_PROCESS_ACCT
 	bool "BSD Process Accounting"
 	help
-- 
cgit v1.2.3


From dec08a837fda146fee498ffc5ecd0d2eeeacd025 Mon Sep 17 00:00:00 2001
From: Michael Wang <wangyun@linux.vnet.ibm.com>
Date: Wed, 19 Sep 2012 13:42:23 +0800
Subject: x86: Remove the useless branch in c_start()

Since 'cpu == -1' in cpumask_next() is legal, no need to handle
'*pos == 0' specially.

About the comments:

	/* just in case, cpu 0 is not the first */

A test with a cpumask in which cpu 0 is not the first has been
done, and it works well.

This patch will remove that useless branch to clean the code.

Signed-off-by: Michael Wang <wangyun@linux.vnet.ibm.com>
Cc: kjwinchester@gmail.com
Cc: borislav.petkov@amd.com
Cc: ak@linux.intel.com
Link: http://lkml.kernel.org/r/1348033343-23658-1-git-send-email-wangyun@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/proc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index 8022c6681485..fbd895562292 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -140,10 +140,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 
 static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	if (*pos == 0)	/* just in case, cpu 0 is not the first */
-		*pos = cpumask_first(cpu_online_mask);
-	else
-		*pos = cpumask_next(*pos - 1, cpu_online_mask);
+	*pos = cpumask_next(*pos - 1, cpu_online_mask);
 	if ((*pos) < nr_cpu_ids)
 		return &cpu_data(*pos);
 	return NULL;
-- 
cgit v1.2.3


From 57c078ce13983d27c83f962b3e9722887209005c Mon Sep 17 00:00:00 2001
From: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Date: Wed, 26 Sep 2012 09:54:17 +0900
Subject: x86/api: Rename mp_register_lapic in a comment

Commit 31d2092eb0c23636b73d2c24c0c11b66470cef58 ("x86: move
mp_register_lapic_address to boot.c") renamed mp_register_lapic
to acpi_register_lapic. But mp_register_lapic remains in a
comment. So the patch rename it.

Signed-off-by: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Len Brown <lenb@kernel.org>
Link: http://lkml.kernel.org/r/50625239.3050403@jp.fujitsu.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/acpi/boot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index b2297e58c6ed..e651f7a589ac 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -656,7 +656,7 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu)
 	acpi_register_lapic(physid, ACPI_MADT_ENABLED);
 
 	/*
-	 * If mp_register_lapic successfully generates a new logical cpu
+	 * If acpi_register_lapic successfully generates a new logical cpu
 	 * number, then the following will get us exactly what was mapped
 	 */
 	cpumask_andnot(new_map, cpu_present_mask, tmp_map);
-- 
cgit v1.2.3


From 1b2b23d8573076a587ed2081e0d2b69691079e0e Mon Sep 17 00:00:00 2001
From: Tao Guo <glorioustao@gmail.com>
Date: Wed, 26 Sep 2012 04:28:22 -0400
Subject: x86_64: Work around old GAS bug

GAS in binutils(2.16.91) could not parse parentheses within
macro parameters unless fully parenthesized, and this is a
workaround to make old gas work without generating below errors:

 arch/x86/kernel/entry_64.S: Assembler messages:
 arch/x86/kernel/entry_64.S:387: Error: too many positional arguments
 arch/x86/kernel/entry_64.S:389: Error: too many positional arguments
 [...]

Signed-off-by: Tao Guo <glorioustao@gmail.com>
Reluctantly-Acked-by: Jan Beulich <jbeulich@novell.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Link: http://lkml.kernel.org/r/1348648102-12653-1-git-send-email-glorioustao@gmail.com
[ Jan argues that these old GAS versions are fragile - which is so, but lets give them a chance. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/calling.h | 48 ++++++++++++++++++++----------------------
 arch/x86/kernel/entry_64.S     | 20 +++++++++---------
 2 files changed, 33 insertions(+), 35 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index a9e3a740f697..7f8422a28a46 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -49,38 +49,36 @@ For 32-bit we have the following conventions - kernel is built with
 #include "dwarf2.h"
 
 /*
- * 64-bit system call stack frame layout defines and helpers, for
- * assembly code (note that the seemingly unnecessary parentheses
- * are to prevent cpp from inserting spaces in expressions that get
- * passed to macros):
+ * 64-bit system call stack frame layout defines and helpers,
+ * for assembly code:
  */
 
-#define R15		  (0)
-#define R14		  (8)
-#define R13		 (16)
-#define R12		 (24)
-#define RBP		 (32)
-#define RBX		 (40)
+#define R15		  0
+#define R14		  8
+#define R13		 16
+#define R12		 24
+#define RBP		 32
+#define RBX		 40
 
 /* arguments: interrupts/non tracing syscalls only save up to here: */
-#define R11		 (48)
-#define R10		 (56)
-#define R9		 (64)
-#define R8		 (72)
-#define RAX		 (80)
-#define RCX		 (88)
-#define RDX		 (96)
-#define RSI		(104)
-#define RDI		(112)
-#define ORIG_RAX	(120)       /* + error_code */
+#define R11		 48
+#define R10		 56
+#define R9		 64
+#define R8		 72
+#define RAX		 80
+#define RCX		 88
+#define RDX		 96
+#define RSI		104
+#define RDI		112
+#define ORIG_RAX	120       /* + error_code */
 /* end of arguments */
 
 /* cpu exception frame or undefined in case of fast syscall: */
-#define RIP		(128)
-#define CS		(136)
-#define EFLAGS		(144)
-#define RSP		(152)
-#define SS		(160)
+#define RIP		128
+#define CS		136
+#define EFLAGS		144
+#define RSP		152
+#define SS		160
 
 #define ARGOFFSET	R11
 #define SWFRAME		ORIG_RAX
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index b1dac12dc5e6..2c6706167c8d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -342,15 +342,15 @@ ENDPROC(native_usergs_sysret64)
 	.macro SAVE_ARGS_IRQ
 	cld
 	/* start from rbp in pt_regs and jump over */
-	movq_cfi rdi, RDI-RBP
-	movq_cfi rsi, RSI-RBP
-	movq_cfi rdx, RDX-RBP
-	movq_cfi rcx, RCX-RBP
-	movq_cfi rax, RAX-RBP
-	movq_cfi  r8,  R8-RBP
-	movq_cfi  r9,  R9-RBP
-	movq_cfi r10, R10-RBP
-	movq_cfi r11, R11-RBP
+	movq_cfi rdi, (RDI-RBP)
+	movq_cfi rsi, (RSI-RBP)
+	movq_cfi rdx, (RDX-RBP)
+	movq_cfi rcx, (RCX-RBP)
+	movq_cfi rax, (RAX-RBP)
+	movq_cfi  r8,  (R8-RBP)
+	movq_cfi  r9,  (R9-RBP)
+	movq_cfi r10, (R10-RBP)
+	movq_cfi r11, (R11-RBP)
 
 	/* Save rbp so that we can unwind from get_irq_regs() */
 	movq_cfi rbp, 0
@@ -384,7 +384,7 @@ ENDPROC(native_usergs_sysret64)
 	.endm
 
 ENTRY(save_rest)
-	PARTIAL_FRAME 1 REST_SKIP+8
+	PARTIAL_FRAME 1 (REST_SKIP+8)
 	movq 5*8+16(%rsp), %r11	/* save return address */
 	movq_cfi rbx, RBX+16
 	movq_cfi rbp, RBP+16
-- 
cgit v1.2.3


From c416ddf5b909736f5b57d348f5de159693e699ad Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Tue, 25 Sep 2012 14:51:19 +0200
Subject: x86: Unspaghettize do_trap()

Cleanup the label maze in this function. Having a
seperate function to first handle the traps that don't
generate a signal makes it easier to convert into
more readable conditional paths.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1348577479-2564-1-git-send-email-fweisbec@gmail.com
[ Fixed 32-bit build failure. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/traps.c | 60 ++++++++++++++++++++++++-------------------------
 1 file changed, 29 insertions(+), 31 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341c9369..6ff771559af3 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -107,30 +107,45 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 	dec_preempt_count();
 }
 
-static void __kprobes
-do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
-	long error_code, siginfo_t *info)
+static int __kprobes
+do_trap_no_signal(struct task_struct *tsk, int trapnr, char *str,
+		  struct pt_regs *regs,	long error_code)
 {
-	struct task_struct *tsk = current;
-
 #ifdef CONFIG_X86_32
 	if (regs->flags & X86_VM_MASK) {
 		/*
-		 * traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
+		 * Traps 0, 1, 3, 4, and 5 should be forwarded to vm86.
 		 * On nmi (interrupt 2), do_trap should not be called.
 		 */
-		if (trapnr < X86_TRAP_UD)
-			goto vm86_trap;
-		goto trap_signal;
+		if (trapnr < X86_TRAP_UD) {
+			if (!handle_vm86_trap((struct kernel_vm86_regs *) regs,
+						error_code, trapnr))
+				return 0;
+		}
+		return -1;
 	}
 #endif
+	if (!user_mode(regs)) {
+		if (!fixup_exception(regs)) {
+			tsk->thread.error_code = error_code;
+			tsk->thread.trap_nr = trapnr;
+			die(str, regs, error_code);
+		}
+		return 0;
+	}
 
-	if (!user_mode(regs))
-		goto kernel_trap;
+	return -1;
+}
 
-#ifdef CONFIG_X86_32
-trap_signal:
-#endif
+static void __kprobes
+do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
+	long error_code, siginfo_t *info)
+{
+	struct task_struct *tsk = current;
+
+
+	if (!do_trap_no_signal(tsk, trapnr, str, regs, error_code))
+		return;
 	/*
 	 * We want error_code and trap_nr set for userspace faults and
 	 * kernelspace faults which result in die(), but not
@@ -158,23 +173,6 @@ trap_signal:
 		force_sig_info(signr, info, tsk);
 	else
 		force_sig(signr, tsk);
-	return;
-
-kernel_trap:
-	if (!fixup_exception(regs)) {
-		tsk->thread.error_code = error_code;
-		tsk->thread.trap_nr = trapnr;
-		die(str, regs, error_code);
-	}
-	return;
-
-#ifdef CONFIG_X86_32
-vm86_trap:
-	if (handle_vm86_trap((struct kernel_vm86_regs *) regs,
-						error_code, trapnr))
-		goto trap_signal;
-	return;
-#endif
 }
 
 #define DO_ERROR(trapnr, signr, str, name)				\
-- 
cgit v1.2.3


From bf5a3c13b939813d28ce26c01425054c740d6731 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 11 Jul 2012 20:26:34 +0200
Subject: x86: Syscall hooks for userspace RCU extended QS

Add syscall slow path hooks to notify syscall entry
and exit on CPUs that want to support userspace RCU
extended quiescent state.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 arch/x86/include/asm/thread_info.h | 10 +++++++---
 arch/x86/kernel/ptrace.c           |  5 +++++
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 89f794f007ec..c535d847e3b5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -89,6 +89,7 @@ struct thread_info {
 #define TIF_NOTSC		16	/* TSC is not accessible in userland */
 #define TIF_IA32		17	/* IA32 compatibility process */
 #define TIF_FORK		18	/* ret_from_fork */
+#define TIF_NOHZ		19	/* in adaptive nohz mode */
 #define TIF_MEMDIE		20	/* is terminating due to OOM killer */
 #define TIF_DEBUG		21	/* uses debug registers */
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
@@ -114,6 +115,7 @@ struct thread_info {
 #define _TIF_NOTSC		(1 << TIF_NOTSC)
 #define _TIF_IA32		(1 << TIF_IA32)
 #define _TIF_FORK		(1 << TIF_FORK)
+#define _TIF_NOHZ		(1 << TIF_NOHZ)
 #define _TIF_DEBUG		(1 << TIF_DEBUG)
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
@@ -126,12 +128,13 @@ struct thread_info {
 /* work to do in syscall_trace_enter() */
 #define _TIF_WORK_SYSCALL_ENTRY	\
 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_EMU | _TIF_SYSCALL_AUDIT |	\
-	 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
+	 _TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT |	\
+	 _TIF_NOHZ)
 
 /* work to do in syscall_trace_leave() */
 #define _TIF_WORK_SYSCALL_EXIT	\
 	(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP |	\
-	 _TIF_SYSCALL_TRACEPOINT)
+	 _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
 
 /* work to do on interrupt/exception return */
 #define _TIF_WORK_MASK							\
@@ -141,7 +144,8 @@ struct thread_info {
 
 /* work to do on any return to user space */
 #define _TIF_ALLWORK_MASK						\
-	((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT)
+	((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT |	\
+	_TIF_NOHZ)
 
 /* Only used for 64 bit */
 #define _TIF_DO_NOTIFY_MASK						\
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index c4c6a5c2bf0f..9f94f8ec26e4 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -21,6 +21,7 @@
 #include <linux/signal.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
+#include <linux/rcupdate.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -1463,6 +1464,8 @@ long syscall_trace_enter(struct pt_regs *regs)
 {
 	long ret = 0;
 
+	rcu_user_exit();
+
 	/*
 	 * If we stepped into a sysenter/syscall insn, it trapped in
 	 * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP.
@@ -1526,4 +1529,6 @@ void syscall_trace_leave(struct pt_regs *regs)
 			!test_thread_flag(TIF_SYSCALL_EMU);
 	if (step || test_thread_flag(TIF_SYSCALL_TRACE))
 		tracehook_report_syscall_exit(regs, step);
+
+	rcu_user_enter();
 }
-- 
cgit v1.2.3


From ef3f628872c838933a279d0d7e63e707783c9710 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 24 Sep 2012 21:05:52 +0200
Subject: x86: Unspaghettize do_general_protection()

There is some unnatural label based layout in this function.
Convert the unnecessary goto to readable conditional blocks.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/traps.c | 38 ++++++++++++++++----------------------
 1 file changed, 16 insertions(+), 22 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index b481341c9369..74850e559449 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -258,13 +258,25 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	conditional_sti(regs);
 
 #ifdef CONFIG_X86_32
-	if (regs->flags & X86_VM_MASK)
-		goto gp_in_vm86;
+	if (regs->flags & X86_VM_MASK) {
+		local_irq_enable();
+		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
+		return;
+	}
 #endif
 
 	tsk = current;
-	if (!user_mode(regs))
-		goto gp_in_kernel;
+	if (!user_mode(regs)) {
+		if (fixup_exception(regs))
+			return;
+
+		tsk->thread.error_code = error_code;
+		tsk->thread.trap_nr = X86_TRAP_GP;
+		if (!notify_die(DIE_GPF, "general protection fault", regs, error_code,
+			       X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
+			die("general protection fault", regs, error_code);
+		return;
+	}
 
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_nr = X86_TRAP_GP;
@@ -280,24 +292,6 @@ do_general_protection(struct pt_regs *regs, long error_code)
 
 	force_sig(SIGSEGV, tsk);
 	return;
-
-#ifdef CONFIG_X86_32
-gp_in_vm86:
-	local_irq_enable();
-	handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
-	return;
-#endif
-
-gp_in_kernel:
-	if (fixup_exception(regs))
-		return;
-
-	tsk->thread.error_code = error_code;
-	tsk->thread.trap_nr = X86_TRAP_GP;
-	if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
-			X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
-		return;
-	die("general protection fault", regs, error_code);
 }
 
 /* May run on IST stack. */
-- 
cgit v1.2.3


From 6ba3c97a38803883c2eee489505796cb0a727122 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 11 Jul 2012 20:26:35 +0200
Subject: x86: Exception hooks for userspace RCU extended QS

Add necessary hooks to x86 exception for userspace
RCU extended quiescent state support.

This includes traps, page fault, debug exceptions, etc...

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 arch/x86/include/asm/rcu.h | 20 ++++++++++++
 arch/x86/kernel/traps.c    | 81 +++++++++++++++++++++++++++++++---------------
 arch/x86/mm/fault.c        | 13 ++++++--
 3 files changed, 86 insertions(+), 28 deletions(-)
 create mode 100644 arch/x86/include/asm/rcu.h

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h
new file mode 100644
index 000000000000..439815b35ced
--- /dev/null
+++ b/arch/x86/include/asm/rcu.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_X86_RCU_H
+#define _ASM_X86_RCU_H
+
+#include <linux/rcupdate.h>
+#include <asm/ptrace.h>
+
+static inline void exception_enter(struct pt_regs *regs)
+{
+	rcu_user_exit();
+}
+
+static inline void exception_exit(struct pt_regs *regs)
+{
+#ifdef CONFIG_RCU_USER_QS
+	if (user_mode(regs))
+		rcu_user_enter();
+#endif
+}
+
+#endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 74850e559449..378967578f22 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -55,6 +55,7 @@
 #include <asm/i387.h>
 #include <asm/fpu-internal.h>
 #include <asm/mce.h>
+#include <asm/rcu.h>
 
 #include <asm/mach_traps.h>
 
@@ -180,11 +181,15 @@ vm86_trap:
 #define DO_ERROR(trapnr, signr, str, name)				\
 dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 {									\
-	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
-							== NOTIFY_STOP)	\
+	exception_enter(regs);						\
+	if (notify_die(DIE_TRAP, str, regs, error_code,			\
+			trapnr, signr) == NOTIFY_STOP) {		\
+		exception_exit(regs);					\
 		return;							\
+	}								\
 	conditional_sti(regs);						\
 	do_trap(trapnr, signr, str, regs, error_code, NULL);		\
+	exception_exit(regs);						\
 }
 
 #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr)		\
@@ -195,11 +200,15 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code)	\
 	info.si_errno = 0;						\
 	info.si_code = sicode;						\
 	info.si_addr = (void __user *)siaddr;				\
-	if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr)	\
-							== NOTIFY_STOP)	\
+	exception_enter(regs);						\
+	if (notify_die(DIE_TRAP, str, regs, error_code,			\
+			trapnr, signr) == NOTIFY_STOP) {		\
+		exception_exit(regs);					\
 		return;							\
+	}								\
 	conditional_sti(regs);						\
 	do_trap(trapnr, signr, str, regs, error_code, &info);		\
+	exception_exit(regs);						\
 }
 
 DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV,
@@ -222,12 +231,14 @@ DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check,
 /* Runs on IST stack */
 dotraplinkage void do_stack_segment(struct pt_regs *regs, long error_code)
 {
+	exception_enter(regs);
 	if (notify_die(DIE_TRAP, "stack segment", regs, error_code,
-			X86_TRAP_SS, SIGBUS) == NOTIFY_STOP)
-		return;
-	preempt_conditional_sti(regs);
-	do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
-	preempt_conditional_cli(regs);
+		       X86_TRAP_SS, SIGBUS) != NOTIFY_STOP) {
+		preempt_conditional_sti(regs);
+		do_trap(X86_TRAP_SS, SIGBUS, "stack segment", regs, error_code, NULL);
+		preempt_conditional_cli(regs);
+	}
+	exception_exit(regs);
 }
 
 dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
@@ -235,6 +246,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code)
 	static const char str[] = "double fault";
 	struct task_struct *tsk = current;
 
+	exception_enter(regs);
 	/* Return not checked because double check cannot be ignored */
 	notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV);
 
@@ -255,27 +267,28 @@ do_general_protection(struct pt_regs *regs, long error_code)
 {
 	struct task_struct *tsk;
 
+	exception_enter(regs);
 	conditional_sti(regs);
 
 #ifdef CONFIG_X86_32
 	if (regs->flags & X86_VM_MASK) {
 		local_irq_enable();
 		handle_vm86_fault((struct kernel_vm86_regs *) regs, error_code);
-		return;
+		goto exit;
 	}
 #endif
 
 	tsk = current;
 	if (!user_mode(regs)) {
 		if (fixup_exception(regs))
-			return;
+			goto exit;
 
 		tsk->thread.error_code = error_code;
 		tsk->thread.trap_nr = X86_TRAP_GP;
-		if (!notify_die(DIE_GPF, "general protection fault", regs, error_code,
-			       X86_TRAP_GP, SIGSEGV) == NOTIFY_STOP)
+		if (notify_die(DIE_GPF, "general protection fault", regs, error_code,
+			       X86_TRAP_GP, SIGSEGV) != NOTIFY_STOP)
 			die("general protection fault", regs, error_code);
-		return;
+		goto exit;
 	}
 
 	tsk->thread.error_code = error_code;
@@ -291,7 +304,8 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	}
 
 	force_sig(SIGSEGV, tsk);
-	return;
+exit:
+	exception_exit(regs);
 }
 
 /* May run on IST stack. */
@@ -306,15 +320,16 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
 	    ftrace_int3_handler(regs))
 		return;
 #endif
+	exception_enter(regs);
 #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
 	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 				SIGTRAP) == NOTIFY_STOP)
-		return;
+		goto exit;
 #endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
 
 	if (notify_die(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP,
 			SIGTRAP) == NOTIFY_STOP)
-		return;
+		goto exit;
 
 	/*
 	 * Let others (NMI) know that the debug stack is in use
@@ -325,6 +340,8 @@ dotraplinkage void __kprobes notrace do_int3(struct pt_regs *regs, long error_co
 	do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL);
 	preempt_conditional_cli(regs);
 	debug_stack_usage_dec();
+exit:
+	exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_64
@@ -385,6 +402,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	unsigned long dr6;
 	int si_code;
 
+	exception_enter(regs);
+
 	get_debugreg(dr6, 6);
 
 	/* Filter out all the reserved bits which are preset to 1 */
@@ -400,7 +419,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
 	/* Catch kmemcheck conditions first of all! */
 	if ((dr6 & DR_STEP) && kmemcheck_trap(regs))
-		return;
+		goto exit;
 
 	/* DR6 may or may not be cleared by the CPU */
 	set_debugreg(0, 6);
@@ -415,7 +434,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
 	if (notify_die(DIE_DEBUG, "debug", regs, PTR_ERR(&dr6), error_code,
 							SIGTRAP) == NOTIFY_STOP)
-		return;
+		goto exit;
 
 	/*
 	 * Let others (NMI) know that the debug stack is in use
@@ -431,7 +450,7 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 					X86_TRAP_DB);
 		preempt_conditional_cli(regs);
 		debug_stack_usage_dec();
-		return;
+		goto exit;
 	}
 
 	/*
@@ -452,7 +471,8 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 	preempt_conditional_cli(regs);
 	debug_stack_usage_dec();
 
-	return;
+exit:
+	exception_exit(regs);
 }
 
 /*
@@ -549,14 +569,17 @@ dotraplinkage void do_coprocessor_error(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
 	ignore_fpu_irq = 1;
 #endif
-
+	exception_enter(regs);
 	math_error(regs, error_code, X86_TRAP_MF);
+	exception_exit(regs);
 }
 
 dotraplinkage void
 do_simd_coprocessor_error(struct pt_regs *regs, long error_code)
 {
+	exception_enter(regs);
 	math_error(regs, error_code, X86_TRAP_XF);
+	exception_exit(regs);
 }
 
 dotraplinkage void
@@ -623,6 +646,7 @@ EXPORT_SYMBOL_GPL(math_state_restore);
 dotraplinkage void __kprobes
 do_device_not_available(struct pt_regs *regs, long error_code)
 {
+	exception_enter(regs);
 #ifdef CONFIG_MATH_EMULATION
 	if (read_cr0() & X86_CR0_EM) {
 		struct math_emu_info info = { };
@@ -631,6 +655,7 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 
 		info.regs = regs;
 		math_emulate(&info);
+		exception_exit(regs);
 		return;
 	}
 #endif
@@ -638,12 +663,15 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 #ifdef CONFIG_X86_32
 	conditional_sti(regs);
 #endif
+	exception_exit(regs);
 }
 
 #ifdef CONFIG_X86_32
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 {
 	siginfo_t info;
+
+	exception_enter(regs);
 	local_irq_enable();
 
 	info.si_signo = SIGILL;
@@ -651,10 +679,11 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 	info.si_code = ILL_BADSTK;
 	info.si_addr = NULL;
 	if (notify_die(DIE_TRAP, "iret exception", regs, error_code,
-			X86_TRAP_IRET, SIGILL) == NOTIFY_STOP)
-		return;
-	do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
-		&info);
+			X86_TRAP_IRET, SIGILL) != NOTIFY_STOP) {
+		do_trap(X86_TRAP_IRET, SIGILL, "iret exception", regs, error_code,
+			&info);
+	}
+	exception_exit(regs);
 }
 #endif
 
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 76dcd9d8e0bc..7dde46d68a25 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -18,6 +18,7 @@
 #include <asm/pgalloc.h>		/* pgd_*(), ...			*/
 #include <asm/kmemcheck.h>		/* kmemcheck_*(), ...		*/
 #include <asm/fixmap.h>			/* VSYSCALL_START		*/
+#include <asm/rcu.h>			/* exception_enter(), ...	*/
 
 /*
  * Page fault error code bits:
@@ -1000,8 +1001,8 @@ static int fault_in_kernel_space(unsigned long address)
  * and the problem, and then passes it off to one of the appropriate
  * routines.
  */
-dotraplinkage void __kprobes
-do_page_fault(struct pt_regs *regs, unsigned long error_code)
+static void __kprobes
+__do_page_fault(struct pt_regs *regs, unsigned long error_code)
 {
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
@@ -1209,3 +1210,11 @@ good_area:
 
 	up_read(&mm->mmap_sem);
 }
+
+dotraplinkage void __kprobes
+do_page_fault(struct pt_regs *regs, unsigned long error_code)
+{
+	exception_enter(regs);
+	__do_page_fault(regs, error_code);
+	exception_exit(regs);
+}
-- 
cgit v1.2.3


From 0430499ce9d78691f3985962021b16bf8f8a8048 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 11 Jul 2012 20:26:38 +0200
Subject: x86: Use the new schedule_user API on userspace preemption

This way we can exit the RCU extended quiescent state before
we schedule a new task from irq/exception exit.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 arch/x86/include/asm/rcu.h | 12 ++++++++++++
 arch/x86/kernel/entry_64.S |  9 +++++----
 2 files changed, 17 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/rcu.h b/arch/x86/include/asm/rcu.h
index 439815b35ced..d1ac07a23979 100644
--- a/arch/x86/include/asm/rcu.h
+++ b/arch/x86/include/asm/rcu.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_RCU_H
 #define _ASM_X86_RCU_H
 
+#ifndef __ASSEMBLY__
+
 #include <linux/rcupdate.h>
 #include <asm/ptrace.h>
 
@@ -17,4 +19,14 @@ static inline void exception_exit(struct pt_regs *regs)
 #endif
 }
 
+#else /* __ASSEMBLY__ */
+
+#ifdef CONFIG_RCU_USER_QS
+# define SCHEDULE_USER call schedule_user
+#else
+# define SCHEDULE_USER call schedule
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
 #endif
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8c834f..1a8f3cbb6ee3 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -56,6 +56,7 @@
 #include <asm/ftrace.h>
 #include <asm/percpu.h>
 #include <asm/asm.h>
+#include <asm/rcu.h>
 #include <linux/err.h>
 
 /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this.  */
@@ -565,7 +566,7 @@ sysret_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call schedule
+	SCHEDULE_USER
 	popq_cfi %rdi
 	jmp sysret_check
 
@@ -678,7 +679,7 @@ int_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call schedule
+	SCHEDULE_USER
 	popq_cfi %rdi
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
@@ -974,7 +975,7 @@ retint_careful:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	pushq_cfi %rdi
-	call  schedule
+	SCHEDULE_USER
 	popq_cfi %rdi
 	GET_THREAD_INFO(%rcx)
 	DISABLE_INTERRUPTS(CLBR_NONE)
@@ -1449,7 +1450,7 @@ paranoid_userspace:
 paranoid_schedule:
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_ANY)
-	call schedule
+	SCHEDULE_USER
 	DISABLE_INTERRUPTS(CLBR_ANY)
 	TRACE_IRQS_OFF
 	jmp paranoid_userspace
-- 
cgit v1.2.3


From edf55fda35c7dc7f2d9241c3abaddaf759b457c6 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Wed, 11 Jul 2012 20:26:39 +0200
Subject: x86: Exit RCU extended QS on notify resume

do_notify_resume() may be called on irq or exception
exit. But at that time the exception has already called
rcu_user_enter() and the irq has already called rcu_irq_exit().

Since it can use RCU read side critical section, we must call
rcu_user_exit() before doing anything there. Then we must call
back rcu_user_enter() after this function because we know we are
going to userspace from there.

This complete support for userspace RCU extended quiescent state
in x86-64.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Alessio Igor Bogani <abogani@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Avi Kivity <avi@redhat.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Geoff Levand <geoff@infradead.org>
Cc: Gilad Ben Yossef <gilad@benyossef.com>
Cc: Hakan Akkan <hakanakkan@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Kevin Hilman <khilman@ti.com>
Cc: Max Krasnyansky <maxk@qualcomm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephen Hemminger <shemminger@vyatta.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Sven-Thorsten Dietrich <thebigcorporation@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
---
 arch/x86/Kconfig         | 1 +
 arch/x86/kernel/signal.c | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 50a1d1f9b6d3..20c49b8450b8 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,7 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_RCU_USER_QS if X86_64
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b280908a376e..bca0ab903e57 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -779,6 +779,8 @@ static void do_signal(struct pt_regs *regs)
 void
 do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 {
+	rcu_user_exit();
+
 #ifdef CONFIG_X86_MCE
 	/* notify userspace of pending MCEs */
 	if (thread_info_flags & _TIF_MCE_NOTIFY)
@@ -804,6 +806,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 #ifdef CONFIG_X86_32
 	clear_thread_flag(TIF_IRET);
 #endif /* CONFIG_X86_32 */
+
+	rcu_user_enter();
 }
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where)
-- 
cgit v1.2.3


From 402537fd9f5499d1a50a6f20cdd9031d24ee2e47 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Thu, 27 Sep 2012 09:33:26 +0800
Subject: perf/x86: Fix typo in uncore_pmu_to_box

The variable box should not be declared as static.

This could probably cause crashes with sufficiently parallel
uncore PMU use.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Cc: eranian@google.com
Cc: a.p.zijlstra@chello.nl
Link: http://lkml.kernel.org/r/1348709606-2759-1-git-send-email-zheng.z.yan@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 38e4894165b9..c7c55e74ac60 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -1950,7 +1950,7 @@ struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, int cp
 static struct intel_uncore_box *
 uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu)
 {
-	static struct intel_uncore_box *box;
+	struct intel_uncore_box *box;
 
 	box = *per_cpu_ptr(pmu->box, cpu);
 	if (box)
-- 
cgit v1.2.3


From 200429cc63399e99dd2abcdca5088559a911ef2b Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Wed, 19 Sep 2012 14:24:57 +0300
Subject: crypto: cast5/avx - fix storing of new IV in CBC encryption

cast5/avx incorrectly XORs new IV over old IV at end of CBC encryption
function when it should store. This causes CBC encryption to give
incorrect output on multi-page encryption requests.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/cast5_avx_glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c
index 445aab06387b..e0ea14f9547f 100644
--- a/arch/x86/crypto/cast5_avx_glue.c
+++ b/arch/x86/crypto/cast5_avx_glue.c
@@ -165,7 +165,7 @@ static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
 		nbytes -= bsize;
 	} while (nbytes >= bsize);
 
-	*(u64 *)walk->iv ^= *iv;
+	*(u64 *)walk->iv = *iv;
 	return nbytes;
 }
 
-- 
cgit v1.2.3


From fe04ddf7c2910362f3817c8156e41cbd6c0ee35d Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Tue, 25 Sep 2012 16:03:03 +0200
Subject: kbuild: Do not package /boot and /lib in make tar-pkg

There were reports of users destroying their Fedora installs by a kernel
tarball that replaces the /lib -> /usr/lib symlink. Let's remove the
toplevel directories from the tarball to prevent this from happening.

Reported-by: Andi Kleen <andi@firstfloor.org>
Suggested-by: Ben Hutchings <ben@decadent.org.uk>
Cc: <stable@kernel.org>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 arch/x86/Makefile        | 2 +-
 scripts/Makefile.fwinst  | 4 ++--
 scripts/package/buildtar | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index c098ca4671de..b0c5276861ec 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -138,7 +138,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
 KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
 
-archscripts: scripts_basic
+archscripts:
 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
 ###
diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst
index 4d908d16c035..c3f69ae275d1 100644
--- a/scripts/Makefile.fwinst
+++ b/scripts/Makefile.fwinst
@@ -27,7 +27,7 @@ endif
 installed-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(mod-fw))
 
 installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all))
-installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/./
+installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/.
 
 # Workaround for make < 3.81, where .SECONDEXPANSION doesn't work.
 PHONY += $(INSTALL_FW_PATH)/$$(%) install-all-dirs
@@ -42,7 +42,7 @@ quiet_cmd_install = INSTALL $(subst $(srctree)/,,$@)
 $(installed-fw-dirs):
 	$(call cmd,mkdir)
 
-$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %)
+$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $$(dir $(INSTALL_FW_PATH)/%)
 	$(call cmd,install)
 
 PHONY +=  __fw_install __fw_modinst FORCE
diff --git a/scripts/package/buildtar b/scripts/package/buildtar
index 8a7b15598ea9..d0d748e72915 100644
--- a/scripts/package/buildtar
+++ b/scripts/package/buildtar
@@ -109,7 +109,7 @@ esac
 	if tar --owner=root --group=root --help >/dev/null 2>&1; then
 		opts="--owner=root --group=root"
 	fi
-	tar cf - . $opts | ${compress} > "${tarball}${file_ext}"
+	tar cf - boot/* lib/* $opts | ${compress} > "${tarball}${file_ext}"
 )
 
 echo "Tarball successfully created in ${tarball}${file_ext}"
-- 
cgit v1.2.3


From f9a38eace4498a5e9f6d2cdfc879d5444edc3a5f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 6 Sep 2012 13:39:47 -0400
Subject: um: let signal_delivered() do SIGTRAP on singlestepping into handler

... rather than duplicating that in sigframe setup code (and doing that
inconsistently, at that)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/kernel/signal.c | 6 +++++-
 arch/x86/um/signal.c    | 6 ------
 2 files changed, 5 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 7362d58efc29..cc9c2350e417 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -22,9 +22,13 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 			 struct k_sigaction *ka, siginfo_t *info)
 {
 	sigset_t *oldset = sigmask_to_save();
+	int singlestep = 0;
 	unsigned long sp;
 	int err;
 
+	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
+		singlestep = 1;
+
 	/* Did we come from a system call? */
 	if (PT_REGS_SYSCALL_NR(regs) >= 0) {
 		/* If so, check system call restarting.. */
@@ -61,7 +65,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 	if (err)
 		force_sigsegv(signr, current);
 	else
-		signal_delivered(signr, info, ka, regs, 0);
+		signal_delivered(signr, info, ka, regs, singlestep);
 }
 
 static int kern_do_signal(struct pt_regs *regs)
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index a508cea13503..ba7363ecf896 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -416,9 +416,6 @@ int setup_signal_stack_sc(unsigned long stack_top, int sig,
 	PT_REGS_AX(regs) = (unsigned long) sig;
 	PT_REGS_DX(regs) = (unsigned long) 0;
 	PT_REGS_CX(regs) = (unsigned long) 0;
-
-	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
-		ptrace_notify(SIGTRAP);
 	return 0;
 }
 
@@ -466,9 +463,6 @@ int setup_signal_stack_si(unsigned long stack_top, int sig,
 	PT_REGS_AX(regs) = (unsigned long) sig;
 	PT_REGS_DX(regs) = (unsigned long) &frame->info;
 	PT_REGS_CX(regs) = (unsigned long) &frame->uc;
-
-	if ((current->ptrace & PT_DTRACE) && (current->ptrace & PT_PTRACED))
-		ptrace_notify(SIGTRAP);
 	return 0;
 }
 
-- 
cgit v1.2.3


From d2ce4e92fa4f79a5fdb4cc912b411280afe21697 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 20 Sep 2012 09:28:25 -0400
Subject: um: kill thread->forking

we only use that to tell copy_thread() done by syscall from that
done by kernel_thread().  However, it's easier to do simply by
checking PF_KTHREAD in thread flags.

Merge sys_clone() guts for 32bit and 64bit, while we are at it...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/include/asm/processor-generic.h |  9 ---------
 arch/um/kernel/process.c                |  8 ++++----
 arch/um/kernel/syscall.c                | 24 ++++++++++++------------
 arch/x86/um/shared/sysdep/syscalls.h    |  2 ++
 arch/x86/um/sys_call_table_32.c         |  2 +-
 arch/x86/um/syscalls_32.c               | 27 +++++++--------------------
 arch/x86/um/syscalls_64.c               | 23 +++--------------------
 7 files changed, 29 insertions(+), 66 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 69f1c57a8d0d..33a6a2423bd2 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -20,14 +20,6 @@ struct mm_struct;
 
 struct thread_struct {
 	struct task_struct *saved_task;
-	/*
-	 * This flag is set to 1 before calling do_fork (and analyzed in
-	 * copy_thread) to mark that we are begin called from userspace (fork /
-	 * vfork / clone), and reset to 0 after. It is left to 0 when called
-	 * from kernelspace (i.e. kernel_thread() or fork_idle(),
-	 * as of 2.6.11).
-	 */
-	int forking;
 	struct pt_regs regs;
 	int singlestep_syscall;
 	void *fault_addr;
@@ -58,7 +50,6 @@ struct thread_struct {
 
 #define INIT_THREAD \
 { \
-	.forking		= 0, \
 	.regs		   	= EMPTY_REGS,	\
 	.fault_addr		= NULL, \
 	.prev_sched		= NULL, \
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 57fc7028714a..c5f5afa50745 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -181,11 +181,12 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		struct pt_regs *regs)
 {
 	void (*handler)(void);
+	int kthread = current->flags & PF_KTHREAD;
 	int ret = 0;
 
 	p->thread = (struct thread_struct) INIT_THREAD;
 
-	if (current->thread.forking) {
+	if (!kthread) {
 	  	memcpy(&p->thread.regs.regs, &regs->regs,
 		       sizeof(p->thread.regs.regs));
 		PT_REGS_SET_SYSCALL_RETURN(&p->thread.regs, 0);
@@ -195,8 +196,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		handler = fork_handler;
 
 		arch_copy_thread(&current->thread.arch, &p->thread.arch);
-	}
-	else {
+	} else {
 		get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
 		p->thread.request.u.thread = current->thread.request.u.thread;
 		handler = new_thread_handler;
@@ -204,7 +204,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 
 	new_thread(task_stack_page(p), &p->thread.switch_buf, handler);
 
-	if (current->thread.forking) {
+	if (!kthread) {
 		clear_flushed_tls(p);
 
 		/*
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index f958cb876ee3..a4c6d8eee74c 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -17,25 +17,25 @@
 
 long sys_fork(void)
 {
-	long ret;
-
-	current->thread.forking = 1;
-	ret = do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
+	return do_fork(SIGCHLD, UPT_SP(&current->thread.regs.regs),
 		      &current->thread.regs, 0, NULL, NULL);
-	current->thread.forking = 0;
-	return ret;
 }
 
 long sys_vfork(void)
 {
-	long ret;
-
-	current->thread.forking = 1;
-	ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
+	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD,
 		      UPT_SP(&current->thread.regs.regs),
 		      &current->thread.regs, 0, NULL, NULL);
-	current->thread.forking = 0;
-	return ret;
+}
+
+long sys_clone(unsigned long clone_flags, unsigned long newsp,
+	       void __user *parent_tid, void __user *child_tid)
+{
+	if (!newsp)
+		newsp = UPT_SP(&current->thread.regs.regs);
+
+	return do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
+		      child_tid);
 }
 
 long old_mmap(unsigned long addr, unsigned long len,
diff --git a/arch/x86/um/shared/sysdep/syscalls.h b/arch/x86/um/shared/sysdep/syscalls.h
index bd9a89b67e41..ca255a805ed9 100644
--- a/arch/x86/um/shared/sysdep/syscalls.h
+++ b/arch/x86/um/shared/sysdep/syscalls.h
@@ -1,3 +1,5 @@
+extern long sys_clone(unsigned long clone_flags, unsigned long newsp,
+	       void __user *parent_tid, void __user *child_tid);
 #ifdef __i386__
 #include "syscalls_32.h"
 #else
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index 68d1dc91b37b..b5408cecac6c 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -28,7 +28,7 @@
 #define ptregs_execve sys_execve
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
-#define ptregs_clone sys_clone
+#define ptregs_clone i386_clone
 #define ptregs_vm86 sys_vm86
 #define ptregs_sigaltstack sys_sigaltstack
 #define ptregs_vfork sys_vfork
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
index b853e8600b9d..db444c7218fe 100644
--- a/arch/x86/um/syscalls_32.c
+++ b/arch/x86/um/syscalls_32.c
@@ -3,37 +3,24 @@
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
-#include "linux/shm.h"
-#include "linux/ipc.h"
-#include "linux/syscalls.h"
-#include "asm/mman.h"
-#include "asm/uaccess.h"
-#include "asm/unistd.h"
+#include <linux/syscalls.h>
+#include <sysdep/syscalls.h>
 
 /*
  * The prototype on i386 is:
  *
- *     int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls, int * child_tidptr)
+ *     int clone(int flags, void * child_stack, int * parent_tidptr, struct user_desc * newtls
  *
  * and the "newtls" arg. on i386 is read by copy_thread directly from the
  * register saved on the stack.
  */
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       int __user *parent_tid, void *newtls, int __user *child_tid)
+long i386_clone(unsigned long clone_flags, unsigned long newsp,
+		int __user *parent_tid, void *newtls, int __user *child_tid)
 {
-	long ret;
-
-	if (!newsp)
-		newsp = UPT_SP(&current->thread.regs.regs);
-
-	current->thread.forking = 1;
-	ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
-		      child_tid);
-	current->thread.forking = 0;
-	return ret;
+	return sys_clone(clone_flags, newsp, parent_tid, child_tid);
 }
 
+
 long sys_sigaction(int sig, const struct old_sigaction __user *act,
 			 struct old_sigaction __user *oact)
 {
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index f3d82bb6e15a..adb08eb5c22a 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -5,12 +5,9 @@
  * Licensed under the GPL
  */
 
-#include "linux/linkage.h"
-#include "linux/personality.h"
-#include "linux/utsname.h"
-#include "asm/prctl.h" /* XXX This should get the constants from libc */
-#include "asm/uaccess.h"
-#include "os.h"
+#include <linux/sched.h>
+#include <asm/prctl.h> /* XXX This should get the constants from libc */
+#include <os.h>
 
 long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
 {
@@ -79,20 +76,6 @@ long sys_arch_prctl(int code, unsigned long addr)
 	return arch_prctl(current, code, (unsigned long __user *) addr);
 }
 
-long sys_clone(unsigned long clone_flags, unsigned long newsp,
-	       void __user *parent_tid, void __user *child_tid)
-{
-	long ret;
-
-	if (!newsp)
-		newsp = UPT_SP(&current->thread.regs.regs);
-	current->thread.forking = 1;
-	ret = do_fork(clone_flags, newsp, &current->thread.regs, 0, parent_tid,
-		      child_tid);
-	current->thread.forking = 0;
-	return ret;
-}
-
 void arch_switch_to(struct task_struct *to)
 {
 	if ((to->thread.arch.fs == 0) || (to->mm == NULL))
-- 
cgit v1.2.3


From 450cc201038f31bd496e1b3a44a49790b8827a06 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 27 Sep 2012 10:08:00 -0700
Subject: x86/mce: Provide boot argument to honour bios-set CMCI threshold

The ACPI spec doesn't provide for a way for the bios to pass down
recommended thresholds to the OS on a _per-bank_ basis. This patch adds
a new boot option, which if passed, tells Linux to use CMCI thresholds
set by the bios.

As fail-safe, we initialize threshold to 1 if some banks have not been
initialized by the bios and warn the user.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 Documentation/x86/x86_64/boot-options.txt |  7 +++++++
 arch/x86/include/asm/mce.h                |  1 +
 arch/x86/kernel/cpu/mcheck/mce.c          | 10 +++++++++
 arch/x86/kernel/cpu/mcheck/mce_intel.c    | 35 ++++++++++++++++++++++++++++---
 4 files changed, 50 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/x86/x86_64/boot-options.txt b/Documentation/x86/x86_64/boot-options.txt
index c54b4f503e2a..de38429beb71 100644
--- a/Documentation/x86/x86_64/boot-options.txt
+++ b/Documentation/x86/x86_64/boot-options.txt
@@ -50,6 +50,13 @@ Machine check
 		monarchtimeout:
 		Sets the time in us to wait for other CPUs on machine checks. 0
 		to disable.
+   mce=bios_cmci_threshold
+		Don't overwrite the bios-set CMCI threshold. This boot option
+		prevents Linux from overwriting the CMCI threshold set by the
+		bios. Without this option, Linux always sets the CMCI
+		threshold to 1. Enabling this may make memory predictive failure
+		analysis less effective if the bios sets thresholds for memory
+		errors since we will not see details for all errors.
 
    nomce (for compatibility with i386): same as mce=off
 
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index ccaf7c581c8f..54d73b1f00a0 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -161,6 +161,7 @@ DECLARE_PER_CPU(struct device *, mce_device);
 #ifdef CONFIG_X86_MCE_INTEL
 extern int mce_cmci_disabled;
 extern int mce_ignore_ce;
+extern int mce_bios_cmci_threshold;
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
 void cmci_clear(void);
 void cmci_reenable(void);
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index c311122ea838..29e87d3b2843 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -83,6 +83,7 @@ static int			mce_dont_log_ce		__read_mostly;
 int				mce_cmci_disabled	__read_mostly;
 int				mce_ignore_ce		__read_mostly;
 int				mce_ser			__read_mostly;
+int				mce_bios_cmci_threshold	__read_mostly;
 
 struct mce_bank                *mce_banks		__read_mostly;
 
@@ -1946,6 +1947,7 @@ static struct miscdevice mce_chrdev_device = {
  *	check, or 0 to not wait
  * mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
  * mce=nobootlog Don't log MCEs from before booting.
+ * mce=bios_cmci_threshold Don't program the CMCI threshold
  */
 static int __init mcheck_enable(char *str)
 {
@@ -1965,6 +1967,8 @@ static int __init mcheck_enable(char *str)
 		mce_ignore_ce = 1;
 	else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
 		mce_bootlog = (str[0] == 'b');
+	else if (!strcmp(str, "bios_cmci_threshold"))
+		mce_bios_cmci_threshold = 1;
 	else if (isdigit(str[0])) {
 		get_option(&str, &tolerant);
 		if (*str == ',') {
@@ -2205,6 +2209,11 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
 	&mce_cmci_disabled
 };
 
+static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
+	__ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
+	&mce_bios_cmci_threshold
+};
+
 static struct device_attribute *mce_device_attrs[] = {
 	&dev_attr_tolerant.attr,
 	&dev_attr_check_interval.attr,
@@ -2213,6 +2222,7 @@ static struct device_attribute *mce_device_attrs[] = {
 	&dev_attr_dont_log_ce.attr,
 	&dev_attr_ignore_ce.attr,
 	&dev_attr_cmci_disabled.attr,
+	&dev_attr_bios_cmci_threshold.attr,
 	NULL
 };
 
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 098386fed48e..5f88abf07e9c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -181,10 +181,12 @@ static void cmci_discover(int banks)
 	unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned);
 	unsigned long flags;
 	int i;
+	int bios_wrong_thresh = 0;
 
 	raw_spin_lock_irqsave(&cmci_discover_lock, flags);
 	for (i = 0; i < banks; i++) {
 		u64 val;
+		int bios_zero_thresh = 0;
 
 		if (test_bit(i, owned))
 			continue;
@@ -198,8 +200,20 @@ static void cmci_discover(int banks)
 			continue;
 		}
 
-		val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
-		val |= MCI_CTL2_CMCI_EN | CMCI_THRESHOLD;
+		if (!mce_bios_cmci_threshold) {
+			val &= ~MCI_CTL2_CMCI_THRESHOLD_MASK;
+			val |= CMCI_THRESHOLD;
+		} else if (!(val & MCI_CTL2_CMCI_THRESHOLD_MASK)) {
+			/*
+			 * If bios_cmci_threshold boot option was specified
+			 * but the threshold is zero, we'll try to initialize
+			 * it to 1.
+			 */
+			bios_zero_thresh = 1;
+			val |= CMCI_THRESHOLD;
+		}
+
+		val |= MCI_CTL2_CMCI_EN;
 		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
 		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
 
@@ -207,11 +221,26 @@ static void cmci_discover(int banks)
 		if (val & MCI_CTL2_CMCI_EN) {
 			set_bit(i, owned);
 			__clear_bit(i, __get_cpu_var(mce_poll_banks));
+			/*
+			 * We are able to set thresholds for some banks that
+			 * had a threshold of 0. This means the BIOS has not
+			 * set the thresholds properly or does not work with
+			 * this boot option. Note down now and report later.
+			 */
+			if (mce_bios_cmci_threshold && bios_zero_thresh &&
+					(val & MCI_CTL2_CMCI_THRESHOLD_MASK))
+				bios_wrong_thresh = 1;
 		} else {
 			WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks)));
 		}
 	}
 	raw_spin_unlock_irqrestore(&cmci_discover_lock, flags);
+	if (mce_bios_cmci_threshold && bios_wrong_thresh) {
+		pr_info_once(
+			"bios_cmci_threshold: Some banks do not have valid thresholds set\n");
+		pr_info_once(
+			"bios_cmci_threshold: Make sure your BIOS supports this boot option\n");
+	}
 }
 
 /*
@@ -249,7 +278,7 @@ void cmci_clear(void)
 			continue;
 		/* Disable CMCI */
 		rdmsrl(MSR_IA32_MCx_CTL2(i), val);
-		val &= ~(MCI_CTL2_CMCI_EN|MCI_CTL2_CMCI_THRESHOLD_MASK);
+		val &= ~MCI_CTL2_CMCI_EN;
 		wrmsrl(MSR_IA32_MCx_CTL2(i), val);
 		__clear_bit(i, __get_cpu_var(mce_banks_owned));
 	}
-- 
cgit v1.2.3


From bbb35efcda41d589cfff5e2b08c5fb457791117c Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Thu, 27 Sep 2012 20:10:57 +0200
Subject: um: Fix IPC on um
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

commit c1d7e01d (ipc: use Kconfig options for __ARCH_WANT_[COMPAT_]IPC_PARSE_VERSION)
forgot UML and broke IPC on it.
Also UML has to select ARCH_WANT_IPC_PARSE_VERSION usin Kconfig.

Reported-and-tested-by: <Toralf Förster toralf.foerster@gmx.de>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/x86/um/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch/x86')

diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 9926e11a772d..aeaff8bef2f1 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -21,6 +21,7 @@ config 64BIT
 config X86_32
 	def_bool !64BIT
 	select HAVE_AOUT
+	select ARCH_WANT_IPC_PARSE_VERSION
 
 config X86_64
 	def_bool 64BIT
-- 
cgit v1.2.3


From 9429ec96c2718c0d1e3317cf60a87a0405223814 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 16 Aug 2012 20:15:05 +0200
Subject: um: Preinclude include/linux/kern_levels.h

The userspace part of UML uses the asm-offsets.h generator mechanism to
create definitions for UM_KERN_<LEVEL> that match the in-kernel
KERN_<LEVEL> constant definitions.

As of commit 04d2c8c83d0e3ac5f78aeede51babb3236200112 ("printk: convert
the format for KERN_<LEVEL> to a 2 byte pattern"), KERN_<LEVEL> is no
longer expanded to the literal '"<LEVEL>"', but to '"\001" "LEVEL"', i.e.
it contains two parts.

However, the combo of DEFINE_STR() in
arch/x86/um/shared/sysdep/kernel-offsets.h and sed-y in Kbuild doesn't
support string literals consisting of multiple parts. Hence for all
UM_KERN_<LEVEL> definitions, only the SOH character is retained in the actual
definition, while the remainder ends up in the comment. E.g. in
include/generated/asm-offsets.h we get

    #define UM_KERN_INFO "\001" /* "6" KERN_INFO */

instead of

    #define UM_KERN_INFO "\001" "6" /* KERN_INFO */

This causes spurious '^A' output in some kernel messages:

    Calibrating delay loop... 4640.76 BogoMIPS (lpj=23203840)
    pid_max: default: 32768 minimum: 301
    Mount-cache hash table entries: 256
    ^AChecking that host ptys support output SIGIO...Yes
    ^AChecking that host ptys support SIGIO on close...No, enabling workaround
    ^AUsing 2.6 host AIO
    NET: Registered protocol family 16
    bio: create slab <bio-0> at 0
    Switching to clocksource itimer

To fix this:
  - Move the mapping from UM_KERN_<LEVEL> to KERN_<LEVEL> from
    arch/um/include/shared/common-offsets.h to
    arch/um/include/shared/user.h, which is preincluded for all userspace
    parts,
  - Preinclude include/linux/kern_levels.h for all userspace parts, to
    obtain the in-kernel KERN_<LEVEL> constant definitions. This doesn't
    violate the kernel/userspace separation, as include/linux/kern_levels.h
    is self-contained and doesn't expose any other kernel internals.
  - Remove the now unused STR() and DEFINE_STR() macros.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/include/shared/common-offsets.h    | 10 ----------
 arch/um/include/shared/user.h              | 11 +++++++++++
 arch/um/scripts/Makefile.rules             |  2 +-
 arch/x86/um/shared/sysdep/kernel-offsets.h |  3 ---
 4 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/include/shared/common-offsets.h b/arch/um/include/shared/common-offsets.h
index 40db8f71deae..2df313b6a586 100644
--- a/arch/um/include/shared/common-offsets.h
+++ b/arch/um/include/shared/common-offsets.h
@@ -7,16 +7,6 @@ DEFINE(UM_KERN_PAGE_MASK, PAGE_MASK);
 DEFINE(UM_KERN_PAGE_SHIFT, PAGE_SHIFT);
 DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
 
-DEFINE_STR(UM_KERN_EMERG, KERN_EMERG);
-DEFINE_STR(UM_KERN_ALERT, KERN_ALERT);
-DEFINE_STR(UM_KERN_CRIT, KERN_CRIT);
-DEFINE_STR(UM_KERN_ERR, KERN_ERR);
-DEFINE_STR(UM_KERN_WARNING, KERN_WARNING);
-DEFINE_STR(UM_KERN_NOTICE, KERN_NOTICE);
-DEFINE_STR(UM_KERN_INFO, KERN_INFO);
-DEFINE_STR(UM_KERN_DEBUG, KERN_DEBUG);
-DEFINE_STR(UM_KERN_CONT, KERN_CONT);
-
 DEFINE(UM_ELF_CLASS, ELF_CLASS);
 DEFINE(UM_ELFCLASS32, ELFCLASS32);
 DEFINE(UM_ELFCLASS64, ELFCLASS64);
diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h
index 4fa82c055aab..cef068563336 100644
--- a/arch/um/include/shared/user.h
+++ b/arch/um/include/shared/user.h
@@ -26,6 +26,17 @@
 extern void panic(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
 
+/* Requires preincluding include/linux/kern_levels.h */
+#define UM_KERN_EMERG	KERN_EMERG
+#define UM_KERN_ALERT	KERN_ALERT
+#define UM_KERN_CRIT	KERN_CRIT
+#define UM_KERN_ERR	KERN_ERR
+#define UM_KERN_WARNING	KERN_WARNING
+#define UM_KERN_NOTICE	KERN_NOTICE
+#define UM_KERN_INFO	KERN_INFO
+#define UM_KERN_DEBUG	KERN_DEBUG
+#define UM_KERN_CONT	KERN_CONT
+
 #ifdef UML_CONFIG_PRINTK
 extern int printk(const char *fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
diff --git a/arch/um/scripts/Makefile.rules b/arch/um/scripts/Makefile.rules
index d50270d26b42..15889df9b466 100644
--- a/arch/um/scripts/Makefile.rules
+++ b/arch/um/scripts/Makefile.rules
@@ -8,7 +8,7 @@ USER_OBJS += $(filter %_user.o,$(obj-y) $(obj-m)  $(USER_SINGLE_OBJS))
 USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
 
 $(USER_OBJS:.o=.%): \
-	c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include user.h $(CFLAGS_$(basetarget).o)
+	c_flags = -Wp,-MD,$(depfile) $(USER_CFLAGS) -include $(srctree)/include/linux/kern_levels.h -include user.h $(CFLAGS_$(basetarget).o)
 
 # These are like USER_OBJS but filter USER_CFLAGS through unprofile instead of
 # using it directly.
diff --git a/arch/x86/um/shared/sysdep/kernel-offsets.h b/arch/x86/um/shared/sysdep/kernel-offsets.h
index 5868526b5eef..46a9df99f3c5 100644
--- a/arch/x86/um/shared/sysdep/kernel-offsets.h
+++ b/arch/x86/um/shared/sysdep/kernel-offsets.h
@@ -7,9 +7,6 @@
 #define DEFINE(sym, val) \
 	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
 
-#define STR(x) #x
-#define DEFINE_STR(sym, val) asm volatile("\n->" #sym " " STR(val) " " #val: : )
-
 #define BLANK() asm volatile("\n->" : : )
 
 #define OFFSET(sym, str, mem) \
-- 
cgit v1.2.3


From 786d35d45cc40b2a51a18f73e14e135d47fdced7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 28 Sep 2012 14:31:03 +0930
Subject: Make most arch asm/module.h files use asm-generic/module.h

Use the mapping of Elf_[SPE]hdr, Elf_Addr, Elf_Sym, Elf_Dyn, Elf_Rel/Rela,
ELF_R_TYPE() and ELF_R_SYM() to either the 32-bit version or the 64-bit version
into asm-generic/module.h for all arches bar MIPS.

Also, use the generic definition mod_arch_specific where possible.

To this end, I've defined three new config bools:

 (*) HAVE_MOD_ARCH_SPECIFIC

     Arches define this if they don't want to use the empty generic
     mod_arch_specific struct.

 (*) MODULES_USE_ELF_RELA

     Arches define this if their modules can contain RELA records.  This causes
     the Elf_Rela mapping to be emitted and allows apply_relocate_add() to be
     defined by the arch rather than have the core emit an error message.

 (*) MODULES_USE_ELF_REL

     Arches define this if their modules can contain REL records.  This causes
     the Elf_Rel mapping to be emitted and allows apply_relocate() to be
     defined by the arch rather than have the core emit an error message.

Note that it is possible to allow both REL and RELA records: m68k and mips are
two arches that do this.

With this, some arch asm/module.h files can be deleted entirely and replaced
with a generic-y marker in the arch Kbuild file.

Additionally, I have removed the bits from m32r and score that handle the
unsupported type of relocation record as that's now handled centrally.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Sam Ravnborg <sam@ravnborg.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/Kconfig                       | 19 ++++++++++++++++++
 arch/alpha/Kconfig                 |  2 ++
 arch/alpha/include/asm/module.h    | 10 ++--------
 arch/arm/Kconfig                   |  2 ++
 arch/arm/include/asm/module.h      |  8 ++------
 arch/avr32/Kconfig                 |  2 ++
 arch/avr32/include/asm/module.h    |  6 ++----
 arch/blackfin/Kconfig              |  2 ++
 arch/blackfin/include/asm/module.h |  4 +---
 arch/c6x/Kconfig                   |  1 +
 arch/c6x/include/asm/module.h      | 12 +-----------
 arch/cris/Kconfig                  |  1 +
 arch/cris/include/asm/Kbuild       |  2 ++
 arch/cris/include/asm/module.h     |  9 ---------
 arch/frv/include/asm/module.h      |  8 +-------
 arch/h8300/Kconfig                 |  1 +
 arch/h8300/include/asm/Kbuild      |  2 ++
 arch/h8300/include/asm/module.h    | 11 -----------
 arch/hexagon/Kconfig               |  1 +
 arch/ia64/Kconfig                  |  2 ++
 arch/ia64/include/asm/module.h     |  6 ++----
 arch/m32r/Kconfig                  |  1 +
 arch/m32r/include/asm/Kbuild       |  2 ++
 arch/m32r/include/asm/module.h     | 10 ----------
 arch/m32r/kernel/module.c          | 15 --------------
 arch/m68k/Kconfig                  |  3 +++
 arch/m68k/include/asm/module.h     |  6 ++----
 arch/microblaze/Kconfig            |  1 +
 arch/mips/Kconfig                  |  3 +++
 arch/mips/include/asm/module.h     | 10 ++++++++--
 arch/mips/kernel/Makefile          |  2 +-
 arch/mn10300/Kconfig               |  1 +
 arch/mn10300/include/asm/module.h  |  7 +------
 arch/openrisc/Kconfig              |  1 +
 arch/parisc/Kconfig                |  2 ++
 arch/parisc/include/asm/module.h   | 16 +++------------
 arch/powerpc/Kconfig               |  2 ++
 arch/powerpc/include/asm/module.h  |  7 +------
 arch/s390/Kconfig                  |  2 ++
 arch/s390/include/asm/module.h     | 18 +++--------------
 arch/score/Kconfig                 |  2 ++
 arch/score/include/asm/module.h    |  6 +-----
 arch/score/kernel/module.c         | 10 ----------
 arch/sh/Kconfig                    |  2 ++
 arch/sh/include/asm/module.h       | 14 +++----------
 arch/sparc/Kconfig                 |  1 +
 arch/sparc/include/asm/Kbuild      |  1 +
 arch/sparc/include/asm/module.h    | 24 -----------------------
 arch/tile/Kconfig                  |  1 +
 arch/unicore32/Kconfig             |  1 +
 arch/x86/Kconfig                   |  2 ++
 arch/x86/um/Kconfig                |  2 ++
 arch/xtensa/Kconfig                |  1 +
 arch/xtensa/include/asm/module.h   |  9 +--------
 include/asm-generic/module.h       | 40 +++++++++++++++++++++++++++++++-------
 include/linux/moduleloader.h       | 36 ++++++++++++++++++++++++++++++----
 kernel/module.c                    | 20 -------------------
 57 files changed, 168 insertions(+), 224 deletions(-)
 delete mode 100644 arch/cris/include/asm/module.h
 delete mode 100644 arch/h8300/include/asm/module.h
 delete mode 100644 arch/m32r/include/asm/module.h
 delete mode 100644 arch/sparc/include/asm/module.h

(limited to 'arch/x86')

diff --git a/arch/Kconfig b/arch/Kconfig
index 72f2fa189cc5..3450115c6437 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -281,4 +281,23 @@ config SECCOMP_FILTER
 
 	  See Documentation/prctl/seccomp_filter.txt for details.
 
+config HAVE_MOD_ARCH_SPECIFIC
+	bool
+	help
+	  The arch uses struct mod_arch_specific to store data.  Many arches
+	  just need a simple module loader without arch specific data - those
+	  should not enable this.
+
+config MODULES_USE_ELF_RELA
+	bool
+	help
+	  Modules only use ELF RELA relocations.  Modules with ELF REL
+	  relocations will give an error.
+
+config MODULES_USE_ELF_REL
+	bool
+	help
+	  Modules only use ELF REL relocations.  Modules with ELF RELA
+	  relocations will give an error.
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index 9944dedee5b1..7e3710c0cce5 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -20,6 +20,8 @@ config ALPHA
 	select GENERIC_CMOS_UPDATE
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 	help
 	  The Alpha is a 64-bit general-purpose processor designed and
 	  marketed by the Digital Equipment Corporation of blessed memory,
diff --git a/arch/alpha/include/asm/module.h b/arch/alpha/include/asm/module.h
index 7b63743c534a..9cd13b55155f 100644
--- a/arch/alpha/include/asm/module.h
+++ b/arch/alpha/include/asm/module.h
@@ -1,19 +1,13 @@
 #ifndef _ALPHA_MODULE_H
 #define _ALPHA_MODULE_H
 
+#include <asm-generic/module.h>
+
 struct mod_arch_specific
 {
 	unsigned int gotsecindex;
 };
 
-#define Elf_Sym Elf64_Sym
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Ehdr Elf64_Ehdr
-#define Elf_Phdr Elf64_Phdr
-#define Elf_Dyn Elf64_Dyn
-#define Elf_Rel Elf64_Rel
-#define Elf_Rela Elf64_Rela
-
 #define ARCH_SHF_SMALL SHF_ALPHA_GPREL
 
 #ifdef MODULE
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 2f88d8d97701..7a08b3a71c01 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -49,6 +49,8 @@ config ARM
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN
+	select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
+	select MODULES_USE_ELF_REL
 	help
 	  The ARM series is a line of low-power-consumption RISC chip designs
 	  licensed by ARM Ltd and targeted at embedded applications and
diff --git a/arch/arm/include/asm/module.h b/arch/arm/include/asm/module.h
index 6c6809f982f1..0d3a28dbc8e5 100644
--- a/arch/arm/include/asm/module.h
+++ b/arch/arm/include/asm/module.h
@@ -1,9 +1,7 @@
 #ifndef _ASM_ARM_MODULE_H
 #define _ASM_ARM_MODULE_H
 
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
+#include <asm-generic/module.h>
 
 struct unwind_table;
 
@@ -16,13 +14,11 @@ enum {
 	ARM_SEC_DEVEXIT,
 	ARM_SEC_MAX,
 };
-#endif
 
 struct mod_arch_specific {
-#ifdef CONFIG_ARM_UNWIND
 	struct unwind_table *unwind[ARM_SEC_MAX];
-#endif
 };
+#endif
 
 /*
  * Add the ARM architecture version to the version magic string
diff --git a/arch/avr32/Kconfig b/arch/avr32/Kconfig
index 5ade51c8a87f..06e73bf665e9 100644
--- a/arch/avr32/Kconfig
+++ b/arch/avr32/Kconfig
@@ -15,6 +15,8 @@ config AVR32
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CLOCKEVENTS
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 	help
 	  AVR32 is a high-performance 32-bit RISC microprocessor core,
 	  designed for cost-sensitive embedded applications, with particular
diff --git a/arch/avr32/include/asm/module.h b/arch/avr32/include/asm/module.h
index 451444538a1b..3f083d385a64 100644
--- a/arch/avr32/include/asm/module.h
+++ b/arch/avr32/include/asm/module.h
@@ -1,6 +1,8 @@
 #ifndef __ASM_AVR32_MODULE_H
 #define __ASM_AVR32_MODULE_H
 
+#include <asm-generic/module.h>
+
 struct mod_arch_syminfo {
 	unsigned long got_offset;
 	int got_initialized;
@@ -17,10 +19,6 @@ struct mod_arch_specific {
 	struct mod_arch_syminfo *syminfo;
 };
 
-#define Elf_Shdr		Elf32_Shdr
-#define Elf_Sym			Elf32_Sym
-#define Elf_Ehdr		Elf32_Ehdr
-
 #define MODULE_PROC_FAMILY "AVR32v1"
 
 #define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index c7092e6057c5..8e82e267b897 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -42,6 +42,8 @@ config BLACKFIN
 	select HAVE_NMI_WATCHDOG if NMI_WATCHDOG
 	select GENERIC_SMP_IDLE_THREAD
 	select ARCH_USES_GETTIMEOFFSET if !GENERIC_CLOCKEVENTS
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 
 config GENERIC_CSUM
 	def_bool y
diff --git a/arch/blackfin/include/asm/module.h b/arch/blackfin/include/asm/module.h
index ed5689b82c9f..231a149b3f77 100644
--- a/arch/blackfin/include/asm/module.h
+++ b/arch/blackfin/include/asm/module.h
@@ -7,9 +7,7 @@
 #ifndef _ASM_BFIN_MODULE_H
 #define _ASM_BFIN_MODULE_H
 
-#define Elf_Shdr        Elf32_Shdr
-#define Elf_Sym         Elf32_Sym
-#define Elf_Ehdr        Elf32_Ehdr
+#include <asm-generic/module.h>
 
 struct mod_arch_specific {
 	Elf_Shdr	*text_l1;
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 983c859e40b7..f6a3648f5ec3 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -17,6 +17,7 @@ config C6X
 	select OF
 	select OF_EARLY_FLATTREE
 	select GENERIC_CLOCKEVENTS
+	select MODULES_USE_ELF_RELA
 
 config MMU
 	def_bool n
diff --git a/arch/c6x/include/asm/module.h b/arch/c6x/include/asm/module.h
index a453f9744f42..5c7269c7ef73 100644
--- a/arch/c6x/include/asm/module.h
+++ b/arch/c6x/include/asm/module.h
@@ -13,17 +13,7 @@
 #ifndef _ASM_C6X_MODULE_H
 #define _ASM_C6X_MODULE_H
 
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
-#define Elf_Addr	Elf32_Addr
-#define Elf_Word	Elf32_Word
-
-/*
- * This file contains the C6x architecture specific module code.
- */
-struct mod_arch_specific {
-};
+#include <asm-generic/module.h>
 
 struct loaded_sections {
 	unsigned int new_vaddr;
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index e92215428a37..7bb8cf90e6aa 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -47,6 +47,7 @@ config CRIS
 	select GENERIC_IOMAP
 	select GENERIC_SMP_IDLE_THREAD if ETRAX_ARCH_V32
 	select GENERIC_CMOS_UPDATE
+	select MODULES_USE_ELF_RELA
 
 config HZ
 	int
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 04d02a51c5e9..28b690de7971 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -7,3 +7,5 @@ header-y += ethernet.h
 header-y += etraxgpio.h
 header-y += rs485.h
 header-y += sync_serial.h
+
+generic-y += module.h
diff --git a/arch/cris/include/asm/module.h b/arch/cris/include/asm/module.h
deleted file mode 100644
index 7ee72311bd78..000000000000
--- a/arch/cris/include/asm/module.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#ifndef _ASM_CRIS_MODULE_H
-#define _ASM_CRIS_MODULE_H
-/* cris is simple */
-struct mod_arch_specific { };
-
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-#endif /* _ASM_CRIS_MODULE_H */
diff --git a/arch/frv/include/asm/module.h b/arch/frv/include/asm/module.h
index 3d5c6360289a..a8848f09a217 100644
--- a/arch/frv/include/asm/module.h
+++ b/arch/frv/include/asm/module.h
@@ -11,13 +11,7 @@
 #ifndef _ASM_MODULE_H
 #define _ASM_MODULE_H
 
-struct mod_arch_specific
-{
-};
-
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
+#include <asm-generic/module.h>
 
 /*
  * Include the architecture version.
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 5e8a0d9a09ce..c149d3b29eb6 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -6,6 +6,7 @@ config H8300
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
+	select MODULES_USE_ELF_RELA
 
 config SYMBOL_PREFIX
 	string
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index c68e1680da01..871382d239fe 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y	+= module.h
diff --git a/arch/h8300/include/asm/module.h b/arch/h8300/include/asm/module.h
deleted file mode 100644
index 8e46724b7c09..000000000000
--- a/arch/h8300/include/asm/module.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#ifndef _ASM_H8300_MODULE_H
-#define _ASM_H8300_MODULE_H
-/*
- * This file contains the H8/300 architecture specific module code.
- */
-struct mod_arch_specific { };
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
-#endif /* _ASM_H8/300_MODULE_H */
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index b2fdfb700f50..0744f7d7b1fd 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -30,6 +30,7 @@ config HEXAGON
 	select KTIME_SCALAR
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CLOCKEVENTS_BROADCAST
+	select MODULES_USE_ELF_RELA
 	---help---
 	  Qualcomm Hexagon is a processor architecture designed for high
 	  performance and low power across a wide variety of applications.
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 310cf5781fad..688146466d0d 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -39,6 +39,8 @@ config IA64
 	select ARCH_THREAD_INFO_ALLOCATOR
 	select ARCH_CLOCKSOURCE_DATA
 	select GENERIC_TIME_VSYSCALL
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h
index 908eaef42a08..dfba22a872c3 100644
--- a/arch/ia64/include/asm/module.h
+++ b/arch/ia64/include/asm/module.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_IA64_MODULE_H
 #define _ASM_IA64_MODULE_H
 
+#include <asm-generic/module.h>
+
 /*
  * IA-64-specific support for kernel module loader.
  *
@@ -29,10 +31,6 @@ struct mod_arch_specific {
 	unsigned int next_got_entry;	/* index of next available got entry */
 };
 
-#define Elf_Shdr	Elf64_Shdr
-#define Elf_Sym		Elf64_Sym
-#define Elf_Ehdr	Elf64_Ehdr
-
 #define MODULE_PROC_FAMILY	"ia64"
 #define MODULE_ARCH_VERMAGIC	MODULE_PROC_FAMILY \
 	"gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__)
diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig
index 49498bbb9616..fc6153361bf5 100644
--- a/arch/m32r/Kconfig
+++ b/arch/m32r/Kconfig
@@ -13,6 +13,7 @@ config M32R
 	select GENERIC_IRQ_SHOW
 	select GENERIC_ATOMIC64
 	select ARCH_USES_GETTIMEOFFSET
+	select MODULES_USE_ELF_RELA
 
 config SBUS
 	bool
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index c68e1680da01..871382d239fe 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y	+= module.h
diff --git a/arch/m32r/include/asm/module.h b/arch/m32r/include/asm/module.h
deleted file mode 100644
index eb73ee011215..000000000000
--- a/arch/m32r/include/asm/module.h
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef _ASM_M32R_MODULE_H
-#define _ASM_M32R_MODULE_H
-
-struct mod_arch_specific { };
-
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
-
-#endif /* _ASM_M32R_MODULE_H */
diff --git a/arch/m32r/kernel/module.c b/arch/m32r/kernel/module.c
index 3071fe83ffc8..38233b6596b6 100644
--- a/arch/m32r/kernel/module.c
+++ b/arch/m32r/kernel/module.c
@@ -201,18 +201,3 @@ int apply_relocate_add(Elf32_Shdr *sechdrs,
 	}
 	return 0;
 }
-
-int apply_relocate(Elf32_Shdr *sechdrs,
-		       const char *strtab,
-		       unsigned int symindex,
-		       unsigned int relsec,
-		       struct module *me)
-{
-#if 0
-	printk(KERN_ERR "module %s: REL RELOCATION unsupported\n",
-	       me->name);
-	return -ENOEXEC;
-#endif
-	return 0;
-
-}
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b22df9410dce..0df07cee3faf 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -13,6 +13,9 @@ config M68K
 	select FPU if MMU
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_USES_GETTIMEOFFSET if MMU && !COLDFIRE
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_REL
+	select MODULES_USE_ELF_RELA
 
 config RWSEM_GENERIC_SPINLOCK
 	bool
diff --git a/arch/m68k/include/asm/module.h b/arch/m68k/include/asm/module.h
index edffe66b7f49..8b58fce843dd 100644
--- a/arch/m68k/include/asm/module.h
+++ b/arch/m68k/include/asm/module.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_M68K_MODULE_H
 #define _ASM_M68K_MODULE_H
 
+#include <asm-generic/module.h>
+
 enum m68k_fixup_type {
 	m68k_fixup_memoffset,
 	m68k_fixup_vnode_shift,
@@ -36,8 +38,4 @@ struct module;
 extern void module_fixup(struct module *mod, struct m68k_fixup_info *start,
 			 struct m68k_fixup_info *end);
 
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-
 #endif /* _ASM_M68K_MODULE_H */
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index ab9afcaa7f6a..b4f409f942ab 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -24,6 +24,7 @@ config MICROBLAZE
 	select GENERIC_CPU_DEVICES
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
+	select MODULES_USE_ELF_RELA
 
 config SWAP
 	def_bool n
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index faf65286574e..dccdfcd9e18e 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -36,6 +36,9 @@ config MIPS
 	select BUILDTIME_EXTABLE_SORT
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CMOS_UPDATE
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_REL
+	select MODULES_USE_ELF_RELA if 64BIT
 
 menu "Machine selection"
 
diff --git a/arch/mips/include/asm/module.h b/arch/mips/include/asm/module.h
index dca8bce8c7ab..26137da1c713 100644
--- a/arch/mips/include/asm/module.h
+++ b/arch/mips/include/asm/module.h
@@ -35,11 +35,14 @@ typedef struct {
 } Elf64_Mips_Rela;
 
 #ifdef CONFIG_32BIT
-
 #define Elf_Shdr	Elf32_Shdr
 #define Elf_Sym		Elf32_Sym
 #define Elf_Ehdr	Elf32_Ehdr
 #define Elf_Addr	Elf32_Addr
+#define Elf_Rel		Elf32_Rel
+#define Elf_Rela	Elf32_Rela
+#define ELF_R_TYPE(X)	ELF32_R_TYPE(X)
+#define ELF_R_SYM(X)	ELF32_R_SYM(X)
 
 #define Elf_Mips_Rel	Elf32_Rel
 #define Elf_Mips_Rela	Elf32_Rela
@@ -50,11 +53,14 @@ typedef struct {
 #endif
 
 #ifdef CONFIG_64BIT
-
 #define Elf_Shdr	Elf64_Shdr
 #define Elf_Sym		Elf64_Sym
 #define Elf_Ehdr	Elf64_Ehdr
 #define Elf_Addr	Elf64_Addr
+#define Elf_Rel		Elf64_Rel
+#define Elf_Rela	Elf64_Rela
+#define ELF_R_TYPE(X)	ELF64_R_TYPE(X)
+#define ELF_R_SYM(X)	ELF64_R_SYM(X)
 
 #define Elf_Mips_Rel	Elf64_Mips_Rel
 #define Elf_Mips_Rela	Elf64_Mips_Rela
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index e2c14999839a..cd1e6c2421b2 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -31,7 +31,7 @@ obj-$(CONFIG_SYNC_R4K)		+= sync-r4k.o
 
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_MODULES)		+= mips_ksyms.o module.o
-obj-$(CONFIG_MODULES)		+= module-rela.o
+obj-$(CONFIG_MODULES_USE_ELF_RELA) += module-rela.o
 
 obj-$(CONFIG_FUNCTION_TRACER)	+= mcount.o ftrace.o
 
diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig
index 5cfb086b3903..aa03f2e13385 100644
--- a/arch/mn10300/Kconfig
+++ b/arch/mn10300/Kconfig
@@ -8,6 +8,7 @@ config MN10300
 	select HAVE_ARCH_KGDB
 	select HAVE_NMI_WATCHDOG if MN10300_WD_TIMER
 	select GENERIC_CLOCKEVENTS
+	select MODULES_USE_ELF_RELA
 
 config AM33_2
 	def_bool n
diff --git a/arch/mn10300/include/asm/module.h b/arch/mn10300/include/asm/module.h
index 5d7057d01494..6571103b0518 100644
--- a/arch/mn10300/include/asm/module.h
+++ b/arch/mn10300/include/asm/module.h
@@ -12,12 +12,7 @@
 #ifndef _ASM_MODULE_H
 #define _ASM_MODULE_H
 
-struct mod_arch_specific {
-};
-
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
+#include <asm-generic/module.h>
 
 /*
  * Include the MN10300 architecture version.
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index 49765b53f637..05f2ba41ff1a 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -21,6 +21,7 @@ config OPENRISC
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select MODULES_USE_ELF_RELA
 
 config MMU
 	def_bool y
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 3ff21b536f28..166d9911bc83 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -19,6 +19,8 @@ config PARISC
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_STRNCPY_FROM_USER
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 
 	help
 	  The PA-RISC microprocessor is designed by Hewlett-Packard and used
diff --git a/arch/parisc/include/asm/module.h b/arch/parisc/include/asm/module.h
index 1f4123427ea0..bab37e99168a 100644
--- a/arch/parisc/include/asm/module.h
+++ b/arch/parisc/include/asm/module.h
@@ -1,21 +1,11 @@
 #ifndef _ASM_PARISC_MODULE_H
 #define _ASM_PARISC_MODULE_H
+
+#include <asm-generic/module.h>
+
 /*
  * This file contains the parisc architecture specific module code.
  */
-#ifdef CONFIG_64BIT
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Ehdr Elf64_Ehdr
-#define Elf_Addr Elf64_Addr
-#define Elf_Rela Elf64_Rela
-#else
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
-#define Elf_Addr Elf32_Addr
-#define Elf_Rela Elf32_Rela
-#endif
 
 struct unwind_table;
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 352f416269ce..74f84781b484 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -139,6 +139,8 @@ config PPC
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 
 config EARLY_PRINTK
 	bool
diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h
index 0192a4ee2bc2..c1df590ec444 100644
--- a/arch/powerpc/include/asm/module.h
+++ b/arch/powerpc/include/asm/module.h
@@ -11,6 +11,7 @@
 
 #include <linux/list.h>
 #include <asm/bug.h>
+#include <asm-generic/module.h>
 
 
 #ifndef __powerpc64__
@@ -60,16 +61,10 @@ struct mod_arch_specific {
  */
 
 #ifdef __powerpc64__
-#    define Elf_Shdr	Elf64_Shdr
-#    define Elf_Sym	Elf64_Sym
-#    define Elf_Ehdr	Elf64_Ehdr
 #    ifdef MODULE
 	asm(".section .stubs,\"ax\",@nobits; .align 3; .previous");
 #    endif
 #else
-#    define Elf_Shdr	Elf32_Shdr
-#    define Elf_Sym	Elf32_Sym
-#    define Elf_Ehdr	Elf32_Ehdr
 #    ifdef MODULE
 	asm(".section .plt,\"ax\",@nobits; .align 3; .previous");
 	asm(".section .init.plt,\"ax\",@nobits; .align 3; .previous");
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 107610e01a29..c76a052f60e2 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -125,6 +125,8 @@ config S390
 	select GENERIC_CLOCKEVENTS
 	select KTIME_SCALAR if 32BIT
 	select HAVE_ARCH_SECCOMP_FILTER
+	select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_RELA
 
 config SCHED_OMIT_FRAME_POINTER
 	def_bool y
diff --git a/arch/s390/include/asm/module.h b/arch/s390/include/asm/module.h
index f0b6b26b6e59..df1f861a848a 100644
--- a/arch/s390/include/asm/module.h
+++ b/arch/s390/include/asm/module.h
@@ -1,5 +1,8 @@
 #ifndef _ASM_S390_MODULE_H
 #define _ASM_S390_MODULE_H
+
+#include <asm-generic/module.h>
+
 /*
  * This file contains the s390 architecture specific module code.
  */
@@ -28,19 +31,4 @@ struct mod_arch_specific
 	struct mod_arch_syminfo *syminfo;
 };
 
-#ifdef CONFIG_64BIT
-#define ElfW(x) Elf64_ ## x
-#define ELFW(x) ELF64_ ## x
-#else
-#define ElfW(x) Elf32_ ## x
-#define ELFW(x) ELF32_ ## x
-#endif
-
-#define Elf_Addr ElfW(Addr)
-#define Elf_Rela ElfW(Rela)
-#define Elf_Shdr ElfW(Shdr)
-#define Elf_Sym ElfW(Sym)
-#define Elf_Ehdr ElfW(Ehdr)
-#define ELF_R_SYM ELFW(R_SYM)
-#define ELF_R_TYPE ELFW(R_TYPE)
 #endif /* _ASM_S390_MODULE_H */
diff --git a/arch/score/Kconfig b/arch/score/Kconfig
index ba0f412920be..e2c8db4533dc 100644
--- a/arch/score/Kconfig
+++ b/arch/score/Kconfig
@@ -10,6 +10,8 @@ config SCORE
        select ARCH_DISCARD_MEMBLOCK
        select GENERIC_CPU_DEVICES
        select GENERIC_CLOCKEVENTS
+       select HAVE_MOD_ARCH_SPECIFIC
+	select MODULES_USE_ELF_REL
 
 choice
 	prompt "System type"
diff --git a/arch/score/include/asm/module.h b/arch/score/include/asm/module.h
index f0b5dc0bd023..abf395bbfaba 100644
--- a/arch/score/include/asm/module.h
+++ b/arch/score/include/asm/module.h
@@ -3,6 +3,7 @@
 
 #include <linux/list.h>
 #include <asm/uaccess.h>
+#include <asm-generic/module.h>
 
 struct mod_arch_specific {
 	/* Data Bus Error exception tables */
@@ -13,11 +14,6 @@ struct mod_arch_specific {
 
 typedef uint8_t Elf64_Byte;		/* Type for a 8-bit quantity. */
 
-#define Elf_Shdr	Elf32_Shdr
-#define Elf_Sym		Elf32_Sym
-#define Elf_Ehdr	Elf32_Ehdr
-#define Elf_Addr	Elf32_Addr
-
 /* Given an address, look for it in the exception tables. */
 #ifdef CONFIG_MODULES
 const struct exception_table_entry *search_module_dbetables(unsigned long addr);
diff --git a/arch/score/kernel/module.c b/arch/score/kernel/module.c
index 469e3b64e2f2..1378d99baa3d 100644
--- a/arch/score/kernel/module.c
+++ b/arch/score/kernel/module.c
@@ -125,16 +125,6 @@ int apply_relocate(Elf_Shdr *sechdrs, const char *strtab,
 	return 0;
 }
 
-int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
-		unsigned int symindex, unsigned int relsec,
-		struct module *me)
-{
-	/* Non-standard return value... most other arch's return -ENOEXEC
-	 * for an unsupported relocation variant
-	 */
-	return 0;
-}
-
 /* Given an address, look for it in the module exception tables. */
 const struct exception_table_entry *search_module_dbetables(unsigned long addr)
 {
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 36f5141e8041..656329a9a59b 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -35,6 +35,8 @@ config SUPERH
 	select GENERIC_CMOS_UPDATE if SH_SH03 || SH_DREAMCAST
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select HAVE_MOD_ARCH_SPECIFIC if DWARF_UNWINDER
+	select MODULES_USE_ELF_RELA
 	help
 	  The SuperH is a RISC processor targeted for use in embedded systems
 	  and consumer electronics; it was also used in the Sega Dreamcast
diff --git a/arch/sh/include/asm/module.h b/arch/sh/include/asm/module.h
index b7927de86f9f..81300d8b5448 100644
--- a/arch/sh/include/asm/module.h
+++ b/arch/sh/include/asm/module.h
@@ -1,21 +1,13 @@
 #ifndef _ASM_SH_MODULE_H
 #define _ASM_SH_MODULE_H
 
-struct mod_arch_specific {
+#include <asm-generic/module.h>
+
 #ifdef CONFIG_DWARF_UNWINDER
+struct mod_arch_specific {
 	struct list_head fde_list;
 	struct list_head cie_list;
-#endif
 };
-
-#ifdef CONFIG_64BIT
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Ehdr Elf64_Ehdr
-#else
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
 #endif
 
 #ifdef CONFIG_CPU_LITTLE_ENDIAN
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 67f1f6f5f4e1..a244e70b9bb0 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -37,6 +37,7 @@ config SPARC
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select MODULES_USE_ELF_RELA
 
 config SPARC32
 	def_bool !64BIT
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 67f83e0a0d68..fbe1cb578fcd 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -21,4 +21,5 @@ generic-y += div64.h
 generic-y += local64.h
 generic-y += irq_regs.h
 generic-y += local.h
+generic-y += module.h
 generic-y += word-at-a-time.h
diff --git a/arch/sparc/include/asm/module.h b/arch/sparc/include/asm/module.h
deleted file mode 100644
index ff8e02d80334..000000000000
--- a/arch/sparc/include/asm/module.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef __SPARC_MODULE_H
-#define __SPARC_MODULE_H
-struct mod_arch_specific { };
-
-/*
- * Use some preprocessor magic to define the correct symbol
- * for sparc32 and sparc64.
- * Elf_Addr becomes Elf32_Addr for sparc32 and Elf64_Addr for sparc64
- */
-#define ___ELF(a, b, c) a##b##c
-#define __ELF(a, b, c)  ___ELF(a, b, c)
-#define  _Elf(t)        __ELF(Elf, CONFIG_BITS, t)
-#define  _ELF(t)        __ELF(ELF, CONFIG_BITS, t)
-
-#define Elf_Shdr     _Elf(_Shdr)
-#define Elf_Sym      _Elf(_Sym)
-#define Elf_Ehdr     _Elf(_Ehdr)
-#define Elf_Rela     _Elf(_Rela)
-#define Elf_Addr     _Elf(_Addr)
-
-#define ELF_R_SYM    _ELF(_R_SYM)
-#define ELF_R_TYPE   _ELF(_R_TYPE)
-
-#endif /* __SPARC_MODULE_H */
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 932e4430f7f3..1603f3043399 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -17,6 +17,7 @@ config TILE
 	select SYS_HYPERVISOR
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_CLOCKEVENTS
+	select MODULES_USE_ELF_RELA
 
 # FIXME: investigate whether we need/want these options.
 #	select HAVE_IOREMAP_PROT
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index b0a47433341e..5ef081475bba 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -14,6 +14,7 @@ config UNICORE32
 	select GENERIC_IRQ_SHOW
 	select ARCH_WANT_FRAME_POINTERS
 	select GENERIC_IOMAP
+	select MODULES_USE_ELF_REL
 	help
 	  UniCore-32 is 32-bit Instruction Set Architecture,
 	  including a series of low-power-consumption RISC chip
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4abd..01726cbcc73b 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,8 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select MODULES_USE_ELF_REL if X86_32
+	select MODULES_USE_ELF_RELA if X86_64
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 9926e11a772d..a4b0c10c9d5d 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -21,9 +21,11 @@ config 64BIT
 config X86_32
 	def_bool !64BIT
 	select HAVE_AOUT
+	select MODULES_USE_ELF_REL
 
 config X86_64
 	def_bool 64BIT
+	select MODULES_USE_ELF_RELA
 
 config RWSEM_XCHGADD_ALGORITHM
 	def_bool X86_XADD && 64BIT
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 8ed64cfae4ff..4816e44001f1 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -11,6 +11,7 @@ config XTENSA
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
+	select MODULES_USE_ELF_RELA
 	help
 	  Xtensa processors are 32-bit RISC machines designed by Tensilica
 	  primarily for embedded systems.  These processors are both
diff --git a/arch/xtensa/include/asm/module.h b/arch/xtensa/include/asm/module.h
index d9b34bee4d42..488b40c6f9b9 100644
--- a/arch/xtensa/include/asm/module.h
+++ b/arch/xtensa/include/asm/module.h
@@ -13,15 +13,8 @@
 #ifndef _XTENSA_MODULE_H
 #define _XTENSA_MODULE_H
 
-struct mod_arch_specific
-{
-	/* No special elements, yet. */
-};
-
 #define MODULE_ARCH_VERMAGIC "xtensa-" __stringify(XCHAL_CORE_ID) " "
 
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
+#include <asm-generic/module.h>
 
 #endif	/* _XTENSA_MODULE_H */
diff --git a/include/asm-generic/module.h b/include/asm-generic/module.h
index ed5b44de4c91..14dc41d185a7 100644
--- a/include/asm-generic/module.h
+++ b/include/asm-generic/module.h
@@ -5,18 +5,44 @@
  * Many architectures just need a simple module
  * loader without arch specific data.
  */
+#ifndef CONFIG_HAVE_MOD_ARCH_SPECIFIC
 struct mod_arch_specific
 {
 };
+#endif
 
 #ifdef CONFIG_64BIT
-#define Elf_Shdr Elf64_Shdr
-#define Elf_Sym Elf64_Sym
-#define Elf_Ehdr Elf64_Ehdr
-#else
-#define Elf_Shdr Elf32_Shdr
-#define Elf_Sym Elf32_Sym
-#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Shdr	Elf64_Shdr
+#define Elf_Phdr	Elf64_Phdr
+#define Elf_Sym		Elf64_Sym
+#define Elf_Dyn		Elf64_Dyn
+#define Elf_Ehdr	Elf64_Ehdr
+#define Elf_Addr	Elf64_Addr
+#ifdef CONFIG_MODULES_USE_ELF_REL
+#define Elf_Rel		Elf64_Rel
+#endif
+#ifdef CONFIG_MODULES_USE_ELF_RELA
+#define Elf_Rela	Elf64_Rela
+#endif
+#define ELF_R_TYPE(X)	ELF64_R_TYPE(X)
+#define ELF_R_SYM(X)	ELF64_R_SYM(X)
+
+#else /* CONFIG_64BIT */
+
+#define Elf_Shdr	Elf32_Shdr
+#define Elf_Phdr	Elf32_Phdr
+#define Elf_Sym		Elf32_Sym
+#define Elf_Dyn		Elf32_Dyn
+#define Elf_Ehdr	Elf32_Ehdr
+#define Elf_Addr	Elf32_Addr
+#ifdef CONFIG_MODULES_USE_ELF_REL
+#define Elf_Rel		Elf32_Rel
+#endif
+#ifdef CONFIG_MODULES_USE_ELF_RELA
+#define Elf_Rela	Elf32_Rela
+#endif
+#define ELF_R_TYPE(X)	ELF32_R_TYPE(X)
+#define ELF_R_SYM(X)	ELF32_R_SYM(X)
 #endif
 
 #endif /* __ASM_GENERIC_MODULE_H */
diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index b2be02ebf453..560ca53a75fa 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -28,21 +28,49 @@ void *module_alloc(unsigned long size);
 /* Free memory returned from module_alloc. */
 void module_free(struct module *mod, void *module_region);
 
-/* Apply the given relocation to the (simplified) ELF.  Return -error
-   or 0. */
+/*
+ * Apply the given relocation to the (simplified) ELF.  Return -error
+ * or 0.
+ */
+#ifdef CONFIG_MODULES_USE_ELF_REL
 int apply_relocate(Elf_Shdr *sechdrs,
 		   const char *strtab,
 		   unsigned int symindex,
 		   unsigned int relsec,
 		   struct module *mod);
+#else
+static inline int apply_relocate(Elf_Shdr *sechdrs,
+				 const char *strtab,
+				 unsigned int symindex,
+				 unsigned int relsec,
+				 struct module *me)
+{
+	printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name);
+	return -ENOEXEC;
+}
+#endif
 
-/* Apply the given add relocation to the (simplified) ELF.  Return
-   -error or 0 */
+/*
+ * Apply the given add relocation to the (simplified) ELF.  Return
+ * -error or 0
+ */
+#ifdef CONFIG_MODULES_USE_ELF_RELA
 int apply_relocate_add(Elf_Shdr *sechdrs,
 		       const char *strtab,
 		       unsigned int symindex,
 		       unsigned int relsec,
 		       struct module *mod);
+#else
+static inline int apply_relocate_add(Elf_Shdr *sechdrs,
+				     const char *strtab,
+				     unsigned int symindex,
+				     unsigned int relsec,
+				     struct module *me)
+{
+	printk(KERN_ERR "module %s: REL relocation unsupported\n", me->name);
+	return -ENOEXEC;
+}
+#endif
 
 /* Any final processing of module before access.  Return -error or 0. */
 int module_finalize(const Elf_Ehdr *hdr,
diff --git a/kernel/module.c b/kernel/module.c
index 9ad9ee9406d6..7f2ee45f362c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1949,26 +1949,6 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
 	return ret;
 }
 
-int __weak apply_relocate(Elf_Shdr *sechdrs,
-			  const char *strtab,
-			  unsigned int symindex,
-			  unsigned int relsec,
-			  struct module *me)
-{
-	pr_err("module %s: REL relocation unsupported\n", me->name);
-	return -ENOEXEC;
-}
-
-int __weak apply_relocate_add(Elf_Shdr *sechdrs,
-			      const char *strtab,
-			      unsigned int symindex,
-			      unsigned int relsec,
-			      struct module *me)
-{
-	pr_err("module %s: RELA relocation unsupported\n", me->name);
-	return -ENOEXEC;
-}
-
 static int apply_relocations(struct module *mod, const struct load_info *info)
 {
 	unsigned int i;
-- 
cgit v1.2.3


From eccbb05a64fef867362ff05b5d266757e3c82b36 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 28 Sep 2012 15:05:15 +0930
Subject: virtio: remove CONFIG_VIRTIO_RING

Everyone who selects VIRTIO is also made to select VIRTIO_RING; just make
them synonymous, since we removed the indirection layer some time ago.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 arch/s390/Kconfig       | 1 -
 arch/x86/lguest/Kconfig | 1 -
 drivers/rpmsg/Kconfig   | 1 -
 drivers/virtio/Kconfig  | 6 ------
 drivers/virtio/Makefile | 3 +--
 5 files changed, 1 insertion(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 107610e01a29..9faf3d9b26c7 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -587,7 +587,6 @@ config S390_GUEST
 	depends on 64BIT && EXPERIMENTAL
 	select VIRTUALIZATION
 	select VIRTIO
-	select VIRTIO_RING
 	select VIRTIO_CONSOLE
 	help
 	  Select this option if you want to run the kernel as a guest under
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig
index 6e121a2a49e1..7872a3330fb5 100644
--- a/arch/x86/lguest/Kconfig
+++ b/arch/x86/lguest/Kconfig
@@ -4,7 +4,6 @@ config LGUEST_GUEST
 	depends on X86_32
 	select VIRTUALIZATION
 	select VIRTIO
-	select VIRTIO_RING
 	select VIRTIO_CONSOLE
 	help
 	  Lguest is a tiny in-kernel hypervisor.  Selecting this will
diff --git a/drivers/rpmsg/Kconfig b/drivers/rpmsg/Kconfig
index 32aead65735a..2bd911f12571 100644
--- a/drivers/rpmsg/Kconfig
+++ b/drivers/rpmsg/Kconfig
@@ -4,7 +4,6 @@ menu "Rpmsg drivers (EXPERIMENTAL)"
 config RPMSG
 	tristate
 	select VIRTIO
-	select VIRTIO_RING
 	depends on EXPERIMENTAL
 
 endmenu
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index eabc2a0cd989..8d5bddb56cb1 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -5,17 +5,12 @@ config VIRTIO
 	  bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_LGUEST,
 	  CONFIG_RPMSG or CONFIG_S390_GUEST.
 
-config VIRTIO_RING
-	tristate
-	depends on VIRTIO
-
 menu "Virtio drivers"
 
 config VIRTIO_PCI
 	tristate "PCI driver for virtio devices (EXPERIMENTAL)"
 	depends on PCI && EXPERIMENTAL
 	select VIRTIO
-	select VIRTIO_RING
 	---help---
 	  This drivers provides support for virtio based paravirtual device
 	  drivers over PCI.  This requires that your VMM has appropriate PCI
@@ -40,7 +35,6 @@ config VIRTIO_BALLOON
  	tristate "Platform bus driver for memory mapped virtio devices (EXPERIMENTAL)"
  	depends on HAS_IOMEM && EXPERIMENTAL
  	select VIRTIO
- 	select VIRTIO_RING
  	---help---
  	 This drivers provides support for memory mapped virtio
 	 platform device driver.
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index 5a4c63cfd380..9076635697bb 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -1,5 +1,4 @@
-obj-$(CONFIG_VIRTIO) += virtio.o
-obj-$(CONFIG_VIRTIO_RING) += virtio_ring.o
+obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
-- 
cgit v1.2.3


From fd0f5869724ff6195c6e7f12f8287c66a132e0ba Mon Sep 17 00:00:00 2001
From: Tomoki Sekiyama <tomoki.sekiyama.qu@hitachi.com>
Date: Wed, 26 Sep 2012 11:11:28 +0900
Subject: x86: Distinguish TLB shootdown interrupts from other functions call
 interrupts

As TLB shootdown requests to other CPU cores are now using function call
interrupts, TLB shootdowns entry in /proc/interrupts is always shown as 0.

This behavior change was introduced by commit 52aec3308db8 ("x86/tlb:
replace INVALIDATE_TLB_VECTOR by CALL_FUNCTION_VECTOR").

This patch reverts TLB shootdowns entry in /proc/interrupts to count TLB
shootdowns separately from the other function call interrupts.

Signed-off-by: Tomoki Sekiyama <tomoki.sekiyama.qu@hitachi.com>
Link: http://lkml.kernel.org/r/20120926021128.22212.20440.stgit@hpxw
Acked-by: Alex Shi <alex.shi@intel.com>
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/include/asm/hardirq.h | 4 ++++
 arch/x86/kernel/irq.c          | 4 ++--
 arch/x86/mm/tlb.c              | 2 ++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index d3895dbf4ddb..81f04cee5f74 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -18,6 +18,10 @@ typedef struct {
 #ifdef CONFIG_SMP
 	unsigned int irq_resched_count;
 	unsigned int irq_call_count;
+	/*
+	 * irq_tlb_count is double-counted in irq_call_count, so it must be
+	 * subtracted from irq_call_count when displaying irq_call_count
+	 */
 	unsigned int irq_tlb_count;
 #endif
 #ifdef CONFIG_X86_THERMAL_VECTOR
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 1f5f1d5d2a02..355b13f6de8d 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -92,7 +92,8 @@ int arch_show_interrupts(struct seq_file *p, int prec)
 	seq_printf(p, "  Rescheduling interrupts\n");
 	seq_printf(p, "%*s: ", prec, "CAL");
 	for_each_online_cpu(j)
-		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+		seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
+					irq_stats(j)->irq_tlb_count);
 	seq_printf(p, "  Function call interrupts\n");
 	seq_printf(p, "%*s: ", prec, "TLB");
 	for_each_online_cpu(j)
@@ -147,7 +148,6 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
 #ifdef CONFIG_SMP
 	sum += irq_stats(cpu)->irq_resched_count;
 	sum += irq_stats(cpu)->irq_call_count;
-	sum += irq_stats(cpu)->irq_tlb_count;
 #endif
 #ifdef CONFIG_X86_THERMAL_VECTOR
 	sum += irq_stats(cpu)->irq_thermal_count;
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index a085c560b4a5..0777f042e400 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -98,6 +98,8 @@ static void flush_tlb_func(void *info)
 {
 	struct flush_tlb_info *f = info;
 
+	inc_irq_stat(irq_tlb_count);
+
 	if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
 		return;
 
-- 
cgit v1.2.3


From 785107923a83d8456bbd8564e288a24d84109a46 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Fri, 28 Sep 2012 17:55:44 -0700
Subject: efi: Defer freeing boot services memory until after ACPI init

Some new ACPI 5.0 tables reference resources stored in boot services
memory, so keep that memory around until we have ACPI and can extract
data from it.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Link: http://lkml.kernel.org/r/baaa6d44bdc4eb0c58e5d1b4ccd2c729f854ac55.1348876882.git.josh@joshtriplett.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/efi/efi.c | 31 ++++++++++++++++++-------------
 include/linux/efi.h         |  5 +++++
 init/main.c                 |  3 +++
 3 files changed, 26 insertions(+), 13 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f55a4ce6dc49..b3dbbdbd2a42 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -419,10 +419,21 @@ void __init efi_reserve_boot_services(void)
 	}
 }
 
-static void __init efi_free_boot_services(void)
+static void __init efi_unmap_memmap(void)
+{
+	if (memmap.map) {
+		early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
+		memmap.map = NULL;
+	}
+}
+
+void __init efi_free_boot_services(void)
 {
 	void *p;
 
+	if (!efi_native)
+		return;
+
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
 		efi_memory_desc_t *md = p;
 		unsigned long long start = md->phys_addr;
@@ -438,6 +449,8 @@ static void __init efi_free_boot_services(void)
 
 		free_bootmem_late(start, size);
 	}
+
+	efi_unmap_memmap();
 }
 
 static int __init efi_systab_init(void *phys)
@@ -787,8 +800,10 @@ void __init efi_enter_virtual_mode(void)
 	 * non-native EFI
 	 */
 
-	if (!efi_native)
-		goto out;
+	if (!efi_native) {
+		efi_unmap_memmap();
+		return;
+	}
 
 	/* Merge contiguous regions of the same type and attribute */
 	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
@@ -877,13 +892,6 @@ void __init efi_enter_virtual_mode(void)
 		panic("EFI call to SetVirtualAddressMap() failed!");
 	}
 
-	/*
-	 * Thankfully, it does seem that no runtime services other than
-	 * SetVirtualAddressMap() will touch boot services code, so we can
-	 * get rid of it all at this point
-	 */
-	efi_free_boot_services();
-
 	/*
 	 * Now that EFI is in virtual mode, update the function
 	 * pointers in the runtime service table to the new virtual addresses.
@@ -907,9 +915,6 @@ void __init efi_enter_virtual_mode(void)
 	if (__supported_pte_mask & _PAGE_NX)
 		runtime_code_page_mkexec();
 
-out:
-	early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size);
-	memmap.map = NULL;
 	kfree(new_memmap);
 }
 
diff --git a/include/linux/efi.h b/include/linux/efi.h
index ec45ccd8708a..5782114f4838 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -496,6 +496,11 @@ extern void efi_map_pal_code (void);
 extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
 extern void efi_gettimeofday (struct timespec *ts);
 extern void efi_enter_virtual_mode (void);	/* switch EFI to virtual mode, if possible */
+#ifdef CONFIG_X86
+extern void efi_free_boot_services(void);
+#else
+static inline void efi_free_boot_services(void) {}
+#endif
 extern u64 efi_get_iobase (void);
 extern u32 efi_mem_type (unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
diff --git a/init/main.c b/init/main.c
index b28673087ac0..d61ec542205c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -631,6 +631,9 @@ asmlinkage void __init start_kernel(void)
 	acpi_early_init(); /* before LAPIC and SMP init */
 	sfi_init_late();
 
+	if (efi_enabled)
+		efi_free_boot_services();
+
 	ftrace_init();
 
 	/* Do the rest non-__init'ed, we're now alive */
-- 
cgit v1.2.3


From 7bc90e01c3f66c137e7e761f574bbf883087d590 Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Fri, 28 Sep 2012 17:56:08 -0700
Subject: efi: Add a function to look up existing IO memory mappings

The EFI initialization creates virtual mappings for EFI boot services
memory, so if a driver wants to access EFI boot services memory, it
cannot call ioremap itself; doing so will trip the WARN about mapping
RAM twice.  Thus, a driver accessing EFI boot services memory must do so
via the existing mapping already created during EFI intiialization.
Since the EFI code already maintains a memory map for that memory, add a
function efi_lookup_mapped_addr to look up mappings in that memory map.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Link: http://lkml.kernel.org/r/0eb48ae012797912874919110660ad420b90268b.1348876882.git.josh@joshtriplett.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/efi/efi.c | 28 ++++++++++++++++++++++++++++
 include/linux/efi.h         |  1 +
 2 files changed, 29 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index b3dbbdbd2a42..f7f928c315da 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -776,6 +776,34 @@ static void __init runtime_code_page_mkexec(void)
 	}
 }
 
+/*
+ * We can't ioremap data in EFI boot services RAM, because we've already mapped
+ * it as RAM.  So, look it up in the existing EFI memory map instead.  Only
+ * callable after efi_enter_virtual_mode and before efi_free_boot_services.
+ */
+void __iomem *efi_lookup_mapped_addr(u64 phys_addr)
+{
+	void *p;
+	if (WARN_ON(!memmap.map))
+		return NULL;
+	for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) {
+		efi_memory_desc_t *md = p;
+		u64 size = md->num_pages << EFI_PAGE_SHIFT;
+		u64 end = md->phys_addr + size;
+		if (!(md->attribute & EFI_MEMORY_RUNTIME) &&
+		    md->type != EFI_BOOT_SERVICES_CODE &&
+		    md->type != EFI_BOOT_SERVICES_DATA)
+			continue;
+		if (!md->virt_addr)
+			continue;
+		if (phys_addr >= md->phys_addr && phys_addr < end) {
+			phys_addr += md->virt_addr - md->phys_addr;
+			return (__force void __iomem *)(unsigned long)phys_addr;
+		}
+	}
+	return NULL;
+}
+
 /*
  * This function will switch the EFI runtime services to virtual mode.
  * Essentially, look through the EFI memmap and map every region that
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 5782114f4838..fff135d9375e 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -501,6 +501,7 @@ extern void efi_free_boot_services(void);
 #else
 static inline void efi_free_boot_services(void) {}
 #endif
+extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
 extern u64 efi_get_iobase (void);
 extern u32 efi_mem_type (unsigned long phys_addr);
 extern u64 efi_mem_attributes (unsigned long phys_addr);
-- 
cgit v1.2.3


From 2223af389032425e3d1a70f9cb3a63feaa654ced Mon Sep 17 00:00:00 2001
From: Josh Triplett <josh@joshtriplett.org>
Date: Fri, 28 Sep 2012 17:57:05 -0700
Subject: efi: Fix the ACPI BGRT driver for images located in EFI boot services
 memory

The ACPI BGRT driver accesses the BIOS logo image when it initializes.
However, ACPI 5.0 (which introduces the BGRT) recommends putting the
logo image in EFI boot services memory, so that the OS can reclaim that
memory.  Production systems follow this recommendation, breaking the
ACPI BGRT driver.

Move the bulk of the BGRT code to run during a new EFI late
initialization phase, which occurs after switching EFI to virtual mode,
and after initializing ACPI, but before freeing boot services memory.
Copy the BIOS logo image to kernel memory at that point, and make it
accessible to the BGRT driver.  Rework the existing ACPI BGRT driver to
act as a simple wrapper exposing that image (and the properties from the
BGRT) via sysfs.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Link: http://lkml.kernel.org/r/93ce9f823f1c1f3bb88bdd662cce08eee7a17f5d.1348876882.git.josh@joshtriplett.org
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
---
 arch/x86/platform/efi/Makefile   |  1 +
 arch/x86/platform/efi/efi-bgrt.c | 76 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/platform/efi/efi.c      |  6 ++++
 drivers/acpi/Kconfig             |  4 +--
 drivers/acpi/bgrt.c              | 76 +++++-----------------------------------
 include/linux/efi-bgrt.h         | 21 +++++++++++
 include/linux/efi.h              |  2 ++
 init/main.c                      |  4 ++-
 8 files changed, 120 insertions(+), 70 deletions(-)
 create mode 100644 arch/x86/platform/efi/efi-bgrt.c
 create mode 100644 include/linux/efi-bgrt.h

(limited to 'arch/x86')

diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile
index 73b8be0f3675..6db1cc4c7534 100644
--- a/arch/x86/platform/efi/Makefile
+++ b/arch/x86/platform/efi/Makefile
@@ -1 +1,2 @@
 obj-$(CONFIG_EFI) 		+= efi.o efi_$(BITS).o efi_stub_$(BITS).o
+obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c
new file mode 100644
index 000000000000..f6a0c1b8e518
--- /dev/null
+++ b/arch/x86/platform/efi/efi-bgrt.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright 2012 Intel Corporation
+ * Author: Josh Triplett <josh@joshtriplett.org>
+ *
+ * Based on the bgrt driver:
+ * Copyright 2012 Red Hat, Inc <mjg@redhat.com>
+ * Author: Matthew Garrett
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/acpi.h>
+#include <linux/efi.h>
+#include <linux/efi-bgrt.h>
+
+struct acpi_table_bgrt *bgrt_tab;
+void *bgrt_image;
+size_t bgrt_image_size;
+
+struct bmp_header {
+	u16 id;
+	u32 size;
+} __packed;
+
+void efi_bgrt_init(void)
+{
+	acpi_status status;
+	void __iomem *image;
+	bool ioremapped = false;
+	struct bmp_header bmp_header;
+
+	if (acpi_disabled)
+		return;
+
+	status = acpi_get_table("BGRT", 0,
+	                        (struct acpi_table_header **)&bgrt_tab);
+	if (ACPI_FAILURE(status))
+		return;
+
+	if (bgrt_tab->version != 1)
+		return;
+	if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address)
+		return;
+
+	image = efi_lookup_mapped_addr(bgrt_tab->image_address);
+	if (!image) {
+		image = ioremap(bgrt_tab->image_address, sizeof(bmp_header));
+		ioremapped = true;
+		if (!image)
+			return;
+	}
+
+	memcpy_fromio(&bmp_header, image, sizeof(bmp_header));
+	if (ioremapped)
+		iounmap(image);
+	bgrt_image_size = bmp_header.size;
+
+	bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL);
+	if (!bgrt_image)
+		return;
+
+	if (ioremapped) {
+		image = ioremap(bgrt_tab->image_address, bmp_header.size);
+		if (!image) {
+			kfree(bgrt_image);
+			bgrt_image = NULL;
+			return;
+		}
+	}
+
+	memcpy_fromio(bgrt_image, image, bgrt_image_size);
+	if (ioremapped)
+		iounmap(image);
+}
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index f7f928c315da..aded2a91162a 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -31,6 +31,7 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/efi.h>
+#include <linux/efi-bgrt.h>
 #include <linux/export.h>
 #include <linux/bootmem.h>
 #include <linux/memblock.h>
@@ -745,6 +746,11 @@ void __init efi_init(void)
 #endif
 }
 
+void __init efi_late_init(void)
+{
+	efi_bgrt_init();
+}
+
 void __init efi_set_executable(efi_memory_desc_t *md, bool executable)
 {
 	u64 addr, npages;
diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 80998958cf45..119d58db8342 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -385,8 +385,8 @@ config ACPI_CUSTOM_METHOD
 	  to override that restriction).
 
 config ACPI_BGRT
-        tristate "Boottime Graphics Resource Table support"
-        default n
+	bool "Boottime Graphics Resource Table support"
+	depends on EFI
         help
 	  This driver adds support for exposing the ACPI Boottime Graphics
 	  Resource Table, which allows the operating system to obtain
diff --git a/drivers/acpi/bgrt.c b/drivers/acpi/bgrt.c
index 6680df36b963..be6039958545 100644
--- a/drivers/acpi/bgrt.c
+++ b/drivers/acpi/bgrt.c
@@ -1,5 +1,6 @@
 /*
  * Copyright 2012 Red Hat, Inc <mjg@redhat.com>
+ * Copyright 2012 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -11,20 +12,10 @@
 #include <linux/init.h>
 #include <linux/device.h>
 #include <linux/sysfs.h>
-#include <linux/io.h>
-#include <acpi/acpi.h>
-#include <acpi/acpi_bus.h>
+#include <linux/efi-bgrt.h>
 
-static struct acpi_table_bgrt *bgrt_tab;
 static struct kobject *bgrt_kobj;
 
-struct bmp_header {
-	u16 id;
-	u32 size;
-} __attribute ((packed));
-
-static struct bmp_header bmp_header;
-
 static ssize_t show_version(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
@@ -63,18 +54,7 @@ static DEVICE_ATTR(yoffset, S_IRUGO, show_yoffset, NULL);
 static ssize_t show_image(struct file *file, struct kobject *kobj,
 	       struct bin_attribute *attr, char *buf, loff_t off, size_t count)
 {
-	int size = attr->size;
-	void __iomem *image = attr->private;
-
-	if (off >= size) {
-		count = 0;
-	} else {
-		if (off + count > size)
-			count = size - off;
-
-		memcpy_fromio(buf, image+off, count);
-	}
-
+	memcpy(buf, attr->private + off, count);
 	return count;
 }
 
@@ -101,45 +81,18 @@ static struct attribute_group bgrt_attribute_group = {
 
 static int __init bgrt_init(void)
 {
-	acpi_status status;
 	int ret;
-	void __iomem *bgrt;
 
-	if (acpi_disabled)
-		return -ENODEV;
-
-	status = acpi_get_table("BGRT", 0,
-				(struct acpi_table_header **)&bgrt_tab);
-
-	if (ACPI_FAILURE(status))
+	if (!bgrt_image)
 		return -ENODEV;
 
 	sysfs_bin_attr_init(&image_attr);
-
-	bgrt = ioremap(bgrt_tab->image_address, sizeof(struct bmp_header));
-
-	if (!bgrt) {
-		ret = -EINVAL;
-		goto out_err;
-	}
-
-	memcpy_fromio(&bmp_header, bgrt, sizeof(bmp_header));
-	image_attr.size = bmp_header.size;
-	iounmap(bgrt);
-
-	image_attr.private = ioremap(bgrt_tab->image_address, image_attr.size);
-
-	if (!image_attr.private) {
-		ret = -EINVAL;
-		goto out_err;
-	}
-
+	image_attr.private = bgrt_image;
+	image_attr.size = bgrt_image_size;
 
 	bgrt_kobj = kobject_create_and_add("bgrt", acpi_kobj);
-	if (!bgrt_kobj) {
-		ret = -EINVAL;
-		goto out_iounmap;
-	}
+	if (!bgrt_kobj)
+		return -EINVAL;
 
 	ret = sysfs_create_group(bgrt_kobj, &bgrt_attribute_group);
 	if (ret)
@@ -155,22 +108,11 @@ out_group:
 	sysfs_remove_group(bgrt_kobj, &bgrt_attribute_group);
 out_kobject:
 	kobject_put(bgrt_kobj);
-out_iounmap:
-	iounmap(image_attr.private);
-out_err:
 	return ret;
 }
 
-static void __exit bgrt_exit(void)
-{
-	iounmap(image_attr.private);
-	sysfs_remove_group(bgrt_kobj, &bgrt_attribute_group);
-	sysfs_remove_bin_file(bgrt_kobj, &image_attr);
-}
-
 module_init(bgrt_init);
-module_exit(bgrt_exit);
 
-MODULE_AUTHOR("Matthew Garrett");
+MODULE_AUTHOR("Matthew Garrett, Josh Triplett <josh@joshtriplett.org>");
 MODULE_DESCRIPTION("BGRT boot graphic support");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/efi-bgrt.h b/include/linux/efi-bgrt.h
new file mode 100644
index 000000000000..051b21fedf68
--- /dev/null
+++ b/include/linux/efi-bgrt.h
@@ -0,0 +1,21 @@
+#ifndef _LINUX_EFI_BGRT_H
+#define _LINUX_EFI_BGRT_H
+
+#ifdef CONFIG_ACPI_BGRT
+
+#include <linux/acpi.h>
+
+void efi_bgrt_init(void);
+
+/* The BGRT data itself; only valid if bgrt_image != NULL. */
+extern void *bgrt_image;
+extern size_t bgrt_image_size;
+extern struct acpi_table_bgrt *bgrt_tab;
+
+#else /* !CONFIG_ACPI_BGRT */
+
+static inline void efi_bgrt_init(void) {}
+
+#endif /* !CONFIG_ACPI_BGRT */
+
+#endif /* _LINUX_EFI_BGRT_H */
diff --git a/include/linux/efi.h b/include/linux/efi.h
index fff135d9375e..8670eb1eb8cd 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -497,8 +497,10 @@ extern void efi_memmap_walk (efi_freemem_callback_t callback, void *arg);
 extern void efi_gettimeofday (struct timespec *ts);
 extern void efi_enter_virtual_mode (void);	/* switch EFI to virtual mode, if possible */
 #ifdef CONFIG_X86
+extern void efi_late_init(void);
 extern void efi_free_boot_services(void);
 #else
+static inline void efi_late_init(void) {}
 static inline void efi_free_boot_services(void) {}
 #endif
 extern void __iomem *efi_lookup_mapped_addr(u64 phys_addr);
diff --git a/init/main.c b/init/main.c
index d61ec542205c..db34c0ec4711 100644
--- a/init/main.c
+++ b/init/main.c
@@ -631,8 +631,10 @@ asmlinkage void __init start_kernel(void)
 	acpi_early_init(); /* before LAPIC and SMP init */
 	sfi_init_late();
 
-	if (efi_enabled)
+	if (efi_enabled) {
+		efi_late_init();
 		efi_free_boot_services();
+	}
 
 	ftrace_init();
 
-- 
cgit v1.2.3


From 7076aada1040de4ed79a5977dbabdb5e5ea5e249 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 10 Sep 2012 16:44:54 -0400
Subject: x86: split ret_from_fork

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/Kconfig                 |  1 +
 arch/x86/include/asm/processor.h |  5 -----
 arch/x86/kernel/entry_32.S       | 15 ++++++++++-----
 arch/x86/kernel/entry_64.S       | 27 +++++++++++----------------
 arch/x86/kernel/process.c        | 38 --------------------------------------
 arch/x86/kernel/process_32.c     | 31 ++++++++++++++++++++++++-------
 arch/x86/kernel/process_64.c     | 35 +++++++++++++++++++++--------------
 7 files changed, 67 insertions(+), 85 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 8ec3a1aa4abd..d93eb9d1bb97 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -97,6 +97,7 @@ config X86
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select GENERIC_KERNEL_THREAD
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index d048cad9bcad..078f3fdedf95 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -589,11 +589,6 @@ typedef struct {
 } mm_segment_t;
 
 
-/*
- * create a kernel thread without removing it from tasklists
- */
-extern int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
-
 /* Free all resources held by a thread. */
 extern void release_thread(struct task_struct *);
 
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 623f28837476..ac1107346fc9 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -994,15 +994,20 @@ END(spurious_interrupt_bug)
  */
 	.popsection
 
-ENTRY(kernel_thread_helper)
-	pushl $0		# fake return address for unwinder
+ENTRY(ret_from_kernel_thread)
 	CFI_STARTPROC
-	movl %edi,%eax
-	call *%esi
+	pushl_cfi %eax
+	call schedule_tail
+	GET_THREAD_INFO(%ebp)
+	popl_cfi %eax
+	pushl_cfi $0x0202		# Reset kernel eflags
+	popfl_cfi
+	movl PT_EBP(%esp),%eax
+	call *PT_EBX(%esp)
 	call do_exit
 	ud2			# padding for call trace
 	CFI_ENDPROC
-ENDPROC(kernel_thread_helper)
+ENDPROC(ret_from_kernel_thread)
 
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 69babd8c834f..5526d17db676 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -450,7 +450,7 @@ ENTRY(ret_from_fork)
 	RESTORE_REST
 
 	testl $3, CS-ARGOFFSET(%rsp)		# from kernel_thread?
-	jz   retint_restore_args
+	jz   1f
 
 	testl $_TIF_IA32, TI_flags(%rcx)	# 32-bit compat task needs IRET
 	jnz  int_ret_from_sys_call
@@ -458,6 +458,16 @@ ENTRY(ret_from_fork)
 	RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
+1:
+	subq $REST_SKIP, %rsp	# move the stack pointer back
+	CFI_ADJUST_CFA_OFFSET	REST_SKIP
+	movq %rbp, %rdi
+	call *%rbx
+	# exit
+	mov %eax, %edi
+	call do_exit
+	ud2			# padding for call trace
+
 	CFI_ENDPROC
 END(ret_from_fork)
 
@@ -1206,21 +1216,6 @@ bad_gs:
 	jmp  2b
 	.previous
 
-ENTRY(kernel_thread_helper)
-	pushq $0		# fake return address
-	CFI_STARTPROC
-	/*
-	 * Here we are in the child and the registers are set as they were
-	 * at kernel_thread() invocation in the parent.
-	 */
-	call *%rsi
-	# exit
-	mov %eax, %edi
-	call do_exit
-	ud2			# padding for call trace
-	CFI_ENDPROC
-END(kernel_thread_helper)
-
 /*
  * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
  *
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 7162e9c1f598..6947ec968bf8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -298,44 +298,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
 	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 }
 
-/*
- * This gets run with %si containing the
- * function to call, and %di containing
- * the "args".
- */
-extern void kernel_thread_helper(void);
-
-/*
- * Create a kernel thread
- */
-int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
-{
-	struct pt_regs regs;
-
-	memset(&regs, 0, sizeof(regs));
-
-	regs.si = (unsigned long) fn;
-	regs.di = (unsigned long) arg;
-
-#ifdef CONFIG_X86_32
-	regs.ds = __USER_DS;
-	regs.es = __USER_DS;
-	regs.fs = __KERNEL_PERCPU;
-	regs.gs = __KERNEL_STACK_CANARY;
-#else
-	regs.ss = __KERNEL_DS;
-#endif
-
-	regs.orig_ax = -1;
-	regs.ip = (unsigned long) kernel_thread_helper;
-	regs.cs = __KERNEL_CS | get_kernel_rpl();
-	regs.flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
-
-	/* Ok, create the new process.. */
-	return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);
-}
-EXPORT_SYMBOL(kernel_thread);
-
 /*
  * sys_execve() executes a new program.
  */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 75fcad146def..c9939875d267 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,7 @@
 #include <asm/switch_to.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
+asmlinkage void ret_from_kernel_thread(void) __asm__("ret_from_kernel_thread");
 
 /*
  * Return saved PC of a blocked thread.
@@ -127,23 +128,39 @@ void release_thread(struct task_struct *dead_task)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-	unsigned long unused,
+	unsigned long arg,
 	struct task_struct *p, struct pt_regs *regs)
 {
-	struct pt_regs *childregs;
+	struct pt_regs *childregs = task_pt_regs(p);
 	struct task_struct *tsk;
 	int err;
 
-	childregs = task_pt_regs(p);
+	p->thread.sp = (unsigned long) childregs;
+	p->thread.sp0 = (unsigned long) (childregs+1);
+
+	if (unlikely(!regs)) {
+		/* kernel thread */
+		memset(childregs, 0, sizeof(struct pt_regs));
+		p->thread.ip = (unsigned long) ret_from_kernel_thread;
+		task_user_gs(p) = __KERNEL_STACK_CANARY;
+		childregs->ds = __USER_DS;
+		childregs->es = __USER_DS;
+		childregs->fs = __KERNEL_PERCPU;
+		childregs->bx = sp;	/* function */
+		childregs->bp = arg;
+		childregs->orig_ax = -1;
+		childregs->cs = __KERNEL_CS | get_kernel_rpl();
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		p->fpu_counter = 0;
+		p->thread.io_bitmap_ptr = NULL;
+		memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+		return 0;
+	}
 	*childregs = *regs;
 	childregs->ax = 0;
 	childregs->sp = sp;
 
-	p->thread.sp = (unsigned long) childregs;
-	p->thread.sp0 = (unsigned long) (childregs+1);
-
 	p->thread.ip = (unsigned long) ret_from_fork;
-
 	task_user_gs(p) = get_user_gs(regs);
 
 	p->fpu_counter = 0;
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 0a980c9d7cb8..937f2af6f2d4 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -146,29 +146,18 @@ static inline u32 read_32bit_tls(struct task_struct *t, int tls)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long unused,
+		unsigned long arg,
 	struct task_struct *p, struct pt_regs *regs)
 {
 	int err;
 	struct pt_regs *childregs;
 	struct task_struct *me = current;
 
-	childregs = ((struct pt_regs *)
-			(THREAD_SIZE + task_stack_page(p))) - 1;
-	*childregs = *regs;
-
-	childregs->ax = 0;
-	if (user_mode(regs))
-		childregs->sp = sp;
-	else
-		childregs->sp = (unsigned long)childregs;
-
+	p->thread.sp0 = (unsigned long)task_stack_page(p) + THREAD_SIZE;
+	childregs = task_pt_regs(p);
 	p->thread.sp = (unsigned long) childregs;
-	p->thread.sp0 = (unsigned long) (childregs+1);
 	p->thread.usersp = me->thread.usersp;
-
 	set_tsk_thread_flag(p, TIF_FORK);
-
 	p->fpu_counter = 0;
 	p->thread.io_bitmap_ptr = NULL;
 
@@ -178,6 +167,24 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 	p->thread.fs = p->thread.fsindex ? 0 : me->thread.fs;
 	savesegment(es, p->thread.es);
 	savesegment(ds, p->thread.ds);
+	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
+
+	if (unlikely(!regs)) {
+		/* kernel thread */
+		memset(childregs, 0, sizeof(struct pt_regs));
+		childregs->sp = (unsigned long)childregs;
+		childregs->ss = __KERNEL_DS;
+		childregs->bx = sp; /* function */
+		childregs->bp = arg;
+		childregs->orig_ax = -1;
+		childregs->cs = __KERNEL_CS | get_kernel_rpl();
+		childregs->flags = X86_EFLAGS_IF | X86_EFLAGS_BIT1;
+		return 0;
+	}
+	*childregs = *regs;
+
+	childregs->ax = 0;
+	childregs->sp = sp;
 
 	err = -ENOMEM;
 	memset(p->thread.ptrace_bps, 0, sizeof(p->thread.ptrace_bps));
-- 
cgit v1.2.3


From 6783eaa2e1253fbcbe2c2f6bb4c843abf1343caf Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 2 Aug 2012 23:05:11 +0400
Subject: x86, um/x86: switch to generic sys_execve and kernel_execve

32bit wrapper is lost on that; 64bit one is *not*, since
we need to arrange for full pt_regs on stack when we call
sys_execve() and we need to load callee-saved ones from
there afterwards.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/kernel/exec.c            | 25 ++-------------------
 arch/um/kernel/internal.h        |  1 -
 arch/um/kernel/syscall.c         | 17 ---------------
 arch/x86/ia32/ia32entry.S        |  2 +-
 arch/x86/ia32/sys_ia32.c         | 15 -------------
 arch/x86/include/asm/sys_ia32.h  |  2 --
 arch/x86/include/asm/syscalls.h  |  2 +-
 arch/x86/include/asm/unistd.h    |  2 ++
 arch/x86/kernel/Makefile         |  2 +-
 arch/x86/kernel/asm-offsets.c    |  3 +++
 arch/x86/kernel/entry_32.S       | 11 +++++++---
 arch/x86/kernel/entry_64.S       | 47 ++++++++++++----------------------------
 arch/x86/kernel/process.c        | 19 ----------------
 arch/x86/kernel/process_32.c     |  1 +
 arch/x86/kernel/sys_i386_32.c    | 40 ----------------------------------
 arch/x86/syscalls/syscall_32.tbl |  2 +-
 arch/x86/um/sys_call_table_32.c  |  1 -
 17 files changed, 34 insertions(+), 158 deletions(-)
 delete mode 100644 arch/um/kernel/internal.h
 delete mode 100644 arch/x86/kernel/sys_i386_32.c

(limited to 'arch/x86')

diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 8c82786da823..e427301f55d6 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -16,7 +16,6 @@
 #include "mem_user.h"
 #include "skas.h"
 #include "os.h"
-#include "internal.h"
 
 void flush_thread(void)
 {
@@ -49,27 +48,7 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
 }
 EXPORT_SYMBOL(start_thread);
 
-long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env)
+void __noreturn ret_from_kernel_execve(struct pt_regs *unused)
 {
-	long err;
-
-	err = do_execve(file, argv, env, &current->thread.regs);
-	if (!err)
-		UML_LONGJMP(current->thread.exec_buf, 1);
-	return err;
-}
-
-long sys_execve(const char __user *file, const char __user *const __user *argv,
-		const char __user *const __user *env)
-{
-	long error;
-	char *filename;
-
-	filename = getname(file);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename)) goto out;
-	error = do_execve(filename, argv, env, &current->thread.regs);
-	putname(filename);
- out:
-	return error;
+	UML_LONGJMP(current->thread.exec_buf, 1);
 }
diff --git a/arch/um/kernel/internal.h b/arch/um/kernel/internal.h
deleted file mode 100644
index 5bf97db24a04..000000000000
--- a/arch/um/kernel/internal.h
+++ /dev/null
@@ -1 +0,0 @@
-extern long um_execve(const char *file, const char __user *const __user *argv, const char __user *const __user *env);
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index a4c6d8eee74c..a5639c472772 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -13,7 +13,6 @@
 #include "asm/mman.h"
 #include "asm/uaccess.h"
 #include "asm/unistd.h"
-#include "internal.h"
 
 long sys_fork(void)
 {
@@ -50,19 +49,3 @@ long old_mmap(unsigned long addr, unsigned long len,
  out:
 	return err;
 }
-
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	mm_segment_t fs;
-	int ret;
-
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-	ret = um_execve(filename, (const char __user *const __user *)argv,
-			(const char __user *const __user *) envp);
-	set_fs(fs);
-
-	return ret;
-}
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 20e5f7ba0e6b..e75f941bd2b2 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -459,7 +459,7 @@ GLOBAL(\label)
 	PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi
 	PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi
 	PTREGSCALL stub32_sigaltstack, sys32_sigaltstack, %rdx
-	PTREGSCALL stub32_execve, sys32_execve, %rcx
+	PTREGSCALL stub32_execve, compat_sys_execve, %rcx
 	PTREGSCALL stub32_fork, sys_fork, %rdi
 	PTREGSCALL stub32_clone, sys32_clone, %rdx
 	PTREGSCALL stub32_vfork, sys_vfork, %rdi
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 4540bece0946..6b31144589d0 100644
--- a/arch/x86/ia32/sys_ia32.c
+++ b/arch/x86/ia32/sys_ia32.c
@@ -385,21 +385,6 @@ asmlinkage long sys32_sendfile(int out_fd, int in_fd,
 	return ret;
 }
 
-asmlinkage long sys32_execve(const char __user *name, compat_uptr_t __user *argv,
-			     compat_uptr_t __user *envp, struct pt_regs *regs)
-{
-	long error;
-	char *filename;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-	error = compat_do_execve(filename, argv, envp, regs);
-	putname(filename);
-	return error;
-}
-
 asmlinkage long sys32_clone(unsigned int clone_flags, unsigned int newsp,
 			    struct pt_regs *regs)
 {
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h
index 3fda9db48819..1ac127f41fe6 100644
--- a/arch/x86/include/asm/sys_ia32.h
+++ b/arch/x86/include/asm/sys_ia32.h
@@ -54,8 +54,6 @@ asmlinkage long sys32_pwrite(unsigned int, const char __user *, u32, u32, u32);
 asmlinkage long sys32_personality(unsigned long);
 asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32);
 
-asmlinkage long sys32_execve(const char __user *, compat_uptr_t __user *,
-			     compat_uptr_t __user *, struct pt_regs *);
 asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *);
 
 long sys32_lseek(unsigned int, int, unsigned int);
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index f1d8b441fc77..2be0b880417e 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -25,7 +25,7 @@ int sys_fork(struct pt_regs *);
 int sys_vfork(struct pt_regs *);
 long sys_execve(const char __user *,
 		const char __user *const __user *,
-		const char __user *const __user *, struct pt_regs *);
+		const char __user *const __user *);
 long sys_clone(unsigned long, unsigned long, void __user *,
 	       void __user *, struct pt_regs *);
 
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 0d9776e9e2dc..55d155560fdf 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -50,6 +50,8 @@
 # define __ARCH_WANT_SYS_TIME
 # define __ARCH_WANT_SYS_UTIME
 # define __ARCH_WANT_SYS_WAITPID
+# define __ARCH_WANT_SYS_EXECVE
+# define __ARCH_WANT_KERNEL_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 8215e5652d97..566100002233 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,7 +23,7 @@ obj-y			+= time.o ioport.o ldt.o dumpstack.o nmi.o
 obj-y			+= setup.o x86_init.o i8259.o irqinit.o jump_label.o
 obj-$(CONFIG_IRQ_WORK)  += irq_work.o
 obj-y			+= probe_roms.o
-obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
+obj-$(CONFIG_X86_32)	+= i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
 obj-y			+= syscall_$(BITS).o
 obj-$(CONFIG_X86_64)	+= vsyscall_64.o
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 68de2dc962ec..28610822fb3c 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -69,4 +69,7 @@ void common(void) {
 	OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
 	OFFSET(BP_pref_address, boot_params, hdr.pref_address);
 	OFFSET(BP_code32_start, boot_params, hdr.code32_start);
+
+	BLANK();
+	DEFINE(PTREGS_SIZE, sizeof(struct pt_regs));
 }
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index ac1107346fc9..b6bb69239296 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -298,6 +298,13 @@ ENTRY(ret_from_fork)
 	CFI_ENDPROC
 END(ret_from_fork)
 
+ENTRY(ret_from_kernel_execve)
+	movl %eax, %esp
+	movl $0,PT_EAX(%esp)
+	GET_THREAD_INFO(%ebp)
+	jmp syscall_exit
+END(ret_from_kernel_execve)
+
 /*
  * Interrupt exit functions should be protected against kprobes
  */
@@ -322,8 +329,7 @@ ret_from_intr:
 	andl $(X86_EFLAGS_VM | SEGMENT_RPL_MASK), %eax
 #else
 	/*
-	 * We can be coming here from a syscall done in the kernel space,
-	 * e.g. a failed kernel_execve().
+	 * We can be coming here from child spawned by kernel_thread().
 	 */
 	movl PT_CS(%esp), %eax
 	andl $SEGMENT_RPL_MASK, %eax
@@ -727,7 +733,6 @@ ENDPROC(ptregs_##name)
 PTREGSCALL1(iopl)
 PTREGSCALL0(fork)
 PTREGSCALL0(vfork)
-PTREGSCALL3(execve)
 PTREGSCALL2(sigaltstack)
 PTREGSCALL0(sigreturn)
 PTREGSCALL0(rt_sigreturn)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 5526d17db676..053c9552ffd9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -767,7 +767,6 @@ ENTRY(stub_execve)
 	PARTIAL_FRAME 0
 	SAVE_REST
 	FIXUP_TOP_OF_STACK %r11
-	movq %rsp, %rcx
 	call sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
@@ -817,8 +816,7 @@ ENTRY(stub_x32_execve)
 	PARTIAL_FRAME 0
 	SAVE_REST
 	FIXUP_TOP_OF_STACK %r11
-	movq %rsp, %rcx
-	call sys32_execve
+	call compat_sys_execve
 	RESTORE_TOP_OF_STACK %r11
 	movq %rax,RAX(%rsp)
 	RESTORE_REST
@@ -1216,36 +1214,19 @@ bad_gs:
 	jmp  2b
 	.previous
 
-/*
- * execve(). This function needs to use IRET, not SYSRET, to set up all state properly.
- *
- * C extern interface:
- *	 extern long execve(const char *name, char **argv, char **envp)
- *
- * asm input arguments:
- *	rdi: name, rsi: argv, rdx: envp
- *
- * We want to fallback into:
- *	extern long sys_execve(const char *name, char **argv,char **envp, struct pt_regs *regs)
- *
- * do_sys_execve asm fallback arguments:
- *	rdi: name, rsi: argv, rdx: envp, rcx: fake frame on the stack
- */
-ENTRY(kernel_execve)
-	CFI_STARTPROC
-	FAKE_STACK_FRAME $0
-	SAVE_ALL
-	movq %rsp,%rcx
-	call sys_execve
-	movq %rax, RAX(%rsp)
-	RESTORE_REST
-	testq %rax,%rax
-	je int_ret_from_sys_call
-	RESTORE_ARGS
-	UNFAKE_STACK_FRAME
-	ret
-	CFI_ENDPROC
-END(kernel_execve)
+ENTRY(ret_from_kernel_execve)
+	movq %rdi, %rsp
+	movl $0, RAX(%rsp)
+	// RESTORE_REST
+	movq 0*8(%rsp), %r15
+	movq 1*8(%rsp), %r14
+	movq 2*8(%rsp), %r13
+	movq 3*8(%rsp), %r12
+	movq 4*8(%rsp), %rbp
+	movq 5*8(%rsp), %rbx
+	addq $(6*8), %rsp
+	jmp int_ret_from_sys_call
+END(ret_from_kernel_execve)
 
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6947ec968bf8..eae2dd5cd5a0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -298,25 +298,6 @@ sys_clone(unsigned long clone_flags, unsigned long newsp,
 	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
 }
 
-/*
- * sys_execve() executes a new program.
- */
-long sys_execve(const char __user *name,
-		const char __user *const __user *argv,
-		const char __user *const __user *envp, struct pt_regs *regs)
-{
-	long error;
-	char *filename;
-
-	filename = getname(name);
-	error = PTR_ERR(filename);
-	if (IS_ERR(filename))
-		return error;
-	error = do_execve(filename, argv, envp, regs);
-	putname(filename);
-	return error;
-}
-
 /*
  * Idle related variables and functions
  */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index c9939875d267..25e7e9390d26 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -207,6 +207,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs		= __USER_CS;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
+	regs->flags		= X86_EFLAGS_IF;
 	/*
 	 * Free the old FP and other extended state
 	 */
diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
deleted file mode 100644
index 0b0cb5fede19..000000000000
--- a/arch/x86/kernel/sys_i386_32.c
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- * This file contains various random system calls that
- * have a non-standard calling sequence on the Linux/i386
- * platform.
- */
-
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/fs.h>
-#include <linux/smp.h>
-#include <linux/sem.h>
-#include <linux/msg.h>
-#include <linux/shm.h>
-#include <linux/stat.h>
-#include <linux/syscalls.h>
-#include <linux/mman.h>
-#include <linux/file.h>
-#include <linux/utsname.h>
-#include <linux/ipc.h>
-
-#include <linux/uaccess.h>
-#include <linux/unistd.h>
-
-#include <asm/syscalls.h>
-
-/*
- * Do a system call from kernel instead of calling sys_execve so we
- * end up with proper pt_regs.
- */
-int kernel_execve(const char *filename,
-		  const char *const argv[],
-		  const char *const envp[])
-{
-	long __res;
-	asm volatile ("int $0x80"
-	: "=a" (__res)
-	: "0" (__NR_execve), "b" (filename), "c" (argv), "d" (envp) : "memory");
-	return __res;
-}
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 7a35a6e71d44..a47103fbc692 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -17,7 +17,7 @@
 8	i386	creat			sys_creat
 9	i386	link			sys_link
 10	i386	unlink			sys_unlink
-11	i386	execve			ptregs_execve			stub32_execve
+11	i386	execve			sys_execve			stub32_execve
 12	i386	chdir			sys_chdir
 13	i386	time			sys_time			compat_sys_time
 14	i386	mknod			sys_mknod
diff --git a/arch/x86/um/sys_call_table_32.c b/arch/x86/um/sys_call_table_32.c
index b5408cecac6c..232e60504b3a 100644
--- a/arch/x86/um/sys_call_table_32.c
+++ b/arch/x86/um/sys_call_table_32.c
@@ -25,7 +25,6 @@
 #define old_mmap sys_old_mmap
 
 #define ptregs_fork sys_fork
-#define ptregs_execve sys_execve
 #define ptregs_iopl sys_iopl
 #define ptregs_vm86old sys_vm86old
 #define ptregs_clone i386_clone
-- 
cgit v1.2.3


From 1f02ab4a237086095bd584f3446da307ac2d02e6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 21 Sep 2012 20:32:29 -0400
Subject: um: switch to generic kernel_thread()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/include/asm/processor-generic.h |  2 --
 arch/um/kernel/process.c                | 17 +++--------------
 arch/x86/um/Kconfig                     |  1 +
 3 files changed, 4 insertions(+), 16 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 33a6a2423bd2..5d9ab0c4f488 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -63,8 +63,6 @@ static inline void release_thread(struct task_struct *task)
 {
 }
 
-extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
-
 extern unsigned long thread_saved_pc(struct task_struct *t);
 
 static inline void mm_copy_segments(struct mm_struct *from_mm,
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c5f5afa50745..a1b50add48a2 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -69,18 +69,6 @@ unsigned long alloc_stack(int order, int atomic)
 	return page;
 }
 
-int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
-{
-	int pid;
-
-	current->thread.request.u.thread.proc = fn;
-	current->thread.request.u.thread.arg = arg;
-	pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0,
-		      &current->thread.regs, 0, NULL, NULL);
-	return pid;
-}
-EXPORT_SYMBOL(kernel_thread);
-
 static inline void set_current(struct task_struct *task)
 {
 	cpu_tasks[task_thread_info(task)->cpu] = ((struct cpu_task)
@@ -177,7 +165,7 @@ void fork_handler(void)
 }
 
 int copy_thread(unsigned long clone_flags, unsigned long sp,
-		unsigned long stack_top, struct task_struct * p,
+		unsigned long arg, struct task_struct * p,
 		struct pt_regs *regs)
 {
 	void (*handler)(void);
@@ -198,7 +186,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		arch_copy_thread(&current->thread.arch, &p->thread.arch);
 	} else {
 		get_safe_registers(p->thread.regs.regs.gp, p->thread.regs.regs.fp);
-		p->thread.request.u.thread = current->thread.request.u.thread;
+		p->thread.request.u.thread.proc = (int (*)(void *))sp;
+		p->thread.request.u.thread.arg = (void *)arg;
 		handler = new_thread_handler;
 	}
 
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index 9926e11a772d..da85b6fc8e8e 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -13,6 +13,7 @@ endmenu
 config UML_X86
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
+	select GENERIC_KERNEL_THREAD
 
 config 64BIT
 	bool "64-bit kernel" if SUBARCH = "x86"
-- 
cgit v1.2.3


From 969ae0bfb079de892a95aa4ca1e8ac76c4beb57c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 26 May 2012 01:07:39 -0400
Subject: x86: get rid of duplicate code in case of CONFIG_VM86

no need to have the call of do_notify_resume() + checks around it
duplicated for vm86 case - a bit of rearranging of ifdefs and we'll
have a perfectly fine copy to jump back to.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/x86/kernel/entry_32.S | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index b6bb69239296..fe4cc305d8da 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -618,6 +618,10 @@ work_notifysig:				# deal with pending signals and
 	movl %esp, %eax
 	jne work_notifysig_v86		# returning to kernel-space or
 					# vm86-space
+1:
+#else
+	movl %esp, %eax
+#endif
 	TRACE_IRQS_ON
 	ENABLE_INTERRUPTS(CLBR_NONE)
 	movb PT_CS(%esp), %bl
@@ -628,24 +632,15 @@ work_notifysig:				# deal with pending signals and
 	call do_notify_resume
 	jmp resume_userspace
 
+#ifdef CONFIG_VM86
 	ALIGN
 work_notifysig_v86:
 	pushl_cfi %ecx			# save ti_flags for do_notify_resume
 	call save_v86_state		# %eax contains pt_regs pointer
 	popl_cfi %ecx
 	movl %eax, %esp
-#else
-	movl %esp, %eax
+	jmp 1b
 #endif
-	TRACE_IRQS_ON
-	ENABLE_INTERRUPTS(CLBR_NONE)
-	movb PT_CS(%esp), %bl
-	andb $SEGMENT_RPL_MASK, %bl
-	cmpb $USER_RPL, %bl
-	jb resume_kernel
-	xorl %edx, %edx
-	call do_notify_resume
-	jmp resume_userspace
 END(work_pending)
 
 	# perform syscall exit tracing
-- 
cgit v1.2.3


From a1ce39288e6fbefdd8d607021d02384eb4a20b99 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:25 +0100
Subject: UAPI: (Scripted) Convert #include "..." to #include <path/...> in
 kernel system headers

Convert #include "..." to #include <path/...> in kernel system headers.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 arch/arm/include/asm/page.h                      |  2 +-
 arch/arm/include/asm/pgtable.h                   |  2 +-
 arch/arm/include/asm/vfpmacros.h                 |  2 +-
 arch/cris/include/arch-v10/arch/sv_addr_ag.h     |  2 +-
 arch/cris/include/arch-v10/arch/svinto.h         |  2 +-
 arch/cris/include/arch-v32/arch/dma.h            |  2 +-
 arch/cris/include/arch-v32/arch/hwregs/dma.h     |  2 +-
 arch/m68k/include/asm/cacheflush.h               |  4 +--
 arch/m68k/include/asm/io.h                       |  4 +--
 arch/m68k/include/asm/m68360.h                   |  8 +++---
 arch/m68k/include/asm/m68360_enet.h              |  2 +-
 arch/m68k/include/asm/page.h                     |  4 +--
 arch/m68k/include/asm/pgtable.h                  |  4 +--
 arch/m68k/include/asm/q40_master.h               |  2 +-
 arch/m68k/include/asm/uaccess.h                  |  4 +--
 arch/microblaze/include/asm/mmu_context.h        |  2 +-
 arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h  |  2 +-
 arch/mips/include/asm/mach-pnx833x/gpio.h        |  2 +-
 arch/mips/include/asm/octeon/cvmx-asm.h          |  2 +-
 arch/mips/include/asm/octeon/cvmx-cmd-queue.h    |  2 +-
 arch/mips/include/asm/octeon/cvmx-fpa.h          |  4 +--
 arch/mips/include/asm/octeon/cvmx-helper-board.h |  2 +-
 arch/mips/include/asm/octeon/cvmx-helper.h       | 20 ++++++-------
 arch/mips/include/asm/octeon/cvmx-mdio.h         |  2 +-
 arch/mips/include/asm/octeon/cvmx-pip.h          |  6 ++--
 arch/mips/include/asm/octeon/cvmx-pko.h          |  8 +++---
 arch/mips/include/asm/octeon/cvmx-pow.h          |  4 +--
 arch/mips/include/asm/octeon/cvmx-spi.h          |  2 +-
 arch/mips/include/asm/octeon/cvmx-spinlock.h     |  2 +-
 arch/mips/include/asm/octeon/cvmx-wqe.h          |  2 +-
 arch/mips/include/asm/octeon/cvmx.h              | 36 ++++++++++++------------
 arch/mips/include/asm/octeon/octeon-model.h      |  2 +-
 arch/mips/include/asm/octeon/octeon.h            |  2 +-
 arch/mips/include/asm/sibyte/bcm1480_int.h       |  2 +-
 arch/mips/include/asm/sibyte/bcm1480_l2c.h       |  2 +-
 arch/mips/include/asm/sibyte/bcm1480_mc.h        |  2 +-
 arch/mips/include/asm/sibyte/bcm1480_regs.h      |  4 +--
 arch/mips/include/asm/sibyte/bcm1480_scd.h       |  4 +--
 arch/mips/include/asm/sibyte/sb1250_dma.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_genbus.h     |  2 +-
 arch/mips/include/asm/sibyte/sb1250_int.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_l2c.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_ldt.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_mac.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_mc.h         |  2 +-
 arch/mips/include/asm/sibyte/sb1250_regs.h       |  2 +-
 arch/mips/include/asm/sibyte/sb1250_scd.h        |  2 +-
 arch/mips/include/asm/sibyte/sb1250_smbus.h      |  2 +-
 arch/mips/include/asm/sibyte/sb1250_syncser.h    |  2 +-
 arch/mips/include/asm/sibyte/sb1250_uart.h       |  2 +-
 arch/powerpc/include/asm/ps3.h                   |  2 +-
 arch/powerpc/include/asm/ucc_fast.h              |  2 +-
 arch/powerpc/include/asm/ucc_slow.h              |  2 +-
 arch/sh/include/asm/bl_bit.h                     |  4 +--
 arch/sh/include/asm/cache_insns.h                |  4 +--
 arch/sh/include/asm/checksum.h                   |  2 +-
 arch/sh/include/asm/mmu_context.h                |  4 +--
 arch/sh/include/asm/posix_types.h                |  8 +++---
 arch/sh/include/asm/processor.h                  |  4 +--
 arch/sh/include/asm/ptrace.h                     |  4 +--
 arch/sh/include/asm/string.h                     |  4 +--
 arch/sh/include/asm/switch_to.h                  |  4 +--
 arch/sh/include/asm/syscall.h                    |  4 +--
 arch/sh/include/asm/syscalls.h                   |  4 +--
 arch/sh/include/asm/tlb.h                        |  2 +-
 arch/sh/include/asm/traps.h                      |  4 +--
 arch/sh/include/asm/uaccess.h                    |  4 +--
 arch/sh/include/asm/unistd.h                     |  8 +++---
 arch/sh/include/mach-ecovec24/mach/romimage.h    |  2 +-
 arch/sh/include/mach-kfr2r09/mach/romimage.h     |  2 +-
 arch/tile/include/gxio/dma_queue.h               |  2 +-
 arch/tile/include/gxio/mpipe.h                   |  4 +--
 arch/tile/include/gxio/trio.h                    |  4 +--
 arch/tile/include/gxio/usb_host.h                |  2 +-
 arch/tile/include/hv/iorpc.h                     |  2 +-
 arch/unicore32/include/mach/PKUnity.h            | 36 ++++++++++++------------
 arch/unicore32/include/mach/hardware.h           |  2 +-
 arch/unicore32/include/mach/uncompress.h         |  4 +--
 arch/x86/include/asm/atomic.h                    |  4 +--
 arch/x86/include/asm/calling.h                   |  2 +-
 arch/x86/include/asm/checksum.h                  |  4 +--
 arch/x86/include/asm/cmpxchg.h                   |  4 +--
 arch/x86/include/asm/mmzone.h                    |  4 +--
 arch/x86/include/asm/mutex.h                     |  4 +--
 arch/x86/include/asm/numa.h                      |  4 +--
 arch/x86/include/asm/pci.h                       |  2 +-
 arch/x86/include/asm/pgtable.h                   |  4 +--
 arch/x86/include/asm/pgtable_types.h             |  4 +--
 arch/x86/include/asm/posix_types.h               | 10 +++----
 arch/x86/include/asm/seccomp.h                   |  4 +--
 arch/x86/include/asm/string.h                    |  4 +--
 arch/x86/include/asm/suspend.h                   |  4 +--
 arch/x86/include/asm/uaccess.h                   |  4 +--
 arch/x86/include/asm/user.h                      |  4 +--
 arch/x86/include/asm/xen/interface.h             |  4 +--
 arch/x86/include/asm/xor.h                       |  4 +--
 arch/x86/include/asm/xor_32.h                    |  2 +-
 arch/x86/include/asm/xor_64.h                    |  2 +-
 include/acpi/acpi.h                              | 18 ++++++------
 include/acpi/acpiosxf.h                          |  4 +--
 include/acpi/acpixf.h                            |  6 ++--
 include/acpi/platform/acenv.h                    |  2 +-
 include/acpi/platform/aclinux.h                  |  2 +-
 include/drm/drm.h                                |  2 +-
 include/drm/drmP.h                               | 14 ++++-----
 include/drm/drm_buffer.h                         |  2 +-
 include/drm/drm_encoder_slave.h                  |  4 +--
 include/drm/drm_memory.h                         |  2 +-
 include/drm/drm_sarea.h                          |  2 +-
 include/drm/exynos_drm.h                         |  2 +-
 include/drm/i915_drm.h                           |  2 +-
 include/drm/mga_drm.h                            |  2 +-
 include/drm/radeon_drm.h                         |  2 +-
 include/drm/ttm/ttm_bo_api.h                     |  2 +-
 include/drm/ttm/ttm_bo_driver.h                  | 16 +++++------
 include/drm/ttm/ttm_execbuf_util.h               |  2 +-
 include/drm/ttm/ttm_lock.h                       |  2 +-
 include/drm/ttm/ttm_object.h                     |  2 +-
 include/drm/ttm/ttm_page_alloc.h                 |  4 +--
 include/drm/via_drm.h                            |  2 +-
 include/linux/bcma/bcma.h                        |  2 +-
 include/linux/ceph/ceph_fs.h                     |  4 +--
 include/linux/ceph/debugfs.h                     |  4 +--
 include/linux/ceph/decode.h                      |  2 +-
 include/linux/ceph/libceph.h                     | 14 ++++-----
 include/linux/ceph/mdsmap.h                      |  2 +-
 include/linux/ceph/messenger.h                   |  4 +--
 include/linux/ceph/mon_client.h                  |  2 +-
 include/linux/ceph/msgpool.h                     |  2 +-
 include/linux/ceph/osdmap.h                      |  4 +--
 include/linux/ceph/rados.h                       |  2 +-
 include/linux/ceph/types.h                       |  6 ++--
 include/linux/crush/mapper.h                     |  2 +-
 include/linux/drbd_tag_magic.h                   |  8 +++---
 include/linux/libfdt.h                           |  4 +--
 include/linux/netfilter/nf_conntrack_h323_asn1.h |  2 +-
 include/linux/pinctrl/consumer.h                 |  2 +-
 include/linux/pinctrl/machine.h                  |  2 +-
 include/linux/pinctrl/pinctrl.h                  |  2 +-
 include/linux/pinctrl/pinmux.h                   |  2 +-
 include/scsi/osd_attributes.h                    |  2 +-
 include/scsi/osd_initiator.h                     |  4 +--
 include/scsi/osd_sec.h                           |  4 +--
 include/sound/ac97_codec.h                       |  6 ++--
 include/sound/ad1816a.h                          |  6 ++--
 include/sound/ak4531_codec.h                     |  4 +--
 include/sound/emu10k1_synth.h                    |  4 +--
 include/sound/emu8000.h                          |  4 +--
 include/sound/emux_legacy.h                      |  2 +-
 include/sound/emux_synth.h                       | 14 ++++-----
 include/sound/es1688.h                           |  4 +--
 include/sound/gus.h                              | 10 +++----
 include/sound/mpu401.h                           |  2 +-
 include/sound/pcm.h                              |  2 +-
 include/sound/rawmidi.h                          |  2 +-
 include/sound/sb.h                               |  4 +--
 include/sound/sb16_csp.h                         |  4 +--
 include/sound/seq_kernel.h                       |  2 +-
 include/sound/seq_midi_emul.h                    |  2 +-
 include/sound/seq_midi_event.h                   |  2 +-
 include/sound/seq_oss.h                          |  4 +--
 include/sound/seq_virmidi.h                      |  4 +--
 include/sound/snd_wavefront.h                    |  8 +++---
 include/sound/soundfont.h                        |  4 +--
 include/sound/tea6330t.h                         |  2 +-
 include/sound/wss.h                              |  8 +++---
 include/trace/events/compaction.h                |  2 +-
 include/trace/events/kmem.h                      |  2 +-
 include/trace/events/vmscan.h                    |  2 +-
 include/xen/interface/callback.h                 |  2 +-
 include/xen/interface/hvm/params.h               |  2 +-
 include/xen/interface/io/blkif.h                 |  4 +--
 include/xen/interface/io/netif.h                 |  4 +--
 include/xen/interface/platform.h                 |  2 +-
 include/xen/interface/sched.h                    |  2 +-
 include/xen/interface/version.h                  |  2 +-
 176 files changed, 350 insertions(+), 350 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm/include/asm/page.h b/arch/arm/include/asm/page.h
index ecf901902e44..812a4944e783 100644
--- a/arch/arm/include/asm/page.h
+++ b/arch/arm/include/asm/page.h
@@ -19,7 +19,7 @@
 
 #ifndef CONFIG_MMU
 
-#include "page-nommu.h"
+#include <asm/page-nommu.h>
 
 #else
 
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 41dc31f834c3..08c12312a1f9 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -16,7 +16,7 @@
 #ifndef CONFIG_MMU
 
 #include <asm-generic/4level-fixup.h>
-#include "pgtable-nommu.h"
+#include <asm/pgtable-nommu.h>
 
 #else
 
diff --git a/arch/arm/include/asm/vfpmacros.h b/arch/arm/include/asm/vfpmacros.h
index 3d5fc41ae8d3..a7aadbd9a6dd 100644
--- a/arch/arm/include/asm/vfpmacros.h
+++ b/arch/arm/include/asm/vfpmacros.h
@@ -5,7 +5,7 @@
  */
 #include <asm/hwcap.h>
 
-#include "vfp.h"
+#include <asm/vfp.h>
 
 @ Macros to allow building with old toolkits (with no VFP support)
 	.macro	VFPFMRX, rd, sysreg, cond
diff --git a/arch/cris/include/arch-v10/arch/sv_addr_ag.h b/arch/cris/include/arch-v10/arch/sv_addr_ag.h
index e4a6b68b8982..5517f04153a4 100644
--- a/arch/cris/include/arch-v10/arch/sv_addr_ag.h
+++ b/arch/cris/include/arch-v10/arch/sv_addr_ag.h
@@ -114,7 +114,7 @@
 
 /*------------------------------------------------------------*/
 
-#include "sv_addr.agh"
+#include <arch/sv_addr.agh>
 
 #if __test_sv_addr__
 /* IO_MASK( R_BUS_CONFIG , CE ) */
diff --git a/arch/cris/include/arch-v10/arch/svinto.h b/arch/cris/include/arch-v10/arch/svinto.h
index 0881a1af7cee..da5c15272652 100644
--- a/arch/cris/include/arch-v10/arch/svinto.h
+++ b/arch/cris/include/arch-v10/arch/svinto.h
@@ -1,7 +1,7 @@
 #ifndef _ASM_CRIS_SVINTO_H
 #define _ASM_CRIS_SVINTO_H
 
-#include "sv_addr_ag.h"
+#include <arch/sv_addr_ag.h>
 
 extern unsigned int genconfig_shadow; /* defined and set in head.S */
 
diff --git a/arch/cris/include/arch-v32/arch/dma.h b/arch/cris/include/arch-v32/arch/dma.h
index 61906153a9af..6f92f4f23f28 100644
--- a/arch/cris/include/arch-v32/arch/dma.h
+++ b/arch/cris/include/arch-v32/arch/dma.h
@@ -1 +1 @@
-#include "mach/dma.h"
+#include <mach/dma.h>
diff --git a/arch/cris/include/arch-v32/arch/hwregs/dma.h b/arch/cris/include/arch-v32/arch/hwregs/dma.h
index 3ce322b5c731..52bf67907f28 100644
--- a/arch/cris/include/arch-v32/arch/hwregs/dma.h
+++ b/arch/cris/include/arch-v32/arch/hwregs/dma.h
@@ -7,7 +7,7 @@
 #define dma_h
 
 /* registers */ /* Really needed, since both are listed in sw.list? */
-#include "dma_defs.h"
+#include <arch/hwregs/dma_defs.h>
 
 
 /* descriptors */
diff --git a/arch/m68k/include/asm/cacheflush.h b/arch/m68k/include/asm/cacheflush.h
index a70d7319630a..4fc738209bd1 100644
--- a/arch/m68k/include/asm/cacheflush.h
+++ b/arch/m68k/include/asm/cacheflush.h
@@ -1,5 +1,5 @@
 #ifdef __uClinux__
-#include "cacheflush_no.h"
+#include <asm/cacheflush_no.h>
 #else
-#include "cacheflush_mm.h"
+#include <asm/cacheflush_mm.h>
 #endif
diff --git a/arch/m68k/include/asm/io.h b/arch/m68k/include/asm/io.h
index c7210ba184ea..c70cc9155003 100644
--- a/arch/m68k/include/asm/io.h
+++ b/arch/m68k/include/asm/io.h
@@ -1,5 +1,5 @@
 #ifdef __uClinux__
-#include "io_no.h"
+#include <asm/io_no.h>
 #else
-#include "io_mm.h"
+#include <asm/io_mm.h>
 #endif
diff --git a/arch/m68k/include/asm/m68360.h b/arch/m68k/include/asm/m68360.h
index eb7d39ef2855..4664180a3ab3 100644
--- a/arch/m68k/include/asm/m68360.h
+++ b/arch/m68k/include/asm/m68360.h
@@ -1,7 +1,7 @@
-#include "m68360_regs.h"
-#include "m68360_pram.h"
-#include "m68360_quicc.h"
-#include "m68360_enet.h"
+#include <asm/m68360_regs.h>
+#include <asm/m68360_pram.h>
+#include <asm/m68360_quicc.h>
+#include <asm/m68360_enet.h>
 
 #ifdef CONFIG_M68360
 
diff --git a/arch/m68k/include/asm/m68360_enet.h b/arch/m68k/include/asm/m68360_enet.h
index c36f4d059203..4d04037c78a2 100644
--- a/arch/m68k/include/asm/m68360_enet.h
+++ b/arch/m68k/include/asm/m68360_enet.h
@@ -10,7 +10,7 @@
 #ifndef __ETHER_H
 #define __ETHER_H
 
-#include "quicc_simple.h"
+#include <asm/quicc_simple.h>
 
 /*
  * transmit BD's
diff --git a/arch/m68k/include/asm/page.h b/arch/m68k/include/asm/page.h
index 98baa82a8615..7c360dac00b7 100644
--- a/arch/m68k/include/asm/page.h
+++ b/arch/m68k/include/asm/page.h
@@ -43,9 +43,9 @@ extern unsigned long _ramend;
 #endif /* !__ASSEMBLY__ */
 
 #ifdef CONFIG_MMU
-#include "page_mm.h"
+#include <asm/page_mm.h>
 #else
-#include "page_no.h"
+#include <asm/page_no.h>
 #endif
 
 #include <asm-generic/getorder.h>
diff --git a/arch/m68k/include/asm/pgtable.h b/arch/m68k/include/asm/pgtable.h
index ee6759eb445a..a3d733b524d2 100644
--- a/arch/m68k/include/asm/pgtable.h
+++ b/arch/m68k/include/asm/pgtable.h
@@ -1,5 +1,5 @@
 #ifdef __uClinux__
-#include "pgtable_no.h"
+#include <asm/pgtable_no.h>
 #else
-#include "pgtable_mm.h"
+#include <asm/pgtable_mm.h>
 #endif
diff --git a/arch/m68k/include/asm/q40_master.h b/arch/m68k/include/asm/q40_master.h
index 3907a09d4fca..fc5b36278d04 100644
--- a/arch/m68k/include/asm/q40_master.h
+++ b/arch/m68k/include/asm/q40_master.h
@@ -60,7 +60,7 @@
 #define Q40_RTC_WRITE  128
 
 /* define some Q40 specific ints */
-#include "q40ints.h"
+#include <asm/q40ints.h>
 
 /* misc defs */
 #define DAC_LEFT  ((unsigned char *)0xff008000)
diff --git a/arch/m68k/include/asm/uaccess.h b/arch/m68k/include/asm/uaccess.h
index 38f92dbb9a45..639c731568b0 100644
--- a/arch/m68k/include/asm/uaccess.h
+++ b/arch/m68k/include/asm/uaccess.h
@@ -1,5 +1,5 @@
 #ifdef __uClinux__
-#include "uaccess_no.h"
+#include <asm/uaccess_no.h>
 #else
-#include "uaccess_mm.h"
+#include <asm/uaccess_mm.h>
 #endif
diff --git a/arch/microblaze/include/asm/mmu_context.h b/arch/microblaze/include/asm/mmu_context.h
index 24eab1674d3e..0ccd8c402cd9 100644
--- a/arch/microblaze/include/asm/mmu_context.h
+++ b/arch/microblaze/include/asm/mmu_context.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_MMU
-# include "mmu_context_mm.h"
+# include <asm/mmu_context_mm.h>
 #else
 # include <asm-generic/mmu_context.h>
 #endif
diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h
index 9203d90e610c..03a54df5fb86 100644
--- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h
+++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_io.h
@@ -1,7 +1,7 @@
 #ifndef BCM63XX_IO_H_
 #define BCM63XX_IO_H_
 
-#include "bcm63xx_cpu.h"
+#include <asm/mach-bcm63xx/bcm63xx_cpu.h>
 
 /*
  * Physical memory map, RAM is mapped at 0x0.
diff --git a/arch/mips/include/asm/mach-pnx833x/gpio.h b/arch/mips/include/asm/mach-pnx833x/gpio.h
index ed3a88da70f6..f192acf4a8af 100644
--- a/arch/mips/include/asm/mach-pnx833x/gpio.h
+++ b/arch/mips/include/asm/mach-pnx833x/gpio.h
@@ -30,7 +30,7 @@
    - including locking between different uses
 */
 
-#include "pnx833x.h"
+#include <asm/mach-pnx833x/pnx833x.h>
 
 #define SET_REG_BIT(reg, bit)		do { (reg |= (1 << (bit))); } while (0)
 #define CLEAR_REG_BIT(reg, bit)		do { (reg &= ~(1 << (bit))); } while (0)
diff --git a/arch/mips/include/asm/octeon/cvmx-asm.h b/arch/mips/include/asm/octeon/cvmx-asm.h
index 5de5de95311b..31eacc24b775 100644
--- a/arch/mips/include/asm/octeon/cvmx-asm.h
+++ b/arch/mips/include/asm/octeon/cvmx-asm.h
@@ -32,7 +32,7 @@
 #ifndef __CVMX_ASM_H__
 #define __CVMX_ASM_H__
 
-#include "octeon-model.h"
+#include <asm/octeon/octeon-model.h>
 
 /* other useful stuff */
 #define CVMX_SYNC asm volatile ("sync" : : : "memory")
diff --git a/arch/mips/include/asm/octeon/cvmx-cmd-queue.h b/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
index 614653b686a0..fed91125317f 100644
--- a/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
+++ b/arch/mips/include/asm/octeon/cvmx-cmd-queue.h
@@ -76,7 +76,7 @@
 
 #include <linux/prefetch.h>
 
-#include "cvmx-fpa.h"
+#include <asm/octeon/cvmx-fpa.h>
 /**
  * By default we disable the max depth support. Most programs
  * don't use it and it slows down the command queue processing
diff --git a/arch/mips/include/asm/octeon/cvmx-fpa.h b/arch/mips/include/asm/octeon/cvmx-fpa.h
index 1f04f9658736..541a1ae02b6f 100644
--- a/arch/mips/include/asm/octeon/cvmx-fpa.h
+++ b/arch/mips/include/asm/octeon/cvmx-fpa.h
@@ -36,8 +36,8 @@
 #ifndef __CVMX_FPA_H__
 #define __CVMX_FPA_H__
 
-#include "cvmx-address.h"
-#include "cvmx-fpa-defs.h"
+#include <asm/octeon/cvmx-address.h>
+#include <asm/octeon/cvmx-fpa-defs.h>
 
 #define CVMX_FPA_NUM_POOLS      8
 #define CVMX_FPA_MIN_BLOCK_SIZE 128
diff --git a/arch/mips/include/asm/octeon/cvmx-helper-board.h b/arch/mips/include/asm/octeon/cvmx-helper-board.h
index 88527fa835c9..442f508eaac9 100644
--- a/arch/mips/include/asm/octeon/cvmx-helper-board.h
+++ b/arch/mips/include/asm/octeon/cvmx-helper-board.h
@@ -34,7 +34,7 @@
 #ifndef __CVMX_HELPER_BOARD_H__
 #define __CVMX_HELPER_BOARD_H__
 
-#include "cvmx-helper.h"
+#include <asm/octeon/cvmx-helper.h>
 
 typedef enum {
 	set_phy_link_flags_autoneg = 0x1,
diff --git a/arch/mips/include/asm/octeon/cvmx-helper.h b/arch/mips/include/asm/octeon/cvmx-helper.h
index 0ac6b9f412be..691c8142cd4f 100644
--- a/arch/mips/include/asm/octeon/cvmx-helper.h
+++ b/arch/mips/include/asm/octeon/cvmx-helper.h
@@ -34,9 +34,9 @@
 #ifndef __CVMX_HELPER_H__
 #define __CVMX_HELPER_H__
 
-#include "cvmx-config.h"
-#include "cvmx-fpa.h"
-#include "cvmx-wqe.h"
+#include <asm/octeon/cvmx-config.h>
+#include <asm/octeon/cvmx-fpa.h>
+#include <asm/octeon/cvmx-wqe.h>
 
 typedef enum {
 	CVMX_HELPER_INTERFACE_MODE_DISABLED,
@@ -62,13 +62,13 @@ typedef union {
 } cvmx_helper_link_info_t;
 
 #include <asm/octeon/cvmx-helper-errata.h>
-#include "cvmx-helper-loop.h"
-#include "cvmx-helper-npi.h"
-#include "cvmx-helper-rgmii.h"
-#include "cvmx-helper-sgmii.h"
-#include "cvmx-helper-spi.h"
-#include "cvmx-helper-util.h"
-#include "cvmx-helper-xaui.h"
+#include <asm/octeon/cvmx-helper-loop.h>
+#include <asm/octeon/cvmx-helper-npi.h>
+#include <asm/octeon/cvmx-helper-rgmii.h>
+#include <asm/octeon/cvmx-helper-sgmii.h>
+#include <asm/octeon/cvmx-helper-spi.h>
+#include <asm/octeon/cvmx-helper-util.h>
+#include <asm/octeon/cvmx-helper-xaui.h>
 
 /**
  * cvmx_override_pko_queue_priority(int ipd_port, uint64_t
diff --git a/arch/mips/include/asm/octeon/cvmx-mdio.h b/arch/mips/include/asm/octeon/cvmx-mdio.h
index d88ab8d8e37d..6f0cd182cec8 100644
--- a/arch/mips/include/asm/octeon/cvmx-mdio.h
+++ b/arch/mips/include/asm/octeon/cvmx-mdio.h
@@ -35,7 +35,7 @@
 #ifndef __CVMX_MIO_H__
 #define __CVMX_MIO_H__
 
-#include "cvmx-smix-defs.h"
+#include <asm/octeon/cvmx-smix-defs.h>
 
 /**
  * PHY register 0 from the 802.3 spec
diff --git a/arch/mips/include/asm/octeon/cvmx-pip.h b/arch/mips/include/asm/octeon/cvmx-pip.h
index 78dbce8f2c5e..9e739a640855 100644
--- a/arch/mips/include/asm/octeon/cvmx-pip.h
+++ b/arch/mips/include/asm/octeon/cvmx-pip.h
@@ -33,9 +33,9 @@
 #ifndef __CVMX_PIP_H__
 #define __CVMX_PIP_H__
 
-#include "cvmx-wqe.h"
-#include "cvmx-fpa.h"
-#include "cvmx-pip-defs.h"
+#include <asm/octeon/cvmx-wqe.h>
+#include <asm/octeon/cvmx-fpa.h>
+#include <asm/octeon/cvmx-pip-defs.h>
 
 #define CVMX_PIP_NUM_INPUT_PORTS                40
 #define CVMX_PIP_NUM_WATCHERS                   4
diff --git a/arch/mips/include/asm/octeon/cvmx-pko.h b/arch/mips/include/asm/octeon/cvmx-pko.h
index de3412aada5d..c6daeedf1f81 100644
--- a/arch/mips/include/asm/octeon/cvmx-pko.h
+++ b/arch/mips/include/asm/octeon/cvmx-pko.h
@@ -58,10 +58,10 @@
 #ifndef __CVMX_PKO_H__
 #define __CVMX_PKO_H__
 
-#include "cvmx-fpa.h"
-#include "cvmx-pow.h"
-#include "cvmx-cmd-queue.h"
-#include "cvmx-pko-defs.h"
+#include <asm/octeon/cvmx-fpa.h>
+#include <asm/octeon/cvmx-pow.h>
+#include <asm/octeon/cvmx-cmd-queue.h>
+#include <asm/octeon/cvmx-pko-defs.h>
 
 /* Adjust the command buffer size by 1 word so that in the case of using only
  * two word PKO commands no command words stradle buffers.  The useful values
diff --git a/arch/mips/include/asm/octeon/cvmx-pow.h b/arch/mips/include/asm/octeon/cvmx-pow.h
index 999aefe3274c..92742b241a51 100644
--- a/arch/mips/include/asm/octeon/cvmx-pow.h
+++ b/arch/mips/include/asm/octeon/cvmx-pow.h
@@ -53,8 +53,8 @@
 
 #include <asm/octeon/cvmx-pow-defs.h>
 
-#include "cvmx-scratch.h"
-#include "cvmx-wqe.h"
+#include <asm/octeon/cvmx-scratch.h>
+#include <asm/octeon/cvmx-wqe.h>
 
 /* Default to having all POW constancy checks turned on */
 #ifndef CVMX_ENABLE_POW_CHECKS
diff --git a/arch/mips/include/asm/octeon/cvmx-spi.h b/arch/mips/include/asm/octeon/cvmx-spi.h
index e814648953a5..3bf53b537bcf 100644
--- a/arch/mips/include/asm/octeon/cvmx-spi.h
+++ b/arch/mips/include/asm/octeon/cvmx-spi.h
@@ -32,7 +32,7 @@
 #ifndef __CVMX_SPI_H__
 #define __CVMX_SPI_H__
 
-#include "cvmx-gmxx-defs.h"
+#include <asm/octeon/cvmx-gmxx-defs.h>
 
 /* CSR typedefs have been moved to cvmx-csr-*.h */
 
diff --git a/arch/mips/include/asm/octeon/cvmx-spinlock.h b/arch/mips/include/asm/octeon/cvmx-spinlock.h
index 2fbf0871df11..a672abb1bc4f 100644
--- a/arch/mips/include/asm/octeon/cvmx-spinlock.h
+++ b/arch/mips/include/asm/octeon/cvmx-spinlock.h
@@ -35,7 +35,7 @@
 #ifndef __CVMX_SPINLOCK_H__
 #define __CVMX_SPINLOCK_H__
 
-#include "cvmx-asm.h"
+#include <asm/octeon/cvmx-asm.h>
 
 /* Spinlocks for Octeon */
 
diff --git a/arch/mips/include/asm/octeon/cvmx-wqe.h b/arch/mips/include/asm/octeon/cvmx-wqe.h
index 653610953d28..df762389e271 100644
--- a/arch/mips/include/asm/octeon/cvmx-wqe.h
+++ b/arch/mips/include/asm/octeon/cvmx-wqe.h
@@ -40,7 +40,7 @@
 #ifndef __CVMX_WQE_H__
 #define __CVMX_WQE_H__
 
-#include "cvmx-packet.h"
+#include <asm/octeon/cvmx-packet.h>
 
 
 #define OCT_TAG_TYPE_STRING(x)						\
diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h
index 740be97a3251..db58beab6cb2 100644
--- a/arch/mips/include/asm/octeon/cvmx.h
+++ b/arch/mips/include/asm/octeon/cvmx.h
@@ -52,24 +52,24 @@ enum cvmx_mips_space {
 #define CVMX_ADD_IO_SEG(add) CVMX_ADD_SEG(CVMX_IO_SEG, (add))
 #endif
 
-#include "cvmx-asm.h"
-#include "cvmx-packet.h"
-#include "cvmx-sysinfo.h"
-
-#include "cvmx-ciu-defs.h"
-#include "cvmx-gpio-defs.h"
-#include "cvmx-iob-defs.h"
-#include "cvmx-ipd-defs.h"
-#include "cvmx-l2c-defs.h"
-#include "cvmx-l2d-defs.h"
-#include "cvmx-l2t-defs.h"
-#include "cvmx-led-defs.h"
-#include "cvmx-mio-defs.h"
-#include "cvmx-pow-defs.h"
-
-#include "cvmx-bootinfo.h"
-#include "cvmx-bootmem.h"
-#include "cvmx-l2c.h"
+#include <asm/octeon/cvmx-asm.h>
+#include <asm/octeon/cvmx-packet.h>
+#include <asm/octeon/cvmx-sysinfo.h>
+
+#include <asm/octeon/cvmx-ciu-defs.h>
+#include <asm/octeon/cvmx-gpio-defs.h>
+#include <asm/octeon/cvmx-iob-defs.h>
+#include <asm/octeon/cvmx-ipd-defs.h>
+#include <asm/octeon/cvmx-l2c-defs.h>
+#include <asm/octeon/cvmx-l2d-defs.h>
+#include <asm/octeon/cvmx-l2t-defs.h>
+#include <asm/octeon/cvmx-led-defs.h>
+#include <asm/octeon/cvmx-mio-defs.h>
+#include <asm/octeon/cvmx-pow-defs.h>
+
+#include <asm/octeon/cvmx-bootinfo.h>
+#include <asm/octeon/cvmx-bootmem.h>
+#include <asm/octeon/cvmx-l2c.h>
 
 #ifndef CVMX_ENABLE_DEBUG_PRINTS
 #define CVMX_ENABLE_DEBUG_PRINTS 1
diff --git a/arch/mips/include/asm/octeon/octeon-model.h b/arch/mips/include/asm/octeon/octeon-model.h
index 4e338a4d9424..23b895cb260b 100644
--- a/arch/mips/include/asm/octeon/octeon-model.h
+++ b/arch/mips/include/asm/octeon/octeon-model.h
@@ -313,6 +313,6 @@ static inline int __octeon_is_model_runtime__(uint32_t model)
 const char *octeon_model_get_string(uint32_t chip_id);
 const char *octeon_model_get_string_buffer(uint32_t chip_id, char *buffer);
 
-#include "octeon-feature.h"
+#include <asm/octeon/octeon-feature.h>
 
 #endif /* __OCTEON_MODEL_H__ */
diff --git a/arch/mips/include/asm/octeon/octeon.h b/arch/mips/include/asm/octeon/octeon.h
index 1e2486e23573..c4a1b31966bb 100644
--- a/arch/mips/include/asm/octeon/octeon.h
+++ b/arch/mips/include/asm/octeon/octeon.h
@@ -8,7 +8,7 @@
 #ifndef __ASM_OCTEON_OCTEON_H
 #define __ASM_OCTEON_OCTEON_H
 
-#include "cvmx.h"
+#include <asm/octeon/cvmx.h>
 
 extern uint64_t octeon_bootmem_alloc_range_phys(uint64_t size,
 						uint64_t alignment,
diff --git a/arch/mips/include/asm/sibyte/bcm1480_int.h b/arch/mips/include/asm/sibyte/bcm1480_int.h
index 6109557c14e9..fffb224d2297 100644
--- a/arch/mips/include/asm/sibyte/bcm1480_int.h
+++ b/arch/mips/include/asm/sibyte/bcm1480_int.h
@@ -34,7 +34,7 @@
 #ifndef _BCM1480_INT_H
 #define _BCM1480_INT_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  Interrupt Mapper Constants
diff --git a/arch/mips/include/asm/sibyte/bcm1480_l2c.h b/arch/mips/include/asm/sibyte/bcm1480_l2c.h
index fd75817f7ac4..725d38cb9d1c 100644
--- a/arch/mips/include/asm/sibyte/bcm1480_l2c.h
+++ b/arch/mips/include/asm/sibyte/bcm1480_l2c.h
@@ -33,7 +33,7 @@
 #ifndef _BCM1480_L2C_H
 #define _BCM1480_L2C_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Format of level 2 cache management address (Table 55)
diff --git a/arch/mips/include/asm/sibyte/bcm1480_mc.h b/arch/mips/include/asm/sibyte/bcm1480_mc.h
index f26a41a82b59..4307a758e3bf 100644
--- a/arch/mips/include/asm/sibyte/bcm1480_mc.h
+++ b/arch/mips/include/asm/sibyte/bcm1480_mc.h
@@ -33,7 +33,7 @@
 #ifndef _BCM1480_MC_H
 #define _BCM1480_MC_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Memory Channel Configuration Register (Table 81)
diff --git a/arch/mips/include/asm/sibyte/bcm1480_regs.h b/arch/mips/include/asm/sibyte/bcm1480_regs.h
index b4077bb72611..84d168ddfebb 100644
--- a/arch/mips/include/asm/sibyte/bcm1480_regs.h
+++ b/arch/mips/include/asm/sibyte/bcm1480_regs.h
@@ -32,14 +32,14 @@
 #ifndef _BCM1480_REGS_H
 #define _BCM1480_REGS_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  Pull in the BCM1250's registers since a great deal of the 1480's
     *  functions are the same as the BCM1250.
     ********************************************************************* */
 
-#include "sb1250_regs.h"
+#include <asm/sibyte/sb1250_regs.h>
 
 
 /*  *********************************************************************
diff --git a/arch/mips/include/asm/sibyte/bcm1480_scd.h b/arch/mips/include/asm/sibyte/bcm1480_scd.h
index 25ef24cbb92a..2af3706b9648 100644
--- a/arch/mips/include/asm/sibyte/bcm1480_scd.h
+++ b/arch/mips/include/asm/sibyte/bcm1480_scd.h
@@ -32,13 +32,13 @@
 #ifndef _BCM1480_SCD_H
 #define _BCM1480_SCD_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  Pull in the BCM1250's SCD since lots of stuff is the same.
     ********************************************************************* */
 
-#include "sb1250_scd.h"
+#include <asm/sibyte/sb1250_scd.h>
 
 /*  *********************************************************************
     *  Some general notes:
diff --git a/arch/mips/include/asm/sibyte/sb1250_dma.h b/arch/mips/include/asm/sibyte/sb1250_dma.h
index bad56171d747..6c44dfb52878 100644
--- a/arch/mips/include/asm/sibyte/sb1250_dma.h
+++ b/arch/mips/include/asm/sibyte/sb1250_dma.h
@@ -36,7 +36,7 @@
 #define _SB1250_DMA_H
 
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  DMA Registers
diff --git a/arch/mips/include/asm/sibyte/sb1250_genbus.h b/arch/mips/include/asm/sibyte/sb1250_genbus.h
index 94e9c7c8e783..a96ded17bdc9 100644
--- a/arch/mips/include/asm/sibyte/sb1250_genbus.h
+++ b/arch/mips/include/asm/sibyte/sb1250_genbus.h
@@ -34,7 +34,7 @@
 #ifndef _SB1250_GENBUS_H
 #define _SB1250_GENBUS_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Generic Bus Region Configuration Registers (Table 11-4)
diff --git a/arch/mips/include/asm/sibyte/sb1250_int.h b/arch/mips/include/asm/sibyte/sb1250_int.h
index f2850b4bcfd4..dbea73ddd2fe 100644
--- a/arch/mips/include/asm/sibyte/sb1250_int.h
+++ b/arch/mips/include/asm/sibyte/sb1250_int.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_INT_H
 #define _SB1250_INT_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  Interrupt Mapper Constants
diff --git a/arch/mips/include/asm/sibyte/sb1250_l2c.h b/arch/mips/include/asm/sibyte/sb1250_l2c.h
index 6554dcf05cfe..b61a7491607d 100644
--- a/arch/mips/include/asm/sibyte/sb1250_l2c.h
+++ b/arch/mips/include/asm/sibyte/sb1250_l2c.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_L2C_H
 #define _SB1250_L2C_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Level 2 Cache Tag register (Table 5-3)
diff --git a/arch/mips/include/asm/sibyte/sb1250_ldt.h b/arch/mips/include/asm/sibyte/sb1250_ldt.h
index 1e76cf137995..bf7f320d1a87 100644
--- a/arch/mips/include/asm/sibyte/sb1250_ldt.h
+++ b/arch/mips/include/asm/sibyte/sb1250_ldt.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_LDT_H
 #define _SB1250_LDT_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 #define K_LDT_VENDOR_SIBYTE	0x166D
 #define K_LDT_DEVICE_SB1250	0x0002
diff --git a/arch/mips/include/asm/sibyte/sb1250_mac.h b/arch/mips/include/asm/sibyte/sb1250_mac.h
index 77f787284235..cfc4d7870882 100644
--- a/arch/mips/include/asm/sibyte/sb1250_mac.h
+++ b/arch/mips/include/asm/sibyte/sb1250_mac.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_MAC_H
 #define _SB1250_MAC_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  Ethernet MAC Registers
diff --git a/arch/mips/include/asm/sibyte/sb1250_mc.h b/arch/mips/include/asm/sibyte/sb1250_mc.h
index 1eb1b5a88736..15048dcaf22f 100644
--- a/arch/mips/include/asm/sibyte/sb1250_mc.h
+++ b/arch/mips/include/asm/sibyte/sb1250_mc.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_MC_H
 #define _SB1250_MC_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Memory Channel Config Register (table 6-14)
diff --git a/arch/mips/include/asm/sibyte/sb1250_regs.h b/arch/mips/include/asm/sibyte/sb1250_regs.h
index 8f53ec817a5e..29b9f0b26b3a 100644
--- a/arch/mips/include/asm/sibyte/sb1250_regs.h
+++ b/arch/mips/include/asm/sibyte/sb1250_regs.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_REGS_H
 #define _SB1250_REGS_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 
 /*  *********************************************************************
diff --git a/arch/mips/include/asm/sibyte/sb1250_scd.h b/arch/mips/include/asm/sibyte/sb1250_scd.h
index e49c3e89b5ee..615e165dbd21 100644
--- a/arch/mips/include/asm/sibyte/sb1250_scd.h
+++ b/arch/mips/include/asm/sibyte/sb1250_scd.h
@@ -32,7 +32,7 @@
 #ifndef _SB1250_SCD_H
 #define _SB1250_SCD_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*  *********************************************************************
     *  System control/debug registers
diff --git a/arch/mips/include/asm/sibyte/sb1250_smbus.h b/arch/mips/include/asm/sibyte/sb1250_smbus.h
index 04769923cf1e..128d6b75b819 100644
--- a/arch/mips/include/asm/sibyte/sb1250_smbus.h
+++ b/arch/mips/include/asm/sibyte/sb1250_smbus.h
@@ -34,7 +34,7 @@
 #ifndef _SB1250_SMBUS_H
 #define _SB1250_SMBUS_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * SMBus Clock Frequency Register (Table 14-2)
diff --git a/arch/mips/include/asm/sibyte/sb1250_syncser.h b/arch/mips/include/asm/sibyte/sb1250_syncser.h
index d4b8558e0bf1..274e9179d326 100644
--- a/arch/mips/include/asm/sibyte/sb1250_syncser.h
+++ b/arch/mips/include/asm/sibyte/sb1250_syncser.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_SYNCSER_H
 #define _SB1250_SYNCSER_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /*
  * Serial Mode Configuration Register
diff --git a/arch/mips/include/asm/sibyte/sb1250_uart.h b/arch/mips/include/asm/sibyte/sb1250_uart.h
index d835bf280140..bb99ecac5817 100644
--- a/arch/mips/include/asm/sibyte/sb1250_uart.h
+++ b/arch/mips/include/asm/sibyte/sb1250_uart.h
@@ -33,7 +33,7 @@
 #ifndef _SB1250_UART_H
 #define _SB1250_UART_H
 
-#include "sb1250_defs.h"
+#include <asm/sibyte/sb1250_defs.h>
 
 /* **********************************************************************
    * DUART Registers
diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index 7f065e178ec4..0e15db4d703b 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -24,7 +24,7 @@
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/device.h>
-#include "cell-pmu.h"
+#include <asm/cell-pmu.h>
 
 union ps3_firmware_version {
 	u64 raw;
diff --git a/arch/powerpc/include/asm/ucc_fast.h b/arch/powerpc/include/asm/ucc_fast.h
index 839aab8bf37d..4644c840e2fa 100644
--- a/arch/powerpc/include/asm/ucc_fast.h
+++ b/arch/powerpc/include/asm/ucc_fast.h
@@ -19,7 +19,7 @@
 #include <asm/immap_qe.h>
 #include <asm/qe.h>
 
-#include "ucc.h"
+#include <asm/ucc.h>
 
 /* Receive BD's status */
 #define R_E	0x80000000	/* buffer empty */
diff --git a/arch/powerpc/include/asm/ucc_slow.h b/arch/powerpc/include/asm/ucc_slow.h
index 0980e6ad335b..cf131ffdb8d1 100644
--- a/arch/powerpc/include/asm/ucc_slow.h
+++ b/arch/powerpc/include/asm/ucc_slow.h
@@ -20,7 +20,7 @@
 #include <asm/immap_qe.h>
 #include <asm/qe.h>
 
-#include "ucc.h"
+#include <asm/ucc.h>
 
 /* transmit BD's status */
 #define T_R	0x80000000	/* ready bit */
diff --git a/arch/sh/include/asm/bl_bit.h b/arch/sh/include/asm/bl_bit.h
index 45e6b9fc37a0..06e4163c6746 100644
--- a/arch/sh/include/asm/bl_bit.h
+++ b/arch/sh/include/asm/bl_bit.h
@@ -2,9 +2,9 @@
 #define __ASM_SH_BL_BIT_H
 
 #ifdef CONFIG_SUPERH32
-# include "bl_bit_32.h"
+# include <asm/bl_bit_32.h>
 #else
-# include "bl_bit_64.h"
+# include <asm/bl_bit_64.h>
 #endif
 
 #endif /* __ASM_SH_BL_BIT_H */
diff --git a/arch/sh/include/asm/cache_insns.h b/arch/sh/include/asm/cache_insns.h
index d25fbe53090d..355cb06b7a30 100644
--- a/arch/sh/include/asm/cache_insns.h
+++ b/arch/sh/include/asm/cache_insns.h
@@ -3,9 +3,9 @@
 
 
 #ifdef CONFIG_SUPERH32
-# include "cache_insns_32.h"
+# include <asm/cache_insns_32.h>
 #else
-# include "cache_insns_64.h"
+# include <asm/cache_insns_64.h>
 #endif
 
 #endif /* __ASM_SH_CACHE_INSNS_H */
diff --git a/arch/sh/include/asm/checksum.h b/arch/sh/include/asm/checksum.h
index fc26d1f4b590..34ae26204524 100644
--- a/arch/sh/include/asm/checksum.h
+++ b/arch/sh/include/asm/checksum.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_SUPERH32
-# include "checksum_32.h"
+# include <asm/checksum_32.h>
 #else
 # include <asm-generic/checksum.h>
 #endif
diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h
index 384c7471a374..21c5088788da 100644
--- a/arch/sh/include/asm/mmu_context.h
+++ b/arch/sh/include/asm/mmu_context.h
@@ -46,9 +46,9 @@
 #define MMU_VPN_MASK	0xfffff000
 
 #if defined(CONFIG_SUPERH32)
-#include "mmu_context_32.h"
+#include <asm/mmu_context_32.h>
 #else
-#include "mmu_context_64.h"
+#include <asm/mmu_context_64.h>
 #endif
 
 /*
diff --git a/arch/sh/include/asm/posix_types.h b/arch/sh/include/asm/posix_types.h
index 4eeb723aee7e..f08449bcbde7 100644
--- a/arch/sh/include/asm/posix_types.h
+++ b/arch/sh/include/asm/posix_types.h
@@ -1,13 +1,13 @@
 #ifdef __KERNEL__
 # ifdef CONFIG_SUPERH32
-#  include "posix_types_32.h"
+#  include <asm/posix_types_32.h>
 # else
-#  include "posix_types_64.h"
+#  include <asm/posix_types_64.h>
 # endif
 #else
 # ifdef __SH5__
-#  include "posix_types_64.h"
+#  include <asm/posix_types_64.h>
 # else
-#  include "posix_types_32.h"
+#  include <asm/posix_types_32.h>
 # endif
 #endif /* __KERNEL__ */
diff --git a/arch/sh/include/asm/processor.h b/arch/sh/include/asm/processor.h
index 3d14aeaef57c..5448f9bbf4ab 100644
--- a/arch/sh/include/asm/processor.h
+++ b/arch/sh/include/asm/processor.h
@@ -175,9 +175,9 @@ extern unsigned int instruction_size(unsigned int insn);
 #endif /* __ASSEMBLY__ */
 
 #ifdef CONFIG_SUPERH32
-# include "processor_32.h"
+# include <asm/processor_32.h>
 #else
-# include "processor_64.h"
+# include <asm/processor_64.h>
 #endif
 
 #endif /* __ASM_SH_PROCESSOR_H */
diff --git a/arch/sh/include/asm/ptrace.h b/arch/sh/include/asm/ptrace.h
index c7b7e1ed194a..a4a38dff997a 100644
--- a/arch/sh/include/asm/ptrace.h
+++ b/arch/sh/include/asm/ptrace.h
@@ -25,9 +25,9 @@
 #define PT_TEXT_LEN		252
 
 #if defined(__SH5__) || defined(CONFIG_CPU_SH5)
-#include "ptrace_64.h"
+#include <asm/ptrace_64.h>
 #else
-#include "ptrace_32.h"
+#include <asm/ptrace_32.h>
 #endif
 
 #ifdef __KERNEL__
diff --git a/arch/sh/include/asm/string.h b/arch/sh/include/asm/string.h
index 8c1ea21dc0ae..114011fa08af 100644
--- a/arch/sh/include/asm/string.h
+++ b/arch/sh/include/asm/string.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_SUPERH32
-# include "string_32.h"
+# include <asm/string_32.h>
 #else
-# include "string_64.h"
+# include <asm/string_64.h>
 #endif
diff --git a/arch/sh/include/asm/switch_to.h b/arch/sh/include/asm/switch_to.h
index 62b1941813e3..bcd722fc8347 100644
--- a/arch/sh/include/asm/switch_to.h
+++ b/arch/sh/include/asm/switch_to.h
@@ -11,9 +11,9 @@
 #define __ASM_SH_SWITCH_TO_H
 
 #ifdef CONFIG_SUPERH32
-# include "switch_to_32.h"
+# include <asm/switch_to_32.h>
 #else
-# include "switch_to_64.h"
+# include <asm/switch_to_64.h>
 #endif
 
 #endif /* __ASM_SH_SWITCH_TO_H */
diff --git a/arch/sh/include/asm/syscall.h b/arch/sh/include/asm/syscall.h
index aa7777bdc370..847128da6eac 100644
--- a/arch/sh/include/asm/syscall.h
+++ b/arch/sh/include/asm/syscall.h
@@ -4,9 +4,9 @@
 extern const unsigned long sys_call_table[];
 
 #ifdef CONFIG_SUPERH32
-# include "syscall_32.h"
+# include <asm/syscall_32.h>
 #else
-# include "syscall_64.h"
+# include <asm/syscall_64.h>
 #endif
 
 #endif /* __ASM_SH_SYSCALL_H */
diff --git a/arch/sh/include/asm/syscalls.h b/arch/sh/include/asm/syscalls.h
index 507725af2e54..3dbfef06f6b2 100644
--- a/arch/sh/include/asm/syscalls.h
+++ b/arch/sh/include/asm/syscalls.h
@@ -11,9 +11,9 @@ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
 			  unsigned long fd, unsigned long pgoff);
 
 #ifdef CONFIG_SUPERH32
-# include "syscalls_32.h"
+# include <asm/syscalls_32.h>
 #else
-# include "syscalls_64.h"
+# include <asm/syscalls_64.h>
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index ec88bfcdf7ce..e61d43d9f689 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -2,7 +2,7 @@
 #define __ASM_SH_TLB_H
 
 #ifdef CONFIG_SUPERH64
-# include "tlb_64.h"
+# include <asm/tlb_64.h>
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/sh/include/asm/traps.h b/arch/sh/include/asm/traps.h
index afd9df8d0641..9cc149a0dbd1 100644
--- a/arch/sh/include/asm/traps.h
+++ b/arch/sh/include/asm/traps.h
@@ -4,9 +4,9 @@
 #include <linux/compiler.h>
 
 #ifdef CONFIG_SUPERH32
-# include "traps_32.h"
+# include <asm/traps_32.h>
 #else
-# include "traps_64.h"
+# include <asm/traps_64.h>
 #endif
 
 BUILD_TRAP_HANDLER(address_error);
diff --git a/arch/sh/include/asm/uaccess.h b/arch/sh/include/asm/uaccess.h
index 8698a80ed00c..9486376605f4 100644
--- a/arch/sh/include/asm/uaccess.h
+++ b/arch/sh/include/asm/uaccess.h
@@ -97,9 +97,9 @@ struct __large_struct { unsigned long buf[100]; };
 })
 
 #ifdef CONFIG_SUPERH32
-# include "uaccess_32.h"
+# include <asm/uaccess_32.h>
 #else
-# include "uaccess_64.h"
+# include <asm/uaccess_64.h>
 #endif
 
 extern long strncpy_from_user(char *dest, const char __user *src, long count);
diff --git a/arch/sh/include/asm/unistd.h b/arch/sh/include/asm/unistd.h
index 7bc67076baac..307201a854f3 100644
--- a/arch/sh/include/asm/unistd.h
+++ b/arch/sh/include/asm/unistd.h
@@ -1,8 +1,8 @@
 #ifdef __KERNEL__
 # ifdef CONFIG_SUPERH32
-#  include "unistd_32.h"
+#  include <asm/unistd_32.h>
 # else
-#  include "unistd_64.h"
+#  include <asm/unistd_64.h>
 # endif
 
 # define __ARCH_WANT_SYS_RT_SIGSUSPEND
@@ -40,8 +40,8 @@
 
 #else
 # ifdef __SH5__
-#  include "unistd_64.h"
+#  include <asm/unistd_64.h>
 # else
-#  include "unistd_32.h"
+#  include <asm/unistd_32.h>
 # endif
 #endif
diff --git a/arch/sh/include/mach-ecovec24/mach/romimage.h b/arch/sh/include/mach-ecovec24/mach/romimage.h
index d63ef51ec186..60f3e8af05fa 100644
--- a/arch/sh/include/mach-ecovec24/mach/romimage.h
+++ b/arch/sh/include/mach-ecovec24/mach/romimage.h
@@ -6,7 +6,7 @@
  */
 
 #include <asm/romimage-macros.h>
-#include "partner-jet-setup.txt"
+#include <mach/partner-jet-setup.txt>
 
 	/* execute icbi after enabling cache */
 	mov.l	1f, r0
diff --git a/arch/sh/include/mach-kfr2r09/mach/romimage.h b/arch/sh/include/mach-kfr2r09/mach/romimage.h
index 7a883167c846..1afae21ced5f 100644
--- a/arch/sh/include/mach-kfr2r09/mach/romimage.h
+++ b/arch/sh/include/mach-kfr2r09/mach/romimage.h
@@ -6,7 +6,7 @@
  */
 
 #include <asm/romimage-macros.h>
-#include "partner-jet-setup.txt"
+#include <mach/partner-jet-setup.txt>
 
 	/* execute icbi after enabling cache */
 	mov.l	1f, r0
diff --git a/arch/tile/include/gxio/dma_queue.h b/arch/tile/include/gxio/dma_queue.h
index 00654feb7db0..b9e45e37649e 100644
--- a/arch/tile/include/gxio/dma_queue.h
+++ b/arch/tile/include/gxio/dma_queue.h
@@ -19,7 +19,7 @@
  * DMA queue management APIs shared between TRIO and mPIPE.
  */
 
-#include "common.h"
+#include <gxio/common.h>
 
 /* The credit counter lives in the high 32 bits. */
 #define DMA_QUEUE_CREDIT_SHIFT 32
diff --git a/arch/tile/include/gxio/mpipe.h b/arch/tile/include/gxio/mpipe.h
index 78c598618c97..b74f470ed11e 100644
--- a/arch/tile/include/gxio/mpipe.h
+++ b/arch/tile/include/gxio/mpipe.h
@@ -21,8 +21,8 @@
  * resources.
  */
 
-#include "common.h"
-#include "dma_queue.h"
+#include <gxio/common.h>
+#include <gxio/dma_queue.h>
 
 #include <linux/time.h>
 
diff --git a/arch/tile/include/gxio/trio.h b/arch/tile/include/gxio/trio.h
index 77b80cdd46d8..df10a662cc25 100644
--- a/arch/tile/include/gxio/trio.h
+++ b/arch/tile/include/gxio/trio.h
@@ -140,8 +140,8 @@
 
 #include <linux/types.h>
 
-#include "common.h"
-#include "dma_queue.h"
+#include <gxio/common.h>
+#include <gxio/dma_queue.h>
 
 #include <arch/trio_constants.h>
 #include <arch/trio.h>
diff --git a/arch/tile/include/gxio/usb_host.h b/arch/tile/include/gxio/usb_host.h
index a60a126e4565..5eedec0e988e 100644
--- a/arch/tile/include/gxio/usb_host.h
+++ b/arch/tile/include/gxio/usb_host.h
@@ -14,7 +14,7 @@
 #ifndef _GXIO_USB_H_
 #define _GXIO_USB_H_
 
-#include "common.h"
+#include <gxio/common.h>
 
 #include <hv/drv_usb_host_intf.h>
 #include <hv/iorpc.h>
diff --git a/arch/tile/include/hv/iorpc.h b/arch/tile/include/hv/iorpc.h
index 89c72a5d9341..ddf1604482b3 100644
--- a/arch/tile/include/hv/iorpc.h
+++ b/arch/tile/include/hv/iorpc.h
@@ -248,7 +248,7 @@
 #if defined(__HV__)
 #include <hv/hypervisor.h>
 #elif defined(__KERNEL__)
-#include "hypervisor.h"
+#include <hv/hypervisor.h>
 #include <linux/types.h>
 #else
 #include <stdint.h>
diff --git a/arch/unicore32/include/mach/PKUnity.h b/arch/unicore32/include/mach/PKUnity.h
index 8040d575dddb..46705afcbf5a 100644
--- a/arch/unicore32/include/mach/PKUnity.h
+++ b/arch/unicore32/include/mach/PKUnity.h
@@ -15,7 +15,7 @@
 #error You must include hardware.h not PKUnity.h
 #endif
 
-#include "bitfield.h"
+#include <mach/bitfield.h>
 
 /*
  * Memory Definitions
@@ -32,7 +32,7 @@
  * 0x98000000 - 0x9FFFFFFF 128MB  PCI PCI-AHB MEM-mapping
  */
 #define PKUNITY_PCI_BASE		io_p2v(0x80000000) /* 0x80000000 - 0xBFFFFFFF 1GB */
-#include "regs-pci.h"
+#include <mach/regs-pci.h>
 
 #define PKUNITY_PCICFG_BASE		(PKUNITY_PCI_BASE + 0x0)
 #define PKUNITY_PCIBRI_BASE		(PKUNITY_PCI_BASE + 0x00010000)
@@ -50,18 +50,18 @@
 #define PKUNITY_ARBITER_BASE		(PKUNITY_AHB_BASE + 0x000000) /* AHB-2 */
 #define PKUNITY_DDR2CTRL_BASE		(PKUNITY_AHB_BASE + 0x100000) /* AHB-3 */
 #define PKUNITY_DMAC_BASE		(PKUNITY_AHB_BASE + 0x200000) /* AHB-4 */
-#include "regs-dmac.h"
+#include <mach/regs-dmac.h>
 #define PKUNITY_UMAL_BASE		(PKUNITY_AHB_BASE + 0x300000) /* AHB-5 */
-#include "regs-umal.h"
+#include <mach/regs-umal.h>
 #define PKUNITY_USB_BASE		(PKUNITY_AHB_BASE + 0x400000) /* AHB-6 */
 #define PKUNITY_SATA_BASE		(PKUNITY_AHB_BASE + 0x500000) /* AHB-7 */
 #define PKUNITY_SMC_BASE		(PKUNITY_AHB_BASE + 0x600000) /* AHB-8 */
 /* AHB-9 is for APB bridge */
 #define PKUNITY_MME_BASE		(PKUNITY_AHB_BASE + 0x700000) /* AHB-10 */
 #define PKUNITY_UNIGFX_BASE		(PKUNITY_AHB_BASE + 0x800000) /* AHB-11 */
-#include "regs-unigfx.h"
+#include <mach/regs-unigfx.h>
 #define PKUNITY_NAND_BASE		(PKUNITY_AHB_BASE + 0x900000) /* AHB-12 */
-#include "regs-nand.h"
+#include <mach/regs-nand.h>
 #define PKUNITY_H264D_BASE		(PKUNITY_AHB_BASE + 0xA00000) /* AHB-13 */
 #define PKUNITY_H264E_BASE		(PKUNITY_AHB_BASE + 0xB00000) /* AHB-14 */
 
@@ -72,27 +72,27 @@
 
 #define PKUNITY_UART0_BASE		(PKUNITY_APB_BASE + 0x000000) /* APB-0 */
 #define PKUNITY_UART1_BASE		(PKUNITY_APB_BASE + 0x100000) /* APB-1 */
-#include "regs-uart.h"
+#include <mach/regs-uart.h>
 #define PKUNITY_I2C_BASE		(PKUNITY_APB_BASE + 0x200000) /* APB-2 */
-#include "regs-i2c.h"
+#include <mach/regs-i2c.h>
 #define PKUNITY_SPI_BASE		(PKUNITY_APB_BASE + 0x300000) /* APB-3 */
-#include "regs-spi.h"
+#include <mach/regs-spi.h>
 #define PKUNITY_AC97_BASE		(PKUNITY_APB_BASE + 0x400000) /* APB-4 */
-#include "regs-ac97.h"
+#include <mach/regs-ac97.h>
 #define PKUNITY_GPIO_BASE		(PKUNITY_APB_BASE + 0x500000) /* APB-5 */
-#include "regs-gpio.h"
+#include <mach/regs-gpio.h>
 #define PKUNITY_INTC_BASE		(PKUNITY_APB_BASE + 0x600000) /* APB-6 */
-#include "regs-intc.h"
+#include <mach/regs-intc.h>
 #define PKUNITY_RTC_BASE		(PKUNITY_APB_BASE + 0x700000) /* APB-7 */
-#include "regs-rtc.h"
+#include <mach/regs-rtc.h>
 #define PKUNITY_OST_BASE		(PKUNITY_APB_BASE + 0x800000) /* APB-8 */
-#include "regs-ost.h"
+#include <mach/regs-ost.h>
 #define PKUNITY_RESETC_BASE		(PKUNITY_APB_BASE + 0x900000) /* APB-9 */
-#include "regs-resetc.h"
+#include <mach/regs-resetc.h>
 #define PKUNITY_PM_BASE			(PKUNITY_APB_BASE + 0xA00000) /* APB-10 */
-#include "regs-pm.h"
+#include <mach/regs-pm.h>
 #define PKUNITY_PS2_BASE		(PKUNITY_APB_BASE + 0xB00000) /* APB-11 */
-#include "regs-ps2.h"
+#include <mach/regs-ps2.h>
 #define PKUNITY_SDC_BASE		(PKUNITY_APB_BASE + 0xC00000) /* APB-12 */
-#include "regs-sdc.h"
+#include <mach/regs-sdc.h>
 
diff --git a/arch/unicore32/include/mach/hardware.h b/arch/unicore32/include/mach/hardware.h
index 930bea6e129a..9e20b5d9ed50 100644
--- a/arch/unicore32/include/mach/hardware.h
+++ b/arch/unicore32/include/mach/hardware.h
@@ -15,7 +15,7 @@
 #ifndef __MACH_PUV3_HARDWARE_H__
 #define __MACH_PUV3_HARDWARE_H__
 
-#include "PKUnity.h"
+#include <mach/PKUnity.h>
 
 #ifndef __ASSEMBLY__
 #define io_p2v(x)	(void __iomem *)((x) - PKUNITY_MMIO_BASE)
diff --git a/arch/unicore32/include/mach/uncompress.h b/arch/unicore32/include/mach/uncompress.h
index 142d3e7958a9..9be67c9d3b53 100644
--- a/arch/unicore32/include/mach/uncompress.h
+++ b/arch/unicore32/include/mach/uncompress.h
@@ -13,8 +13,8 @@
 #ifndef __MACH_PUV3_UNCOMPRESS_H__
 #define __MACH_PUV3_UNCOMPRESS_H__
 
-#include "hardware.h"
-#include "ocd.h"
+#include <mach/hardware.h>
+#include <mach/ocd.h>
 
 extern char input_data[];
 extern char input_data_end[];
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 58cb6d4085f7..250b8774c158 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -309,9 +309,9 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2)
 #define smp_mb__after_atomic_inc()	barrier()
 
 #ifdef CONFIG_X86_32
-# include "atomic64_32.h"
+# include <asm/atomic64_32.h>
 #else
-# include "atomic64_64.h"
+# include <asm/atomic64_64.h>
 #endif
 
 #endif /* _ASM_X86_ATOMIC_H */
diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h
index 7f8422a28a46..0fa675033912 100644
--- a/arch/x86/include/asm/calling.h
+++ b/arch/x86/include/asm/calling.h
@@ -46,7 +46,7 @@ For 32-bit we have the following conventions - kernel is built with
 
 */
 
-#include "dwarf2.h"
+#include <asm/dwarf2.h>
 
 /*
  * 64-bit system call stack frame layout defines and helpers,
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index 848850fd7d62..5f5bb0f97361 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "checksum_32.h"
+# include <asm/checksum_32.h>
 #else
-# include "checksum_64.h"
+# include <asm/checksum_64.h>
 #endif
diff --git a/arch/x86/include/asm/cmpxchg.h b/arch/x86/include/asm/cmpxchg.h
index 99480e55973d..8d871eaddb66 100644
--- a/arch/x86/include/asm/cmpxchg.h
+++ b/arch/x86/include/asm/cmpxchg.h
@@ -138,9 +138,9 @@ extern void __add_wrong_size(void)
 	__raw_cmpxchg((ptr), (old), (new), (size), "")
 
 #ifdef CONFIG_X86_32
-# include "cmpxchg_32.h"
+# include <asm/cmpxchg_32.h>
 #else
-# include "cmpxchg_64.h"
+# include <asm/cmpxchg_64.h>
 #endif
 
 #ifdef __HAVE_ARCH_CMPXCHG
diff --git a/arch/x86/include/asm/mmzone.h b/arch/x86/include/asm/mmzone.h
index 64217ea16a36..d497bc425cae 100644
--- a/arch/x86/include/asm/mmzone.h
+++ b/arch/x86/include/asm/mmzone.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "mmzone_32.h"
+# include <asm/mmzone_32.h>
 #else
-# include "mmzone_64.h"
+# include <asm/mmzone_64.h>
 #endif
diff --git a/arch/x86/include/asm/mutex.h b/arch/x86/include/asm/mutex.h
index a731b9c573a6..7d3a48275394 100644
--- a/arch/x86/include/asm/mutex.h
+++ b/arch/x86/include/asm/mutex.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "mutex_32.h"
+# include <asm/mutex_32.h>
 #else
-# include "mutex_64.h"
+# include <asm/mutex_64.h>
 #endif
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index bfacd2ccf651..49119fcea2dc 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -53,9 +53,9 @@ static inline int numa_cpu_node(int cpu)
 #endif	/* CONFIG_NUMA */
 
 #ifdef CONFIG_X86_32
-# include "numa_32.h"
+# include <asm/numa_32.h>
 #else
-# include "numa_64.h"
+# include <asm/numa_64.h>
 #endif
 
 #ifdef CONFIG_NUMA
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index df75d07571ce..6e41b9343928 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h
@@ -141,7 +141,7 @@ void default_restore_msi_irqs(struct pci_dev *dev, int irq);
 #endif  /* __KERNEL__ */
 
 #ifdef CONFIG_X86_64
-#include "pci_64.h"
+#include <asm/pci_64.h>
 #endif
 
 /* implement the pci_ DMA API in terms of the generic device dma_ one */
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 49afb3f41eb6..fc9948465293 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -384,9 +384,9 @@ pte_t *populate_extra_pte(unsigned long vaddr);
 #endif	/* __ASSEMBLY__ */
 
 #ifdef CONFIG_X86_32
-# include "pgtable_32.h"
+# include <asm/pgtable_32.h>
 #else
-# include "pgtable_64.h"
+# include <asm/pgtable_64.h>
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index db8fec6d2953..ec8a1fc9505d 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -174,9 +174,9 @@
 #endif
 
 #ifdef CONFIG_X86_32
-# include "pgtable_32_types.h"
+# include <asm/pgtable_32_types.h>
 #else
-# include "pgtable_64_types.h"
+# include <asm/pgtable_64_types.h>
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/posix_types.h b/arch/x86/include/asm/posix_types.h
index 7ef7c3020e5c..bad3665c25fc 100644
--- a/arch/x86/include/asm/posix_types.h
+++ b/arch/x86/include/asm/posix_types.h
@@ -1,15 +1,15 @@
 #ifdef __KERNEL__
 # ifdef CONFIG_X86_32
-#  include "posix_types_32.h"
+#  include <asm/posix_types_32.h>
 # else
-#  include "posix_types_64.h"
+#  include <asm/posix_types_64.h>
 # endif
 #else
 # ifdef __i386__
-#  include "posix_types_32.h"
+#  include <asm/posix_types_32.h>
 # elif defined(__ILP32__)
-#  include "posix_types_x32.h"
+#  include <asm/posix_types_x32.h>
 # else
-#  include "posix_types_64.h"
+#  include <asm/posix_types_64.h>
 # endif
 #endif
diff --git a/arch/x86/include/asm/seccomp.h b/arch/x86/include/asm/seccomp.h
index c62e58a5a90d..0f3d7f099224 100644
--- a/arch/x86/include/asm/seccomp.h
+++ b/arch/x86/include/asm/seccomp.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "seccomp_32.h"
+# include <asm/seccomp_32.h>
 #else
-# include "seccomp_64.h"
+# include <asm/seccomp_64.h>
 #endif
diff --git a/arch/x86/include/asm/string.h b/arch/x86/include/asm/string.h
index 6dfd6d9373a0..09224d7a5862 100644
--- a/arch/x86/include/asm/string.h
+++ b/arch/x86/include/asm/string.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "string_32.h"
+# include <asm/string_32.h>
 #else
-# include "string_64.h"
+# include <asm/string_64.h>
 #endif
diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h
index 9bd521fe4570..2fab6c2c3575 100644
--- a/arch/x86/include/asm/suspend.h
+++ b/arch/x86/include/asm/suspend.h
@@ -1,5 +1,5 @@
 #ifdef CONFIG_X86_32
-# include "suspend_32.h"
+# include <asm/suspend_32.h>
 #else
-# include "suspend_64.h"
+# include <asm/suspend_64.h>
 #endif
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index a91acfbb1a98..7ccf8d131535 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -589,9 +589,9 @@ extern struct movsl_mask {
 #define ARCH_HAS_NOCACHE_UACCESS 1
 
 #ifdef CONFIG_X86_32
-# include "uaccess_32.h"
+# include <asm/uaccess_32.h>
 #else
-# include "uaccess_64.h"
+# include <asm/uaccess_64.h>
 #endif
 
 #endif /* _ASM_X86_UACCESS_H */
diff --git a/arch/x86/include/asm/user.h b/arch/x86/include/asm/user.h
index 24532c7da3d6..ccab4af1646d 100644
--- a/arch/x86/include/asm/user.h
+++ b/arch/x86/include/asm/user.h
@@ -2,9 +2,9 @@
 #define _ASM_X86_USER_H
 
 #ifdef CONFIG_X86_32
-# include "user_32.h"
+# include <asm/user_32.h>
 #else
-# include "user_64.h"
+# include <asm/user_64.h>
 #endif
 
 #include <asm/types.h>
diff --git a/arch/x86/include/asm/xen/interface.h b/arch/x86/include/asm/xen/interface.h
index cbf0c9d50b92..80502a2bb789 100644
--- a/arch/x86/include/asm/xen/interface.h
+++ b/arch/x86/include/asm/xen/interface.h
@@ -116,9 +116,9 @@ struct arch_shared_info {
 #endif	/* !__ASSEMBLY__ */
 
 #ifdef CONFIG_X86_32
-#include "interface_32.h"
+#include <asm/xen/interface_32.h>
 #else
-#include "interface_64.h"
+#include <asm/xen/interface_64.h>
 #endif
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h
index 7fcf6f3dbcc3..f8fde90bc45e 100644
--- a/arch/x86/include/asm/xor.h
+++ b/arch/x86/include/asm/xor.h
@@ -3,8 +3,8 @@
 # include <asm-generic/xor.h>
 #else
 #ifdef CONFIG_X86_32
-# include "xor_32.h"
+# include <asm/xor_32.h>
 #else
-# include "xor_64.h"
+# include <asm/xor_64.h>
 #endif
 #endif
diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h
index aabd5850bdb9..f79cb7ec0e06 100644
--- a/arch/x86/include/asm/xor_32.h
+++ b/arch/x86/include/asm/xor_32.h
@@ -822,7 +822,7 @@ static struct xor_block_template xor_block_pIII_sse = {
 };
 
 /* Also try the AVX routines */
-#include "xor_avx.h"
+#include <asm/xor_avx.h>
 
 /* Also try the generic routines.  */
 #include <asm-generic/xor.h>
diff --git a/arch/x86/include/asm/xor_64.h b/arch/x86/include/asm/xor_64.h
index 5fc06d0b7eb5..87ac522c4af5 100644
--- a/arch/x86/include/asm/xor_64.h
+++ b/arch/x86/include/asm/xor_64.h
@@ -306,7 +306,7 @@ static struct xor_block_template xor_block_sse = {
 
 
 /* Also try the AVX routines */
-#include "xor_avx.h"
+#include <asm/xor_avx.h>
 
 #undef XOR_TRY_TEMPLATES
 #define XOR_TRY_TEMPLATES			\
diff --git a/include/acpi/acpi.h b/include/acpi/acpi.h
index c433d5e27679..c1ea8436961f 100644
--- a/include/acpi/acpi.h
+++ b/include/acpi/acpi.h
@@ -53,14 +53,14 @@
  *
  * Note: The order of these include files is important.
  */
-#include "platform/acenv.h"	/* Environment-specific items */
-#include "acnames.h"		/* Common ACPI names and strings */
-#include "actypes.h"		/* ACPICA data types and structures */
-#include "acexcep.h"		/* ACPICA exceptions */
-#include "actbl.h"		/* ACPI table definitions */
-#include "acoutput.h"		/* Error output and Debug macros */
-#include "acrestyp.h"		/* Resource Descriptor structs */
-#include "acpiosxf.h"		/* OSL interfaces (ACPICA-to-OS) */
-#include "acpixf.h"		/* ACPI core subsystem external interfaces */
+#include <acpi/platform/acenv.h>	/* Environment-specific items */
+#include <acpi/acnames.h>		/* Common ACPI names and strings */
+#include <acpi/actypes.h>		/* ACPICA data types and structures */
+#include <acpi/acexcep.h>		/* ACPICA exceptions */
+#include <acpi/actbl.h>		/* ACPI table definitions */
+#include <acpi/acoutput.h>		/* Error output and Debug macros */
+#include <acpi/acrestyp.h>		/* Resource Descriptor structs */
+#include <acpi/acpiosxf.h>		/* OSL interfaces (ACPICA-to-OS) */
+#include <acpi/acpixf.h>		/* ACPI core subsystem external interfaces */
 
 #endif				/* __ACPI_H__ */
diff --git a/include/acpi/acpiosxf.h b/include/acpi/acpiosxf.h
index 0650f5fa7ce9..1222ba93d80a 100644
--- a/include/acpi/acpiosxf.h
+++ b/include/acpi/acpiosxf.h
@@ -47,8 +47,8 @@
 #ifndef __ACPIOSXF_H__
 #define __ACPIOSXF_H__
 
-#include "platform/acenv.h"
-#include "actypes.h"
+#include <acpi/platform/acenv.h>
+#include <acpi/actypes.h>
 
 /* Types for acpi_os_execute */
 
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 26a92fc28a59..51405d32ac64 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -49,9 +49,9 @@
 
 #define ACPI_CA_VERSION                 0x20120711
 
-#include "acconfig.h"
-#include "actypes.h"
-#include "actbl.h"
+#include <acpi/acconfig.h>
+#include <acpi/actypes.h>
+#include <acpi/actbl.h>
 
 extern u8 acpi_gbl_permanent_mmap;
 
diff --git a/include/acpi/platform/acenv.h b/include/acpi/platform/acenv.h
index 560a9f272f34..89cee88dd2a5 100644
--- a/include/acpi/platform/acenv.h
+++ b/include/acpi/platform/acenv.h
@@ -138,7 +138,7 @@
 /*! [Begin] no source code translation */
 
 #if defined(_LINUX) || defined(__linux__)
-#include "aclinux.h"
+#include <acpi/platform/aclinux.h>
 
 #elif defined(_AED_EFI)
 #include "acefi.h"
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 7509be30ca01..85d5d8f38452 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -106,7 +106,7 @@
 
 /* Linux uses GCC */
 
-#include "acgcc.h"
+#include <acpi/platform/acgcc.h>
 
 
 #ifdef __KERNEL__
diff --git a/include/drm/drm.h b/include/drm/drm.h
index e51035a3757f..1e3481edf062 100644
--- a/include/drm/drm.h
+++ b/include/drm/drm.h
@@ -628,7 +628,7 @@ struct drm_prime_handle {
 	__s32 fd;
 };
 
-#include "drm_mode.h"
+#include <drm/drm_mode.h>
 
 #define DRM_IOCTL_BASE			'd'
 #define DRM_IO(nr)			_IO(DRM_IOCTL_BASE,nr)
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 1b33df4884b5..0c44e4a000fd 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -85,9 +85,9 @@ struct module;
 struct drm_file;
 struct drm_device;
 
-#include "drm_os_linux.h"
-#include "drm_hashtab.h"
-#include "drm_mm.h"
+#include <drm/drm_os_linux.h>
+#include <drm/drm_hashtab.h>
+#include <drm/drm_mm.h>
 
 #define DRM_UT_CORE 		0x01
 #define DRM_UT_DRIVER		0x02
@@ -676,7 +676,7 @@ struct drm_gem_object {
 	struct dma_buf_attachment *import_attach;
 };
 
-#include "drm_crtc.h"
+#include <drm/drm_crtc.h>
 
 /* per-master structure */
 struct drm_master {
@@ -1304,7 +1304,7 @@ extern void drm_vm_close_locked(struct drm_device *dev, struct vm_area_struct *v
 extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait);
 
 				/* Memory management support (drm_memory.h) */
-#include "drm_memory.h"
+#include <drm/drm_memory.h>
 extern void drm_free_agp(DRM_AGP_MEM * handle, int pages);
 extern int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start);
 extern DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev,
@@ -1613,7 +1613,7 @@ void drm_gem_vm_open(struct vm_area_struct *vma);
 void drm_gem_vm_close(struct vm_area_struct *vma);
 int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 
-#include "drm_global.h"
+#include <drm/drm_global.h>
 
 static inline void
 drm_gem_object_reference(struct drm_gem_object *obj)
@@ -1722,7 +1722,7 @@ static __inline__ void drm_core_dropmap(struct drm_local_map *map)
 {
 }
 
-#include "drm_mem_util.h"
+#include <drm/drm_mem_util.h>
 
 extern int drm_fill_in_dev(struct drm_device *dev,
 			   const struct pci_device_id *ent,
diff --git a/include/drm/drm_buffer.h b/include/drm/drm_buffer.h
index 322dbff3f861..c80d3a340b94 100644
--- a/include/drm/drm_buffer.h
+++ b/include/drm/drm_buffer.h
@@ -35,7 +35,7 @@
 #ifndef _DRM_BUFFER_H_
 #define _DRM_BUFFER_H_
 
-#include "drmP.h"
+#include <drm/drmP.h>
 
 struct drm_buffer {
 	int iterator;
diff --git a/include/drm/drm_encoder_slave.h b/include/drm/drm_encoder_slave.h
index 7dc385233805..b0c11a7809bb 100644
--- a/include/drm/drm_encoder_slave.h
+++ b/include/drm/drm_encoder_slave.h
@@ -27,8 +27,8 @@
 #ifndef __DRM_ENCODER_SLAVE_H__
 #define __DRM_ENCODER_SLAVE_H__
 
-#include "drmP.h"
-#include "drm_crtc.h"
+#include <drm/drmP.h>
+#include <drm/drm_crtc.h>
 
 /**
  * struct drm_encoder_slave_funcs - Entry points exposed by a slave encoder driver
diff --git a/include/drm/drm_memory.h b/include/drm/drm_memory.h
index 15af9b32ae42..4baf57a207e7 100644
--- a/include/drm/drm_memory.h
+++ b/include/drm/drm_memory.h
@@ -35,7 +35,7 @@
 
 #include <linux/highmem.h>
 #include <linux/vmalloc.h>
-#include "drmP.h"
+#include <drm/drmP.h>
 
 /**
  * Cut down version of drm_memory_debug.h, which used to be called
diff --git a/include/drm/drm_sarea.h b/include/drm/drm_sarea.h
index ee5389d22c64..d3aedc90b9fd 100644
--- a/include/drm/drm_sarea.h
+++ b/include/drm/drm_sarea.h
@@ -32,7 +32,7 @@
 #ifndef _DRM_SAREA_H_
 #define _DRM_SAREA_H_
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /* SAREA area needs to be at least a page */
 #if defined(__alpha__)
diff --git a/include/drm/exynos_drm.h b/include/drm/exynos_drm.h
index c20b00181530..1f2acdfbfd6d 100644
--- a/include/drm/exynos_drm.h
+++ b/include/drm/exynos_drm.h
@@ -29,7 +29,7 @@
 #ifndef _EXYNOS_DRM_H_
 #define _EXYNOS_DRM_H_
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /**
  * User-desired buffer creation information structure.
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 8cc70837f929..814a42c89422 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -27,7 +27,7 @@
 #ifndef _I915_DRM_H_
 #define _I915_DRM_H_
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /* Please note that modifications to all structs defined here are
  * subject to backwards-compatibility constraints.
diff --git a/include/drm/mga_drm.h b/include/drm/mga_drm.h
index fca817009e13..2375bfd6e5e9 100644
--- a/include/drm/mga_drm.h
+++ b/include/drm/mga_drm.h
@@ -35,7 +35,7 @@
 #ifndef __MGA_DRM_H__
 #define __MGA_DRM_H__
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /* WARNING: If you change any of these defines, make sure to change the
  * defines in the Xserver file (mga_sarea.h)
diff --git a/include/drm/radeon_drm.h b/include/drm/radeon_drm.h
index dc3a8cd7db8a..4766c0f6a838 100644
--- a/include/drm/radeon_drm.h
+++ b/include/drm/radeon_drm.h
@@ -33,7 +33,7 @@
 #ifndef __RADEON_DRM_H__
 #define __RADEON_DRM_H__
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /* WARNING: If you change any of these defines, make sure to change the
  * defines in the X server file (radeon_sarea.h)
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index e15f2a89a270..e8028ade567f 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -31,7 +31,7 @@
 #ifndef _TTM_BO_API_H_
 #define _TTM_BO_API_H_
 
-#include "drm_hashtab.h"
+#include <drm/drm_hashtab.h>
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/wait.h>
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 084e8989a6e1..d803b92b0324 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -30,14 +30,14 @@
 #ifndef _TTM_BO_DRIVER_H_
 #define _TTM_BO_DRIVER_H_
 
-#include "ttm/ttm_bo_api.h"
-#include "ttm/ttm_memory.h"
-#include "ttm/ttm_module.h"
-#include "drm_mm.h"
-#include "drm_global.h"
-#include "linux/workqueue.h"
-#include "linux/fs.h"
-#include "linux/spinlock.h"
+#include <ttm/ttm_bo_api.h>
+#include <ttm/ttm_memory.h>
+#include <ttm/ttm_module.h>
+#include <drm/drm_mm.h>
+#include <drm/drm_global.h>
+#include <linux/workqueue.h>
+#include <linux/fs.h>
+#include <linux/spinlock.h>
 
 struct ttm_backend_func {
 	/**
diff --git a/include/drm/ttm/ttm_execbuf_util.h b/include/drm/ttm/ttm_execbuf_util.h
index 26cc7f9ffa41..1926cae373ba 100644
--- a/include/drm/ttm/ttm_execbuf_util.h
+++ b/include/drm/ttm/ttm_execbuf_util.h
@@ -31,7 +31,7 @@
 #ifndef _TTM_EXECBUF_UTIL_H_
 #define _TTM_EXECBUF_UTIL_H_
 
-#include "ttm/ttm_bo_api.h"
+#include <ttm/ttm_bo_api.h>
 #include <linux/list.h>
 
 /**
diff --git a/include/drm/ttm/ttm_lock.h b/include/drm/ttm/ttm_lock.h
index 2e7f0c941b5d..2902beb5f689 100644
--- a/include/drm/ttm/ttm_lock.h
+++ b/include/drm/ttm/ttm_lock.h
@@ -49,7 +49,7 @@
 #ifndef _TTM_LOCK_H_
 #define _TTM_LOCK_H_
 
-#include "ttm/ttm_object.h"
+#include <ttm/ttm_object.h>
 #include <linux/wait.h>
 #include <linux/atomic.h>
 
diff --git a/include/drm/ttm/ttm_object.h b/include/drm/ttm/ttm_object.h
index e46054e5255b..b01c563b2751 100644
--- a/include/drm/ttm/ttm_object.h
+++ b/include/drm/ttm/ttm_object.h
@@ -38,7 +38,7 @@
 #define _TTM_OBJECT_H_
 
 #include <linux/list.h>
-#include "drm_hashtab.h"
+#include <drm/drm_hashtab.h>
 #include <linux/kref.h>
 #include <ttm/ttm_memory.h>
 
diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
index 5fe27400d176..706b962c6467 100644
--- a/include/drm/ttm/ttm_page_alloc.h
+++ b/include/drm/ttm/ttm_page_alloc.h
@@ -26,8 +26,8 @@
 #ifndef TTM_PAGE_ALLOC
 #define TTM_PAGE_ALLOC
 
-#include "ttm_bo_driver.h"
-#include "ttm_memory.h"
+#include <drm/ttm/ttm_bo_driver.h>
+#include <drm/ttm/ttm_memory.h>
 
 /**
  * Initialize pool allocator.
diff --git a/include/drm/via_drm.h b/include/drm/via_drm.h
index 79b3b6e0f6b3..8b0533ccbd5a 100644
--- a/include/drm/via_drm.h
+++ b/include/drm/via_drm.h
@@ -24,7 +24,7 @@
 #ifndef _VIA_DRM_H_
 #define _VIA_DRM_H_
 
-#include "drm.h"
+#include <drm/drm.h>
 
 /* WARNING: These defines must be the same as what the Xserver uses.
  * if you change them, you must change the defines in the Xserver.
diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 1954a4e305a3..4180eb78d575 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -10,7 +10,7 @@
 #include <linux/bcma/bcma_driver_gmac_cmn.h>
 #include <linux/ssb/ssb.h> /* SPROM sharing */
 
-#include "bcma_regs.h"
+#include <linux/bcma/bcma_regs.h>
 
 struct bcma_device;
 struct bcma_bus;
diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h
index d021610efd65..cf6f4d998a76 100644
--- a/include/linux/ceph/ceph_fs.h
+++ b/include/linux/ceph/ceph_fs.h
@@ -12,8 +12,8 @@
 #ifndef CEPH_FS_H
 #define CEPH_FS_H
 
-#include "msgr.h"
-#include "rados.h"
+#include <linux/ceph/msgr.h>
+#include <linux/ceph/rados.h>
 
 /*
  * subprotocol versions.  when specific messages types or high-level
diff --git a/include/linux/ceph/debugfs.h b/include/linux/ceph/debugfs.h
index 2a79702e092b..1df086d7882d 100644
--- a/include/linux/ceph/debugfs.h
+++ b/include/linux/ceph/debugfs.h
@@ -1,8 +1,8 @@
 #ifndef _FS_CEPH_DEBUGFS_H
 #define _FS_CEPH_DEBUGFS_H
 
-#include "ceph_debug.h"
-#include "types.h"
+#include <linux/ceph/ceph_debug.h>
+#include <linux/ceph/types.h>
 
 #define CEPH_DEFINE_SHOW_FUNC(name)					\
 static int name##_open(struct inode *inode, struct file *file)		\
diff --git a/include/linux/ceph/decode.h b/include/linux/ceph/decode.h
index 4bbf2db45f46..63d092822bad 100644
--- a/include/linux/ceph/decode.h
+++ b/include/linux/ceph/decode.h
@@ -6,7 +6,7 @@
 #include <linux/time.h>
 #include <asm/unaligned.h>
 
-#include "types.h"
+#include <linux/ceph/types.h>
 
 /*
  * in all cases,
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 42624789b06f..6470792b13d3 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -1,7 +1,7 @@
 #ifndef _FS_CEPH_LIBCEPH_H
 #define _FS_CEPH_LIBCEPH_H
 
-#include "ceph_debug.h"
+#include <linux/ceph/ceph_debug.h>
 
 #include <asm/unaligned.h>
 #include <linux/backing-dev.h>
@@ -15,12 +15,12 @@
 #include <linux/writeback.h>
 #include <linux/slab.h>
 
-#include "types.h"
-#include "messenger.h"
-#include "msgpool.h"
-#include "mon_client.h"
-#include "osd_client.h"
-#include "ceph_fs.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/messenger.h>
+#include <linux/ceph/msgpool.h>
+#include <linux/ceph/mon_client.h>
+#include <linux/ceph/osd_client.h>
+#include <linux/ceph/ceph_fs.h>
 
 /*
  * mount options
diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h
index 9935fac8c107..cb15b5d867c7 100644
--- a/include/linux/ceph/mdsmap.h
+++ b/include/linux/ceph/mdsmap.h
@@ -2,7 +2,7 @@
 #define _FS_CEPH_MDSMAP_H
 
 #include <linux/bug.h>
-#include "types.h"
+#include <linux/ceph/types.h>
 
 /*
  * mds map - describe servers in the mds cluster.
diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
index 189ae0637634..14ba5ee738a9 100644
--- a/include/linux/ceph/messenger.h
+++ b/include/linux/ceph/messenger.h
@@ -8,8 +8,8 @@
 #include <linux/uio.h>
 #include <linux/workqueue.h>
 
-#include "types.h"
-#include "buffer.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/buffer.h>
 
 struct ceph_msg;
 struct ceph_connection;
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index 2113e3850a4e..1fb93e9080b0 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -5,7 +5,7 @@
 #include <linux/kref.h>
 #include <linux/rbtree.h>
 
-#include "messenger.h"
+#include <linux/ceph/messenger.h>
 
 struct ceph_client;
 struct ceph_mount_args;
diff --git a/include/linux/ceph/msgpool.h b/include/linux/ceph/msgpool.h
index 09fa96b43436..4b0d38960726 100644
--- a/include/linux/ceph/msgpool.h
+++ b/include/linux/ceph/msgpool.h
@@ -2,7 +2,7 @@
 #define _FS_CEPH_MSGPOOL
 
 #include <linux/mempool.h>
-#include "messenger.h"
+#include <linux/ceph/messenger.h>
 
 /*
  * we use memory pools for preallocating messages we may receive, to
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 311ef8d6aa9e..25b930bffea6 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -2,8 +2,8 @@
 #define _FS_CEPH_OSDMAP_H
 
 #include <linux/rbtree.h>
-#include "types.h"
-#include "ceph_fs.h"
+#include <linux/ceph/types.h>
+#include <linux/ceph/ceph_fs.h>
 #include <linux/crush/crush.h>
 
 /*
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 0a99099801a4..de91fbdf127e 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -6,7 +6,7 @@
  * (Reliable Autonomic Distributed Object Store).
  */
 
-#include "msgr.h"
+#include <linux/ceph/msgr.h>
 
 /*
  * osdmap encoding versions
diff --git a/include/linux/ceph/types.h b/include/linux/ceph/types.h
index 28b35a005ec2..d3ff1cf2d27e 100644
--- a/include/linux/ceph/types.h
+++ b/include/linux/ceph/types.h
@@ -7,9 +7,9 @@
 #include <linux/fcntl.h>
 #include <linux/string.h>
 
-#include "ceph_fs.h"
-#include "ceph_frag.h"
-#include "ceph_hash.h"
+#include <linux/ceph/ceph_fs.h>
+#include <linux/ceph/ceph_frag.h>
+#include <linux/ceph/ceph_hash.h>
 
 /*
  * Identify inodes by both their ino AND snapshot id (a u64).
diff --git a/include/linux/crush/mapper.h b/include/linux/crush/mapper.h
index 71d79f44a7d0..5772dee3ecbf 100644
--- a/include/linux/crush/mapper.h
+++ b/include/linux/crush/mapper.h
@@ -8,7 +8,7 @@
  * LGPL2
  */
 
-#include "crush.h"
+#include <linux/crush/crush.h>
 
 extern int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size);
 extern int crush_do_rule(const struct crush_map *map,
diff --git a/include/linux/drbd_tag_magic.h b/include/linux/drbd_tag_magic.h
index 81f52f2c5724..82de1f9e48b1 100644
--- a/include/linux/drbd_tag_magic.h
+++ b/include/linux/drbd_tag_magic.h
@@ -12,7 +12,7 @@ enum packet_types {
 #define NL_INT64(pn, pr, member)
 #define NL_BIT(pn, pr, member)
 #define NL_STRING(pn, pr, member, len)
-#include "drbd_nl.h"
+#include <linux/drbd_nl.h>
 	P_nl_after_last_packet,
 };
 
@@ -37,7 +37,7 @@ static const int tag_list_sizes[] = {
 #define NL_INT64(pn, pr, member)        + 4 + 8
 #define NL_BIT(pn, pr, member)          + 4 + 1
 #define NL_STRING(pn, pr, member, len)  + 4 + (len)
-#include "drbd_nl.h"
+#include <linux/drbd_nl.h>
 };
 
 /* The two highest bits are used for the tag type */
@@ -62,7 +62,7 @@ enum drbd_tags {
 #define NL_INT64(pn, pr, member)       T_ ## member = pn | TT_INT64   | pr ,
 #define NL_BIT(pn, pr, member)         T_ ## member = pn | TT_BIT     | pr ,
 #define NL_STRING(pn, pr, member, len) T_ ## member = pn | TT_STRING  | pr ,
-#include "drbd_nl.h"
+#include <linux/drbd_nl.h>
 };
 
 struct tag {
@@ -78,7 +78,7 @@ static const struct tag tag_descriptions[] = {
 #define NL_INT64(pn, pr, member)       [ pn ] = { #member, TT_INT64   | pr, sizeof(__u64) },
 #define NL_BIT(pn, pr, member)         [ pn ] = { #member, TT_BIT     | pr, sizeof(int)   },
 #define NL_STRING(pn, pr, member, len) [ pn ] = { #member, TT_STRING  | pr, (len)         },
-#include "drbd_nl.h"
+#include <linux/drbd_nl.h>
 };
 
 #endif
diff --git a/include/linux/libfdt.h b/include/linux/libfdt.h
index 4c0306c69b4e..a0c3bf6c9edb 100644
--- a/include/linux/libfdt.h
+++ b/include/linux/libfdt.h
@@ -2,7 +2,7 @@
 #define _INCLUDE_LIBFDT_H_
 
 #include <linux/libfdt_env.h>
-#include "../../scripts/dtc/libfdt/fdt.h"
-#include "../../scripts/dtc/libfdt/libfdt.h"
+#include <>
+#include <>
 
 #endif /* _INCLUDE_LIBFDT_H_ */
diff --git a/include/linux/netfilter/nf_conntrack_h323_asn1.h b/include/linux/netfilter/nf_conntrack_h323_asn1.h
index 8dab5968fc7e..3176a277eed1 100644
--- a/include/linux/netfilter/nf_conntrack_h323_asn1.h
+++ b/include/linux/netfilter/nf_conntrack_h323_asn1.h
@@ -40,7 +40,7 @@
 /*****************************************************************************
  * H.323 Types
  ****************************************************************************/
-#include "nf_conntrack_h323_types.h"
+#include <linux/netfilter/nf_conntrack_h323_types.h>
 
 typedef struct {
 	enum {
diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h
index e9b7f4350844..cbb07f850791 100644
--- a/include/linux/pinctrl/consumer.h
+++ b/include/linux/pinctrl/consumer.h
@@ -15,7 +15,7 @@
 #include <linux/err.h>
 #include <linux/list.h>
 #include <linux/seq_file.h>
-#include "pinctrl-state.h"
+#include <linux/pinctrl/pinctrl-state.h>
 
 /* This struct is private to the core and should be regarded as a cookie */
 struct pinctrl;
diff --git a/include/linux/pinctrl/machine.h b/include/linux/pinctrl/machine.h
index 7d22ab00343f..e5b1716f98cc 100644
--- a/include/linux/pinctrl/machine.h
+++ b/include/linux/pinctrl/machine.h
@@ -14,7 +14,7 @@
 
 #include <linux/bug.h>
 
-#include "pinctrl-state.h"
+#include <linux/pinctrl/pinctrl-state.h>
 
 enum pinctrl_map_type {
 	PIN_MAP_TYPE_INVALID,
diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h
index 69393a662532..7d087f03e91e 100644
--- a/include/linux/pinctrl/pinctrl.h
+++ b/include/linux/pinctrl/pinctrl.h
@@ -17,7 +17,7 @@
 #include <linux/radix-tree.h>
 #include <linux/list.h>
 #include <linux/seq_file.h>
-#include "pinctrl-state.h"
+#include <linux/pinctrl/pinctrl-state.h>
 
 struct device;
 struct pinctrl_dev;
diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h
index 1818dcbdd9ab..c15395031cb3 100644
--- a/include/linux/pinctrl/pinmux.h
+++ b/include/linux/pinctrl/pinmux.h
@@ -14,7 +14,7 @@
 
 #include <linux/list.h>
 #include <linux/seq_file.h>
-#include "pinctrl.h"
+#include <linux/pinctrl/pinctrl.h>
 
 #ifdef CONFIG_PINMUX
 
diff --git a/include/scsi/osd_attributes.h b/include/scsi/osd_attributes.h
index 56e920ade326..303ba1118a4d 100644
--- a/include/scsi/osd_attributes.h
+++ b/include/scsi/osd_attributes.h
@@ -1,7 +1,7 @@
 #ifndef __OSD_ATTRIBUTES_H__
 #define __OSD_ATTRIBUTES_H__
 
-#include "osd_protocol.h"
+#include <scsi/osd_protocol.h>
 
 /*
  * Contains types and constants that define attribute pages and attribute
diff --git a/include/scsi/osd_initiator.h b/include/scsi/osd_initiator.h
index 572fb5493661..b2e85fdd2ae0 100644
--- a/include/scsi/osd_initiator.h
+++ b/include/scsi/osd_initiator.h
@@ -14,8 +14,8 @@
 #ifndef __OSD_INITIATOR_H__
 #define __OSD_INITIATOR_H__
 
-#include "osd_protocol.h"
-#include "osd_types.h"
+#include <scsi/osd_protocol.h>
+#include <scsi/osd_types.h>
 
 #include <linux/blkdev.h>
 #include <scsi/scsi_device.h>
diff --git a/include/scsi/osd_sec.h b/include/scsi/osd_sec.h
index 4c09fee8ae1e..f96151c9c9e8 100644
--- a/include/scsi/osd_sec.h
+++ b/include/scsi/osd_sec.h
@@ -14,8 +14,8 @@
 #ifndef __OSD_SEC_H__
 #define __OSD_SEC_H__
 
-#include "osd_protocol.h"
-#include "osd_types.h"
+#include <scsi/osd_protocol.h>
+#include <scsi/osd_types.h>
 
 /*
  * Contains types and constants of osd capabilities and security
diff --git a/include/sound/ac97_codec.h b/include/sound/ac97_codec.h
index 02cbb50225bb..fdeb8dceec0f 100644
--- a/include/sound/ac97_codec.h
+++ b/include/sound/ac97_codec.h
@@ -28,9 +28,9 @@
 #include <linux/bitops.h>
 #include <linux/device.h>
 #include <linux/workqueue.h>
-#include "pcm.h"
-#include "control.h"
-#include "info.h"
+#include <sound/pcm.h>
+#include <sound/control.h>
+#include <sound/info.h>
 
 /* maximum number of devices on the AC97 bus */
 #define	AC97_BUS_MAX_DEVICES	4
diff --git a/include/sound/ad1816a.h b/include/sound/ad1816a.h
index d010858c33c2..a7d8dc782e7c 100644
--- a/include/sound/ad1816a.h
+++ b/include/sound/ad1816a.h
@@ -20,9 +20,9 @@
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 */
 
-#include "control.h"
-#include "pcm.h"
-#include "timer.h"
+#include <sound/control.h>
+#include <sound/pcm.h>
+#include <sound/timer.h>
 
 #define AD1816A_REG(r)			(chip->port + r)
 
diff --git a/include/sound/ak4531_codec.h b/include/sound/ak4531_codec.h
index 575296cf7987..85ea86ea35b3 100644
--- a/include/sound/ak4531_codec.h
+++ b/include/sound/ak4531_codec.h
@@ -25,8 +25,8 @@
  *
  */
 
-#include "info.h"
-#include "control.h"
+#include <sound/info.h>
+#include <sound/control.h>
 
 /*
  *  ASAHI KASEI - AK4531 codec
diff --git a/include/sound/emu10k1_synth.h b/include/sound/emu10k1_synth.h
index 6ef61c420935..9f211e957bf9 100644
--- a/include/sound/emu10k1_synth.h
+++ b/include/sound/emu10k1_synth.h
@@ -20,8 +20,8 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "emu10k1.h"
-#include "emux_synth.h"
+#include <sound/emu10k1.h>
+#include <sound/emux_synth.h>
 
 /* sequencer device id */
 #define SNDRV_SEQ_DEV_ID_EMU10K1_SYNTH	"emu10k1-synth"
diff --git a/include/sound/emu8000.h b/include/sound/emu8000.h
index c8f66bde6d95..c321302a9143 100644
--- a/include/sound/emu8000.h
+++ b/include/sound/emu8000.h
@@ -21,8 +21,8 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "emux_synth.h"
-#include "seq_kernel.h"
+#include <sound/emux_synth.h>
+#include <sound/seq_kernel.h>
 
 /*
  * Hardware parameters.
diff --git a/include/sound/emux_legacy.h b/include/sound/emux_legacy.h
index 6fe3da2a5e15..baf43fc24d39 100644
--- a/include/sound/emux_legacy.h
+++ b/include/sound/emux_legacy.h
@@ -22,7 +22,7 @@
  *
  */
 
-#include "seq_oss_legacy.h"
+#include <sound/seq_oss_legacy.h>
 
 /*
  * awe hardware controls
diff --git a/include/sound/emux_synth.h b/include/sound/emux_synth.h
index d8cb51b86c20..fb81f3722b6a 100644
--- a/include/sound/emux_synth.h
+++ b/include/sound/emux_synth.h
@@ -21,15 +21,15 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "seq_kernel.h"
-#include "seq_device.h"
-#include "soundfont.h"
-#include "seq_midi_emul.h"
+#include <sound/seq_kernel.h>
+#include <sound/seq_device.h>
+#include <sound/soundfont.h>
+#include <sound/seq_midi_emul.h>
 #ifdef CONFIG_SND_SEQUENCER_OSS
-#include "seq_oss.h"
+#include <sound/seq_oss.h>
 #endif
-#include "emux_legacy.h"
-#include "seq_virmidi.h"
+#include <sound/emux_legacy.h>
+#include <sound/seq_virmidi.h>
 
 /*
  * compile flags
diff --git a/include/sound/es1688.h b/include/sound/es1688.h
index f752dd33dfaf..1d636a2d8896 100644
--- a/include/sound/es1688.h
+++ b/include/sound/es1688.h
@@ -22,8 +22,8 @@
  *
  */
 
-#include "control.h"
-#include "pcm.h"
+#include <sound/control.h>
+#include <sound/pcm.h>
 #include <linux/interrupt.h>
 
 #define ES1688_HW_AUTO		0x0000
diff --git a/include/sound/gus.h b/include/sound/gus.h
index 841bb8df38c1..42905d811da7 100644
--- a/include/sound/gus.h
+++ b/include/sound/gus.h
@@ -22,11 +22,11 @@
  *
  */
 
-#include "pcm.h"
-#include "rawmidi.h"
-#include "timer.h"
-#include "seq_midi_emul.h"
-#include "seq_device.h"
+#include <sound/pcm.h>
+#include <sound/rawmidi.h>
+#include <sound/timer.h>
+#include <sound/seq_midi_emul.h>
+#include <sound/seq_device.h>
 #include <asm/io.h>
 
 /* IO ports */
diff --git a/include/sound/mpu401.h b/include/sound/mpu401.h
index 20230db00ef1..e94209692513 100644
--- a/include/sound/mpu401.h
+++ b/include/sound/mpu401.h
@@ -22,7 +22,7 @@
  *
  */
 
-#include "rawmidi.h"
+#include <sound/rawmidi.h>
 #include <linux/interrupt.h>
 
 #define MPU401_HW_MPU401		1	/* native MPU401 */
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index cdca2ab1e711..d0711bc8c914 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -35,7 +35,7 @@
 #define snd_pcm_chip(pcm) ((pcm)->private_data)
 
 #if defined(CONFIG_SND_PCM_OSS) || defined(CONFIG_SND_PCM_OSS_MODULE)
-#include "pcm_oss.h"
+#include <sound/pcm_oss.h>
 #endif
 
 /*
diff --git a/include/sound/rawmidi.h b/include/sound/rawmidi.h
index 6b14359d9fed..adf0885153f3 100644
--- a/include/sound/rawmidi.h
+++ b/include/sound/rawmidi.h
@@ -30,7 +30,7 @@
 #include <linux/workqueue.h>
 
 #if defined(CONFIG_SND_SEQUENCER) || defined(CONFIG_SND_SEQUENCER_MODULE)
-#include "seq_device.h"
+#include <sound/seq_device.h>
 #endif
 
 /*
diff --git a/include/sound/sb.h b/include/sound/sb.h
index 95353542256a..ba3960329646 100644
--- a/include/sound/sb.h
+++ b/include/sound/sb.h
@@ -22,8 +22,8 @@
  *
  */
 
-#include "pcm.h"
-#include "rawmidi.h"
+#include <sound/pcm.h>
+#include <sound/rawmidi.h>
 #include <linux/interrupt.h>
 #include <asm/io.h>
 
diff --git a/include/sound/sb16_csp.h b/include/sound/sb16_csp.h
index af1b49e982df..7e950560e591 100644
--- a/include/sound/sb16_csp.h
+++ b/include/sound/sb16_csp.h
@@ -119,8 +119,8 @@ struct snd_sb_csp_info {
 #define SNDRV_SB_CSP_IOCTL_RESTART	_IO('H', 0x16)
 
 #ifdef __KERNEL__
-#include "sb.h"
-#include "hwdep.h"
+#include <sound/sb.h>
+#include <sound/hwdep.h>
 #include <linux/firmware.h>
 
 struct snd_sb_csp;
diff --git a/include/sound/seq_kernel.h b/include/sound/seq_kernel.h
index f352a98ce4f4..2398521f0998 100644
--- a/include/sound/seq_kernel.h
+++ b/include/sound/seq_kernel.h
@@ -22,7 +22,7 @@
  *
  */
 #include <linux/time.h>
-#include "asequencer.h"
+#include <sound/asequencer.h>
 
 typedef struct snd_seq_real_time snd_seq_real_time_t;
 typedef union snd_seq_timestamp snd_seq_timestamp_t;
diff --git a/include/sound/seq_midi_emul.h b/include/sound/seq_midi_emul.h
index d6c4615901b9..8139d8c191ed 100644
--- a/include/sound/seq_midi_emul.h
+++ b/include/sound/seq_midi_emul.h
@@ -22,7 +22,7 @@
  *
  */
 
-#include "seq_kernel.h"
+#include <sound/seq_kernel.h>
 
 /*
  * This structure is used to keep track of the current state on each
diff --git a/include/sound/seq_midi_event.h b/include/sound/seq_midi_event.h
index 5efab8b29c57..e40f43e6fc7b 100644
--- a/include/sound/seq_midi_event.h
+++ b/include/sound/seq_midi_event.h
@@ -22,7 +22,7 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "asequencer.h"
+#include <sound/asequencer.h>
 
 #define MAX_MIDI_EVENT_BUF	256
 
diff --git a/include/sound/seq_oss.h b/include/sound/seq_oss.h
index 9b060bbd6e02..d0b27ec6f8b8 100644
--- a/include/sound/seq_oss.h
+++ b/include/sound/seq_oss.h
@@ -21,8 +21,8 @@
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "asequencer.h"
-#include "seq_kernel.h"
+#include <sound/asequencer.h>
+#include <sound/seq_kernel.h>
 
 /*
  * argument structure for synthesizer operations
diff --git a/include/sound/seq_virmidi.h b/include/sound/seq_virmidi.h
index d888433a3096..a03acd0d398a 100644
--- a/include/sound/seq_virmidi.h
+++ b/include/sound/seq_virmidi.h
@@ -22,8 +22,8 @@
  *
  */
 
-#include "rawmidi.h"
-#include "seq_midi_event.h"
+#include <sound/rawmidi.h>
+#include <sound/seq_midi_event.h>
 
 /*
  * device file instance:
diff --git a/include/sound/snd_wavefront.h b/include/sound/snd_wavefront.h
index fa149ca77e4b..35e94b3d1ec7 100644
--- a/include/sound/snd_wavefront.h
+++ b/include/sound/snd_wavefront.h
@@ -1,10 +1,10 @@
 #ifndef __SOUND_SND_WAVEFRONT_H__
 #define __SOUND_SND_WAVEFRONT_H__
 
-#include "mpu401.h"
-#include "hwdep.h"
-#include "rawmidi.h"
-#include "wavefront.h"  /* generic OSS/ALSA/user-level wavefront header */
+#include <sound/mpu401.h>
+#include <sound/hwdep.h>
+#include <sound/rawmidi.h>
+#include <sound/wavefront.h>  /* generic OSS/ALSA/user-level wavefront header */
 
 /* MIDI interface */
 
diff --git a/include/sound/soundfont.h b/include/sound/soundfont.h
index 679df0574066..7c93efdba90d 100644
--- a/include/sound/soundfont.h
+++ b/include/sound/soundfont.h
@@ -22,8 +22,8 @@
  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  */
 
-#include "sfnt_info.h"
-#include "util_mem.h"
+#include <sound/sfnt_info.h>
+#include <sound/util_mem.h>
 
 #define SF_MAX_INSTRUMENTS	128	/* maximum instrument number */
 #define SF_MAX_PRESETS  256	/* drums are mapped from 128 to 256 */
diff --git a/include/sound/tea6330t.h b/include/sound/tea6330t.h
index 51b282b76896..e6beec23d7f2 100644
--- a/include/sound/tea6330t.h
+++ b/include/sound/tea6330t.h
@@ -22,7 +22,7 @@
  *
  */
 
-#include "i2c.h"		/* generic i2c support */
+#include <sound/i2c.h>		/* generic i2c support */
 
 int snd_tea6330t_detect(struct snd_i2c_bus *bus, int equalizer);
 int snd_tea6330t_update_mixer(struct snd_card *card, struct snd_i2c_bus *bus,
diff --git a/include/sound/wss.h b/include/sound/wss.h
index fd01f22825cd..0c7f034f1e86 100644
--- a/include/sound/wss.h
+++ b/include/sound/wss.h
@@ -22,11 +22,11 @@
  *
  */
 
-#include "control.h"
-#include "pcm.h"
-#include "timer.h"
+#include <sound/control.h>
+#include <sound/pcm.h>
+#include <sound/timer.h>
 
-#include "cs4231-regs.h"
+#include <sound/cs4231-regs.h>
 
 /* defines for codec.mode */
 
diff --git a/include/trace/events/compaction.h b/include/trace/events/compaction.h
index 388bcdd26d46..fde1b3e94c7d 100644
--- a/include/trace/events/compaction.h
+++ b/include/trace/events/compaction.h
@@ -6,7 +6,7 @@
 
 #include <linux/types.h>
 #include <linux/tracepoint.h>
-#include "gfpflags.h"
+#include <trace/events/gfpflags.h>
 
 DECLARE_EVENT_CLASS(mm_compaction_isolate_template,
 
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 08fa27244da7..6bc943ecb841 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -6,7 +6,7 @@
 
 #include <linux/types.h>
 #include <linux/tracepoint.h>
-#include "gfpflags.h"
+#include <trace/events/gfpflags.h>
 
 DECLARE_EVENT_CLASS(kmem_alloc,
 
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index bab3b87e4064..63cfcccaebb3 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -8,7 +8,7 @@
 #include <linux/tracepoint.h>
 #include <linux/mm.h>
 #include <linux/memcontrol.h>
-#include "gfpflags.h"
+#include <trace/events/gfpflags.h>
 
 #define RECLAIM_WB_ANON		0x0001u
 #define RECLAIM_WB_FILE		0x0002u
diff --git a/include/xen/interface/callback.h b/include/xen/interface/callback.h
index 2ae3cd243264..8c5fa0e20155 100644
--- a/include/xen/interface/callback.h
+++ b/include/xen/interface/callback.h
@@ -27,7 +27,7 @@
 #ifndef __XEN_PUBLIC_CALLBACK_H__
 #define __XEN_PUBLIC_CALLBACK_H__
 
-#include "xen.h"
+#include <xen/interface/xen.h>
 
 /*
  * Prototype for this hypercall is:
diff --git a/include/xen/interface/hvm/params.h b/include/xen/interface/hvm/params.h
index 1b4f923d7086..a6c79911e729 100644
--- a/include/xen/interface/hvm/params.h
+++ b/include/xen/interface/hvm/params.h
@@ -21,7 +21,7 @@
 #ifndef __XEN_PUBLIC_HVM_PARAMS_H__
 #define __XEN_PUBLIC_HVM_PARAMS_H__
 
-#include "hvm_op.h"
+#include <xen/interface/hvm/hvm_op.h>
 
 /*
  * Parameter space for HVMOP_{set,get}_param.
diff --git a/include/xen/interface/io/blkif.h b/include/xen/interface/io/blkif.h
index ee338bfde18b..01c3d62436ef 100644
--- a/include/xen/interface/io/blkif.h
+++ b/include/xen/interface/io/blkif.h
@@ -9,8 +9,8 @@
 #ifndef __XEN_PUBLIC_IO_BLKIF_H__
 #define __XEN_PUBLIC_IO_BLKIF_H__
 
-#include "ring.h"
-#include "../grant_table.h"
+#include <xen/interface/io/ring.h>
+#include <xen/interface/grant_table.h>
 
 /*
  * Front->back notifications: When enqueuing a new request, sending a
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index cb94668f6e9f..9dfc12000980 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -9,8 +9,8 @@
 #ifndef __XEN_PUBLIC_IO_NETIF_H__
 #define __XEN_PUBLIC_IO_NETIF_H__
 
-#include "ring.h"
-#include "../grant_table.h"
+#include <xen/interface/io/ring.h>
+#include <xen/interface/grant_table.h>
 
 /*
  * Notifications after enqueuing any type of message should be conditional on
diff --git a/include/xen/interface/platform.h b/include/xen/interface/platform.h
index 61fa66160983..52ff8377d3bd 100644
--- a/include/xen/interface/platform.h
+++ b/include/xen/interface/platform.h
@@ -27,7 +27,7 @@
 #ifndef __XEN_PUBLIC_PLATFORM_H__
 #define __XEN_PUBLIC_PLATFORM_H__
 
-#include "xen.h"
+#include <xen/interface/xen.h>
 
 #define XENPF_INTERFACE_VERSION 0x03000001
 
diff --git a/include/xen/interface/sched.h b/include/xen/interface/sched.h
index dd55dac340de..9ce083960a25 100644
--- a/include/xen/interface/sched.h
+++ b/include/xen/interface/sched.h
@@ -9,7 +9,7 @@
 #ifndef __XEN_PUBLIC_SCHED_H__
 #define __XEN_PUBLIC_SCHED_H__
 
-#include "event_channel.h"
+#include <xen/interface/event_channel.h>
 
 /*
  * The prototype for this hypercall is:
diff --git a/include/xen/interface/version.h b/include/xen/interface/version.h
index e8b6519d47e9..ff372a5ddfe7 100644
--- a/include/xen/interface/version.h
+++ b/include/xen/interface/version.h
@@ -55,7 +55,7 @@ struct xen_feature_info {
 };
 
 /* Declares the features reported by XENVER_get_features. */
-#include "features.h"
+#include <xen/interface/features.h>
 
 /* arg == NULL; returns host memory page size. */
 #define XENVER_pagesize 7
-- 
cgit v1.2.3


From abbf1590de22a6d2240a59383477da50d1402f6a Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:26 +0100
Subject: UAPI: Partition the header include path sets and add uapi/ header
 directories

Partition the header include path flags into two sets, one for kernelspace
builds and one for userspace builds.

Add the following directories to build after the ordinary include directories
so that #include will pick up the UAPI header directly if the kernel header
has been moved there.

The userspace set (represented by the USERINCLUDE make variable) contains:

	-I $(srctree)/arch/$(hdr-arch)/include/uapi
	-I arch/$(hdr-arch)/include/generated/uapi
	-I $(srctree)/include/uapi
	-I include/generated/uapi
	-include $(srctree)/include/linux/kconfig.h

and the kernelspace set (represented by the LINUXINCLUDE make variable)
contains:

	-I $(srctree)/arch/$(hdr-arch)/include
	-I arch/$(hdr-arch)/include/generated
	-I $(srctree)/include
	-I include		--- if not building in the source tree

plus everything in the USERINCLUDE set.

Then use USERINCLUDE in building the x86 boot code.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 Makefile                          | 18 ++++++++++++++----
 arch/cris/Makefile                |  4 +++-
 arch/um/Makefile                  |  4 +++-
 arch/x86/boot/Makefile            |  4 ++--
 arch/x86/boot/mkcpustr.c          |  2 ++
 arch/x86/include/asm/cpufeature.h |  2 ++
 arch/x86/kernel/cpu/mkcapflags.pl |  5 ++++-
 7 files changed, 30 insertions(+), 9 deletions(-)

(limited to 'arch/x86')

diff --git a/Makefile b/Makefile
index 846dd7607854..da1707c3904b 100644
--- a/Makefile
+++ b/Makefile
@@ -350,12 +350,22 @@ AFLAGS_KERNEL	=
 CFLAGS_GCOV	= -fprofile-arcs -ftest-coverage
 
 
+# Use USERINCLUDE when you must reference the UAPI directories only.
+USERINCLUDE    := \
+		-I$(srctree)/arch/$(hdr-arch)/include/uapi \
+		-Iarch/$(hdr-arch)/include/generated/uapi \
+		-I$(srctree)/include/uapi \
+		-Iinclude/generated/uapi \
+                -include $(srctree)/include/linux/kconfig.h
+
 # Use LINUXINCLUDE when you must reference the include/ directory.
 # Needed to be compatible with the O= option
-LINUXINCLUDE    := -I$(srctree)/arch/$(hdr-arch)/include \
-                   -Iarch/$(hdr-arch)/include/generated -Iinclude \
-                   $(if $(KBUILD_SRC), -I$(srctree)/include) \
-                   -include $(srctree)/include/linux/kconfig.h
+LINUXINCLUDE    := \
+		-I$(srctree)/arch/$(hdr-arch)/include \
+		-Iarch/$(hdr-arch)/include/generated \
+		$(if $(KBUILD_SRC), -I$(srctree)/include) \
+		-Iinclude \
+		$(USERINCLUDE)
 
 KBUILD_CPPFLAGS := -D__KERNEL__
 
diff --git a/arch/cris/Makefile b/arch/cris/Makefile
index 29c2ceb38a76..39dc7d00083e 100644
--- a/arch/cris/Makefile
+++ b/arch/cris/Makefile
@@ -23,7 +23,9 @@ mach-$(CONFIG_ETRAXFS) := fs
 
 ifneq ($(arch-y),)
 SARCH := arch-$(arch-y)
-inc := -Iarch/cris/include/$(SARCH)
+inc := -Iarch/cris/include/uapi/$(SARCH)
+inc += -Iarch/cris/include/$(SARCH)
+inc += -Iarch/cris/include/uapi/$(SARCH)/arch
 inc += -Iarch/cris/include/$(SARCH)/arch
 else
 SARCH :=
diff --git a/arch/um/Makefile b/arch/um/Makefile
index 097091059aaa..133f7de2a13d 100644
--- a/arch/um/Makefile
+++ b/arch/um/Makefile
@@ -66,7 +66,9 @@ USER_CFLAGS = $(patsubst $(KERNEL_DEFINES),,$(patsubst -D__KERNEL__,,\
 include $(srctree)/$(ARCH_DIR)/Makefile-os-$(OS)
 
 KBUILD_CPPFLAGS += -I$(srctree)/$(HOST_DIR)/include \
-		   -I$(HOST_DIR)/include/generated
+		   -I$(srctree)/$(HOST_DIR)/include/uapi \
+		   -I$(HOST_DIR)/include/generated \
+		   -I$(HOST_DIR)/include/generated/uapi
 
 # -Derrno=kernel_errno - This turns all kernel references to errno into
 # kernel_errno to separate them from the libc errno.  This allows -fno-common
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index f7535bedc33f..ce03476d8c8f 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -37,7 +37,7 @@ setup-y		+= video-bios.o
 targets		+= $(setup-y)
 hostprogs-y	:= mkcpustr tools/build
 
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(LINUXINCLUDE) \
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(USERINCLUDE) \
 	            -D__EXPORTED_HEADERS__
 
 $(obj)/cpu.o: $(obj)/cpustr.h
@@ -52,7 +52,7 @@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE
 
 # How to compile the 16-bit code.  Note we always compile for -march=i386,
 # that way we can complain to the user if the CPU is insufficient.
-KBUILD_CFLAGS	:= $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
+KBUILD_CFLAGS	:= $(USERINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
 		   -DDISABLE_BRANCH_PROFILING \
 		   -Wall -Wstrict-prototypes \
 		   -march=i386 -mregparm=3 \
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index 919257f526f2..4579eff0ef4d 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -15,6 +15,8 @@
 
 #include <stdio.h>
 
+#include "../include/asm/required-features.h"
+#include "../include/asm/cpufeature.h"
 #include "../kernel/cpu/capflags.c"
 
 int main(void)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 16cae425d1f8..8c297aa53eef 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -4,7 +4,9 @@
 #ifndef _ASM_X86_CPUFEATURE_H
 #define _ASM_X86_CPUFEATURE_H
 
+#ifndef _ASM_X86_REQUIRED_FEATURES_H
 #include <asm/required-features.h>
+#endif
 
 #define NCAPINTS	10	/* N 32-bit words worth of info */
 
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
index c7b3fe2d72e0..091972ef49de 100644
--- a/arch/x86/kernel/cpu/mkcapflags.pl
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -8,7 +8,10 @@
 open(IN, "< $in\0")   or die "$0: cannot open: $in: $!\n";
 open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
 
-print OUT "#include <asm/cpufeature.h>\n\n";
+print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n";
+print OUT "#include <asm/cpufeature.h>\n";
+print OUT "#endif\n";
+print OUT "\n";
 print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
 
 %features = ();
-- 
cgit v1.2.3


From 4413e16d9d21673bb5048a2e542f1aaa00015c2e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:35 +0100
Subject: UAPI: (Scripted) Set up UAPI Kbuild files

Set up empty UAPI Kbuild files to be populated by the header splitter.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 arch/alpha/include/uapi/asm/Kbuild          |  3 +++
 arch/arm/include/uapi/asm/Kbuild            |  3 +++
 arch/arm64/include/uapi/asm/Kbuild          |  3 +++
 arch/avr32/include/uapi/asm/Kbuild          |  3 +++
 arch/blackfin/include/uapi/asm/Kbuild       |  3 +++
 arch/c6x/include/uapi/asm/Kbuild            |  3 +++
 arch/cris/include/uapi/arch-v10/arch/Kbuild |  1 +
 arch/cris/include/uapi/arch-v32/arch/Kbuild |  1 +
 arch/cris/include/uapi/asm/Kbuild           |  5 +++++
 arch/frv/include/uapi/asm/Kbuild            |  3 +++
 arch/h8300/include/uapi/asm/Kbuild          |  3 +++
 arch/hexagon/include/uapi/asm/Kbuild        |  3 +++
 arch/ia64/include/uapi/asm/Kbuild           |  3 +++
 arch/m32r/include/uapi/asm/Kbuild           |  3 +++
 arch/m68k/include/uapi/asm/Kbuild           |  3 +++
 arch/microblaze/include/uapi/asm/Kbuild     |  3 +++
 arch/mips/include/uapi/asm/Kbuild           |  3 +++
 arch/mn10300/include/uapi/asm/Kbuild        |  3 +++
 arch/openrisc/include/uapi/asm/Kbuild       |  3 +++
 arch/parisc/include/uapi/asm/Kbuild         |  3 +++
 arch/powerpc/include/uapi/asm/Kbuild        |  3 +++
 arch/s390/include/uapi/asm/Kbuild           |  3 +++
 arch/score/include/uapi/asm/Kbuild          |  3 +++
 arch/sh/include/uapi/asm/Kbuild             |  3 +++
 arch/sparc/include/uapi/asm/Kbuild          |  5 +++++
 arch/tile/include/uapi/arch/Kbuild          |  1 +
 arch/tile/include/uapi/asm/Kbuild           |  3 +++
 arch/unicore32/include/uapi/asm/Kbuild      |  3 +++
 arch/x86/include/uapi/asm/Kbuild            |  6 ++++++
 arch/xtensa/include/uapi/asm/Kbuild         |  3 +++
 include/uapi/Kbuild                         | 14 ++++++++++++++
 include/uapi/asm-generic/Kbuild             |  1 +
 include/uapi/drm/Kbuild                     |  1 +
 include/uapi/linux/Kbuild                   | 22 ++++++++++++++++++++++
 include/uapi/linux/byteorder/Kbuild         |  1 +
 include/uapi/linux/caif/Kbuild              |  1 +
 include/uapi/linux/can/Kbuild               |  1 +
 include/uapi/linux/dvb/Kbuild               |  1 +
 include/uapi/linux/hdlc/Kbuild              |  1 +
 include/uapi/linux/hsi/Kbuild               |  1 +
 include/uapi/linux/isdn/Kbuild              |  1 +
 include/uapi/linux/mmc/Kbuild               |  1 +
 include/uapi/linux/netfilter/Kbuild         |  2 ++
 include/uapi/linux/netfilter/ipset/Kbuild   |  1 +
 include/uapi/linux/netfilter_arp/Kbuild     |  1 +
 include/uapi/linux/netfilter_bridge/Kbuild  |  1 +
 include/uapi/linux/netfilter_ipv4/Kbuild    |  1 +
 include/uapi/linux/netfilter_ipv6/Kbuild    |  1 +
 include/uapi/linux/nfsd/Kbuild              |  1 +
 include/uapi/linux/raid/Kbuild              |  1 +
 include/uapi/linux/spi/Kbuild               |  1 +
 include/uapi/linux/sunrpc/Kbuild            |  1 +
 include/uapi/linux/tc_act/Kbuild            |  1 +
 include/uapi/linux/tc_ematch/Kbuild         |  1 +
 include/uapi/linux/usb/Kbuild               |  1 +
 include/uapi/linux/wimax/Kbuild             |  1 +
 include/uapi/mtd/Kbuild                     |  1 +
 include/uapi/rdma/Kbuild                    |  1 +
 include/uapi/scsi/Kbuild                    |  2 ++
 include/uapi/scsi/fc/Kbuild                 |  1 +
 include/uapi/sound/Kbuild                   |  1 +
 include/uapi/video/Kbuild                   |  1 +
 include/uapi/xen/Kbuild                     |  1 +
 63 files changed, 160 insertions(+)
 create mode 100644 arch/alpha/include/uapi/asm/Kbuild
 create mode 100644 arch/arm/include/uapi/asm/Kbuild
 create mode 100644 arch/arm64/include/uapi/asm/Kbuild
 create mode 100644 arch/avr32/include/uapi/asm/Kbuild
 create mode 100644 arch/blackfin/include/uapi/asm/Kbuild
 create mode 100644 arch/c6x/include/uapi/asm/Kbuild
 create mode 100644 arch/cris/include/uapi/arch-v10/arch/Kbuild
 create mode 100644 arch/cris/include/uapi/arch-v32/arch/Kbuild
 create mode 100644 arch/cris/include/uapi/asm/Kbuild
 create mode 100644 arch/frv/include/uapi/asm/Kbuild
 create mode 100644 arch/h8300/include/uapi/asm/Kbuild
 create mode 100644 arch/hexagon/include/uapi/asm/Kbuild
 create mode 100644 arch/ia64/include/uapi/asm/Kbuild
 create mode 100644 arch/m32r/include/uapi/asm/Kbuild
 create mode 100644 arch/m68k/include/uapi/asm/Kbuild
 create mode 100644 arch/microblaze/include/uapi/asm/Kbuild
 create mode 100644 arch/mips/include/uapi/asm/Kbuild
 create mode 100644 arch/mn10300/include/uapi/asm/Kbuild
 create mode 100644 arch/openrisc/include/uapi/asm/Kbuild
 create mode 100644 arch/parisc/include/uapi/asm/Kbuild
 create mode 100644 arch/powerpc/include/uapi/asm/Kbuild
 create mode 100644 arch/s390/include/uapi/asm/Kbuild
 create mode 100644 arch/score/include/uapi/asm/Kbuild
 create mode 100644 arch/sh/include/uapi/asm/Kbuild
 create mode 100644 arch/sparc/include/uapi/asm/Kbuild
 create mode 100644 arch/tile/include/uapi/arch/Kbuild
 create mode 100644 arch/tile/include/uapi/asm/Kbuild
 create mode 100644 arch/unicore32/include/uapi/asm/Kbuild
 create mode 100644 arch/x86/include/uapi/asm/Kbuild
 create mode 100644 arch/xtensa/include/uapi/asm/Kbuild
 create mode 100644 include/uapi/Kbuild
 create mode 100644 include/uapi/asm-generic/Kbuild
 create mode 100644 include/uapi/drm/Kbuild
 create mode 100644 include/uapi/linux/Kbuild
 create mode 100644 include/uapi/linux/byteorder/Kbuild
 create mode 100644 include/uapi/linux/caif/Kbuild
 create mode 100644 include/uapi/linux/can/Kbuild
 create mode 100644 include/uapi/linux/dvb/Kbuild
 create mode 100644 include/uapi/linux/hdlc/Kbuild
 create mode 100644 include/uapi/linux/hsi/Kbuild
 create mode 100644 include/uapi/linux/isdn/Kbuild
 create mode 100644 include/uapi/linux/mmc/Kbuild
 create mode 100644 include/uapi/linux/netfilter/Kbuild
 create mode 100644 include/uapi/linux/netfilter/ipset/Kbuild
 create mode 100644 include/uapi/linux/netfilter_arp/Kbuild
 create mode 100644 include/uapi/linux/netfilter_bridge/Kbuild
 create mode 100644 include/uapi/linux/netfilter_ipv4/Kbuild
 create mode 100644 include/uapi/linux/netfilter_ipv6/Kbuild
 create mode 100644 include/uapi/linux/nfsd/Kbuild
 create mode 100644 include/uapi/linux/raid/Kbuild
 create mode 100644 include/uapi/linux/spi/Kbuild
 create mode 100644 include/uapi/linux/sunrpc/Kbuild
 create mode 100644 include/uapi/linux/tc_act/Kbuild
 create mode 100644 include/uapi/linux/tc_ematch/Kbuild
 create mode 100644 include/uapi/linux/usb/Kbuild
 create mode 100644 include/uapi/linux/wimax/Kbuild
 create mode 100644 include/uapi/mtd/Kbuild
 create mode 100644 include/uapi/rdma/Kbuild
 create mode 100644 include/uapi/scsi/Kbuild
 create mode 100644 include/uapi/scsi/fc/Kbuild
 create mode 100644 include/uapi/sound/Kbuild
 create mode 100644 include/uapi/video/Kbuild
 create mode 100644 include/uapi/xen/Kbuild

(limited to 'arch/x86')

diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/alpha/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/arm64/include/uapi/asm/Kbuild b/arch/arm64/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/arm64/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/avr32/include/uapi/asm/Kbuild b/arch/avr32/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/avr32/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/blackfin/include/uapi/asm/Kbuild b/arch/blackfin/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/blackfin/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/cris/include/uapi/arch-v10/arch/Kbuild b/arch/cris/include/uapi/arch-v10/arch/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/arch/cris/include/uapi/arch-v10/arch/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/arch/cris/include/uapi/arch-v32/arch/Kbuild b/arch/cris/include/uapi/arch-v32/arch/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/arch/cris/include/uapi/arch-v32/arch/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..f50236ae9ca3
--- /dev/null
+++ b/arch/cris/include/uapi/asm/Kbuild
@@ -0,0 +1,5 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+header-y += arch-v10/
+header-y += arch-v32/
diff --git a/arch/frv/include/uapi/asm/Kbuild b/arch/frv/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/frv/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/hexagon/include/uapi/asm/Kbuild b/arch/hexagon/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/hexagon/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/m32r/include/uapi/asm/Kbuild b/arch/m32r/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/m32r/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/mips/include/uapi/asm/Kbuild b/arch/mips/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/mips/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/mn10300/include/uapi/asm/Kbuild b/arch/mn10300/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/mn10300/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/score/include/uapi/asm/Kbuild b/arch/score/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/score/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/sparc/include/uapi/asm/Kbuild b/arch/sparc/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..7518ad286963
--- /dev/null
+++ b/arch/sparc/include/uapi/asm/Kbuild
@@ -0,0 +1,5 @@
+# UAPI Header export list
+# User exported sparc header files
+
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/tile/include/uapi/arch/Kbuild b/arch/tile/include/uapi/arch/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/arch/tile/include/uapi/arch/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/arch/tile/include/uapi/asm/Kbuild b/arch/tile/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/tile/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/arch/x86/include/uapi/asm/Kbuild b/arch/x86/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..83b6e9a0dce4
--- /dev/null
+++ b/arch/x86/include/uapi/asm/Kbuild
@@ -0,0 +1,6 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
+genhdr-y += unistd_32.h
+genhdr-y += unistd_64.h
+genhdr-y += unistd_x32.h
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
new file mode 100644
index 000000000000..baebb3da1d44
--- /dev/null
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+include include/uapi/asm-generic/Kbuild.asm
+
diff --git a/include/uapi/Kbuild b/include/uapi/Kbuild
new file mode 100644
index 000000000000..81d2106287fe
--- /dev/null
+++ b/include/uapi/Kbuild
@@ -0,0 +1,14 @@
+# UAPI Header export list
+# Top-level Makefile calls into asm-$(ARCH)
+# List only non-arch directories below
+
+
+header-y += asm-generic/
+header-y += linux/
+header-y += sound/
+header-y += mtd/
+header-y += rdma/
+header-y += video/
+header-y += drm/
+header-y += xen/
+header-y += scsi/
diff --git a/include/uapi/asm-generic/Kbuild b/include/uapi/asm-generic/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/asm-generic/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/drm/Kbuild b/include/uapi/drm/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/drm/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
new file mode 100644
index 000000000000..13a9cf4cb6fe
--- /dev/null
+++ b/include/uapi/linux/Kbuild
@@ -0,0 +1,22 @@
+# UAPI Header export list
+header-y += byteorder/
+header-y += can/
+header-y += caif/
+header-y += dvb/
+header-y += hdlc/
+header-y += hsi/
+header-y += isdn/
+header-y += mmc/
+header-y += nfsd/
+header-y += raid/
+header-y += spi/
+header-y += sunrpc/
+header-y += tc_act/
+header-y += tc_ematch/
+header-y += netfilter/
+header-y += netfilter_arp/
+header-y += netfilter_bridge/
+header-y += netfilter_ipv4/
+header-y += netfilter_ipv6/
+header-y += usb/
+header-y += wimax/
diff --git a/include/uapi/linux/byteorder/Kbuild b/include/uapi/linux/byteorder/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/byteorder/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/caif/Kbuild b/include/uapi/linux/caif/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/caif/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/can/Kbuild b/include/uapi/linux/can/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/can/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/dvb/Kbuild b/include/uapi/linux/dvb/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/dvb/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/hdlc/Kbuild b/include/uapi/linux/hdlc/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/hdlc/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/hsi/Kbuild b/include/uapi/linux/hsi/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/hsi/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/isdn/Kbuild b/include/uapi/linux/isdn/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/isdn/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/mmc/Kbuild b/include/uapi/linux/mmc/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/mmc/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild
new file mode 100644
index 000000000000..4afbace8e869
--- /dev/null
+++ b/include/uapi/linux/netfilter/Kbuild
@@ -0,0 +1,2 @@
+# UAPI Header export list
+header-y += ipset/
diff --git a/include/uapi/linux/netfilter/ipset/Kbuild b/include/uapi/linux/netfilter/ipset/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/netfilter/ipset/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/netfilter_arp/Kbuild b/include/uapi/linux/netfilter_arp/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/netfilter_arp/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/netfilter_bridge/Kbuild b/include/uapi/linux/netfilter_bridge/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/netfilter_bridge/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/netfilter_ipv4/Kbuild b/include/uapi/linux/netfilter_ipv4/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/netfilter_ipv4/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/netfilter_ipv6/Kbuild b/include/uapi/linux/netfilter_ipv6/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/netfilter_ipv6/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/nfsd/Kbuild b/include/uapi/linux/nfsd/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/nfsd/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/raid/Kbuild b/include/uapi/linux/raid/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/raid/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/spi/Kbuild b/include/uapi/linux/spi/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/spi/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/sunrpc/Kbuild b/include/uapi/linux/sunrpc/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/sunrpc/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/tc_act/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/tc_ematch/Kbuild b/include/uapi/linux/tc_ematch/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/tc_ematch/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/usb/Kbuild b/include/uapi/linux/usb/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/usb/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/linux/wimax/Kbuild b/include/uapi/linux/wimax/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/linux/wimax/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/mtd/Kbuild b/include/uapi/mtd/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/mtd/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/rdma/Kbuild b/include/uapi/rdma/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/rdma/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/scsi/Kbuild b/include/uapi/scsi/Kbuild
new file mode 100644
index 000000000000..29a87dd26cfb
--- /dev/null
+++ b/include/uapi/scsi/Kbuild
@@ -0,0 +1,2 @@
+# UAPI Header export list
+header-y += fc/
diff --git a/include/uapi/scsi/fc/Kbuild b/include/uapi/scsi/fc/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/scsi/fc/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/sound/Kbuild b/include/uapi/sound/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/sound/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/video/Kbuild b/include/uapi/video/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/video/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
diff --git a/include/uapi/xen/Kbuild b/include/uapi/xen/Kbuild
new file mode 100644
index 000000000000..aafaa5aa54d4
--- /dev/null
+++ b/include/uapi/xen/Kbuild
@@ -0,0 +1 @@
+# UAPI Header export list
-- 
cgit v1.2.3


From 584c5ef813c63354ed9e03b732048240c09b86af Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:56 +0100
Subject: UAPI: x86: Fix the test_get_len tool

Fix the x86 test_get_len tool to have the right include paths in the right
order (it includes a non-exported kernel header directly), otherwise errors
like the following occur:

/data/fs/linux-2.6-hdr/include/linux/types.h:18:26: error: conflicting types for 'fd_set'
/usr/include/sys/select.h:78:5: note: previous declaration of 'fd_set' was here

and

/data/fs/linux-2.6-hdr/include/linux/string.h:42:12: error: expected identifier or '(' before '__extension__'

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 arch/x86/tools/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/tools/Makefile b/arch/x86/tools/Makefile
index 733057b435b0..bae601f900ef 100644
--- a/arch/x86/tools/Makefile
+++ b/arch/x86/tools/Makefile
@@ -28,7 +28,7 @@ posttest: $(obj)/test_get_len vmlinux $(obj)/insn_sanity
 hostprogs-y	+= test_get_len insn_sanity
 
 # -I needed for generated C source and C source which in the kernel tree.
-HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
+HOSTCFLAGS_test_get_len.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/
 
 HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
 
-- 
cgit v1.2.3


From c0522b6cc1237c935b2cead3fa7b45465df2f839 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:56 +0100
Subject: UAPI: x86: Fix insn_sanity build failure after UAPI split

Fix a build failure in the x86 insn_sanity program after the UAPI split.  The
problem is that insn_sanity.c #includes arch/x86/lib/insn.c - which uses the
kernel string header.  This leads to conflicts for various definitions against
the /usr/include/ headers.

linux/string.h can be replaced with the normal userspace string.h if __KERNEL__
is not specified.

  HOSTCC  arch/x86/tools/insn_sanity
In file included from /data/fs/linux-2.6-hdr/include/linux/string.h:6:0,
                 from /data/fs/linux-2.6-hdr/arch/x86/lib/insn.c:21,
                 from arch/x86/tools/insn_sanity.c:36:
/data/fs/linux-2.6-hdr/include/linux/types.h:14:26: error: conflicting types for 'fd_set'
/usr/include/sys/select.h:76:5: note: previous declaration of 'fd_set' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:15:25: error: conflicting types for 'dev_t'
/usr/include/sys/types.h:61:17: note: previous declaration of 'dev_t' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:25:26: error: conflicting types for 'timer_t'
/usr/include/time.h:104:19: note: previous declaration of 'timer_t' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:45:26: error: conflicting types for 'loff_t'
/usr/include/sys/types.h:45:18: note: previous declaration of 'loff_t' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:112:17: error: conflicting types for 'u_int64_t'
/usr/include/sys/types.h:204:1: note: previous declaration of 'u_int64_t' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:113:17: error: conflicting types for 'int64_t'
/usr/include/sys/types.h:198:1: note: previous declaration of 'int64_t' was here
/data/fs/linux-2.6-hdr/include/linux/types.h:134:23: error: conflicting types for 'blkcnt_t'
/usr/include/sys/types.h:236:20: note: previous declaration of 'blkcnt_t' was here
In file included from /data/fs/linux-2.6-hdr/arch/x86/lib/insn.c:21:0,
                 from arch/x86/tools/insn_sanity.c:36:
/data/fs/linux-2.6-hdr/include/linux/string.h:38:12: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:38:12: error: expected identifier or '(' before ')' token
/data/fs/linux-2.6-hdr/include/linux/string.h:41:12: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:53:15: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:61:28: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'skip_spaces'
/data/fs/linux-2.6-hdr/include/linux/string.h:65:28: error: expected '=', ',', ';', 'asm' or '__attribute__' before 'char'
/data/fs/linux-2.6-hdr/include/linux/string.h:83:15: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:83:15: error: expected identifier or '(' before ')' token
/data/fs/linux-2.6-hdr/include/linux/string.h:86:15: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:86:15: error: expected identifier or '(' before ')' token
/data/fs/linux-2.6-hdr/include/linux/string.h:89:24: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:89:24: error: expected identifier or '(' before ')' token
/data/fs/linux-2.6-hdr/include/linux/string.h:92:24: error: expected identifier or '(' before '__extension__'
/data/fs/linux-2.6-hdr/include/linux/string.h:92:24: error: expected identifier or '(' before ')' token

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 arch/x86/lib/insn.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/lib/insn.c b/arch/x86/lib/insn.c
index b1e6c4b2e8eb..54fcffed28ed 100644
--- a/arch/x86/lib/insn.c
+++ b/arch/x86/lib/insn.c
@@ -18,7 +18,11 @@
  * Copyright (C) IBM Corporation, 2002, 2004, 2009
  */
 
+#ifdef __KERNEL__
 #include <linux/string.h>
+#else
+#include <string.h>
+#endif
 #include <asm/inat.h>
 #include <asm/insn.h>
 
-- 
cgit v1.2.3


From ec28b7f250b19f31e14b69b015d61d0818bf43a0 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:56 +0100
Subject: UAPI: x86: Differentiate the generated UAPI and internal headers

Differentiate the generated UAPI and internal headers during generation such
that the UAPI headers can be installed elsewhere.

A later patch will use this to move the UAPI headers to:

	arch/x86/include/generated/uapi/asm/

to make them easier to handle.

A previous patch added a -I for this path.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 arch/x86/syscalls/Makefile | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile
index 3236aebc828d..174b03248418 100644
--- a/arch/x86/syscalls/Makefile
+++ b/arch/x86/syscalls/Makefile
@@ -1,7 +1,9 @@
 out := $(obj)/../include/generated/asm
+uapi := $(obj)/../include/generated/asm
 
 # Create output directory if not already present
-_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
+_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
+	  $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')
 
 syscall32 := $(srctree)/$(src)/syscall_32.tbl
 syscall64 := $(srctree)/$(src)/syscall_64.tbl
@@ -18,7 +20,7 @@ quiet_cmd_systbl = SYSTBL  $@
       cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@
 
 syshdr_abi_unistd_32 := i386
-$(out)/unistd_32.h: $(syscall32) $(syshdr)
+$(uapi)/unistd_32.h: $(syscall32) $(syshdr)
 	$(call if_changed,syshdr)
 
 syshdr_abi_unistd_32_ia32 := i386
@@ -28,11 +30,11 @@ $(out)/unistd_32_ia32.h: $(syscall32) $(syshdr)
 
 syshdr_abi_unistd_x32 := common,x32
 syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT
-$(out)/unistd_x32.h: $(syscall64) $(syshdr)
+$(uapi)/unistd_x32.h: $(syscall64) $(syshdr)
 	$(call if_changed,syshdr)
 
 syshdr_abi_unistd_64 := common,64
-$(out)/unistd_64.h: $(syscall64) $(syshdr)
+$(uapi)/unistd_64.h: $(syscall64) $(syshdr)
 	$(call if_changed,syshdr)
 
 syshdr_abi_unistd_64_x32 := x32
@@ -45,11 +47,12 @@ $(out)/syscalls_32.h: $(syscall32) $(systbl)
 $(out)/syscalls_64.h: $(syscall64) $(systbl)
 	$(call if_changed,systbl)
 
-syshdr-y			+= unistd_32.h unistd_64.h unistd_x32.h
+uapisyshdr-y			+= unistd_32.h unistd_64.h unistd_x32.h
 syshdr-y			+= syscalls_32.h
 syshdr-$(CONFIG_X86_64)		+= unistd_32_ia32.h unistd_64_x32.h
 syshdr-$(CONFIG_X86_64)		+= syscalls_64.h
 
-targets	+= $(syshdr-y)
+targets	+= $(uapisyshdr-y) $(syshdr-y)
 
-all: $(addprefix $(out)/,$(targets))
+all: $(addprefix $(uapi)/,$(uapisyshdr-y))
+all: $(addprefix $(out)/,$(syshdr-y))
-- 
cgit v1.2.3


From 10b63956fce7f369cc37fd4d994f09bd5203efe4 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Tue, 2 Oct 2012 18:01:57 +0100
Subject: UAPI: Plumb the UAPI Kbuilds into the user header installation and
 checking

Plumb the UAPI Kbuilds into the user header installation and checking system.
As the headers are split the entries will be transferred across from the old
Kbuild files to the UAPI Kbuild files.

The changes made in this commit are:

 (1) Exported generated files (of which there are currently four) are moved to
     uapi/ directories under the appropriate generated/ directory, thus we
     get:

	include/generated/uapi/linux/version.h
	arch/x86/include/generated/uapi/asm/unistd_32.h
	arch/x86/include/generated/uapi/asm/unistd_64.h
	arch/x86/include/generated/uapi/asm/unistd_x32.h

     These paths were added to the build as -I flags in a previous patch.

 (2) scripts/Makefile.headersinst is now given the UAPI path to install from
     rather than the old path.

     It then determines the old path from that and includes that Kbuild also
     if it exists, thus permitting the headers to exist in either directory
     during the changeover.

     I also renamed the "install" variable to "installdir" as it refers to a
     directory not the install program.

 (3) scripts/headers_install.pl is altered to take a list of source file paths
     instead of just their names so that the makefile can tell it exactly
     where to find each file.

     For the moment, files can be obtained from one of four places for each
     output directory:

	.../include/uapi/foo/
	.../include/generated/uapi/foo/
	.../include/foo/
	.../include/generated/foo/

     The non-UAPI paths will be dropped later.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Dave Jones <davej@redhat.com>
---
 Makefile                     | 14 +++++++-------
 arch/x86/include/asm/Kbuild  |  4 ----
 arch/x86/syscalls/Makefile   |  2 +-
 include/linux/Kbuild         |  2 --
 include/uapi/linux/Kbuild    |  2 ++
 scripts/Makefile.headersinst | 45 ++++++++++++++++++++++++++++++--------------
 scripts/headers_install.pl   | 14 ++++++++------
 7 files changed, 49 insertions(+), 34 deletions(-)

(limited to 'arch/x86')

diff --git a/Makefile b/Makefile
index 1a5d315e2842..86eb6acb3978 100644
--- a/Makefile
+++ b/Makefile
@@ -447,7 +447,7 @@ asm-generic:
 # Detect when mixed targets is specified, and make a second invocation
 # of make so .config is not included in this case either (for *config).
 
-version_h := include/generated/linux/version.h
+version_h := include/generated/uapi/linux/version.h
 
 no-dot-config-targets := clean mrproper distclean \
 			 cscope gtags TAGS tags help %docs check% coccicheck \
@@ -908,10 +908,10 @@ headers_install_all:
 
 PHONY += headers_install
 headers_install: __headers
-	$(if $(wildcard $(srctree)/arch/$(hdr-arch)/include/asm/Kbuild),, \
-	$(error Headers not exportable for the $(SRCARCH) architecture))
-	$(Q)$(MAKE) $(hdr-inst)=include
-	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/asm $(hdr-dst)
+	$(if $(wildcard $(srctree)/arch/$(hdr-arch)/include/uapi/asm/Kbuild),, \
+	  $(error Headers not exportable for the $(SRCARCH) architecture))
+	$(Q)$(MAKE) $(hdr-inst)=include/uapi
+	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/asm $(hdr-dst)
 
 PHONY += headers_check_all
 headers_check_all: headers_install_all
@@ -919,8 +919,8 @@ headers_check_all: headers_install_all
 
 PHONY += headers_check
 headers_check: headers_install
-	$(Q)$(MAKE) $(hdr-inst)=include HDRCHECK=1
-	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/asm $(hdr-dst) HDRCHECK=1
+	$(Q)$(MAKE) $(hdr-inst)=include/uapi HDRCHECK=1
+	$(Q)$(MAKE) $(hdr-inst)=arch/$(hdr-arch)/include/uapi/asm $(hdr-dst) HDRCHECK=1
 
 # ---------------------------------------------------------------------------
 # Modules
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index f9c0d3ba9e84..1595d6813432 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -22,7 +22,3 @@ header-y += sigcontext32.h
 header-y += ucontext.h
 header-y += vm86.h
 header-y += vsyscall.h
-
-genhdr-y += unistd_32.h
-genhdr-y += unistd_64.h
-genhdr-y += unistd_x32.h
diff --git a/arch/x86/syscalls/Makefile b/arch/x86/syscalls/Makefile
index 174b03248418..f325af26107c 100644
--- a/arch/x86/syscalls/Makefile
+++ b/arch/x86/syscalls/Makefile
@@ -1,5 +1,5 @@
 out := $(obj)/../include/generated/asm
-uapi := $(obj)/../include/generated/asm
+uapi := $(obj)/../include/generated/uapi/asm
 
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 0236a3b346fe..3f4c207a93bb 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -20,8 +20,6 @@ header-y += netfilter_ipv6/
 header-y += usb/
 header-y += wimax/
 
-genhdr-y += version.h
-
 ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/asm/a.out.h \
 		  $(srctree)/include/asm-$(SRCARCH)/a.out.h \
 		  $(INSTALL_HDR_PATH)/include/asm-*/a.out.h),)
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 13a9cf4cb6fe..b0fd4d03499d 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -20,3 +20,5 @@ header-y += netfilter_ipv4/
 header-y += netfilter_ipv6/
 header-y += usb/
 header-y += wimax/
+
+genhdr-y += version.h
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 463b95acc15b..06ba4a70bd4d 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -8,7 +8,7 @@
 # ==========================================================================
 
 # called may set destination dir (when installing to asm/)
-_dst := $(if $(dst),$(dst),$(obj))
+_dst := $(or $(destination-y),$(dst),$(obj))
 
 # generated header directory
 gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj)))
@@ -16,47 +16,64 @@ gen := $(if $(gen),$(gen),$(subst include/,include/generated/,$(obj)))
 kbuild-file := $(srctree)/$(obj)/Kbuild
 include $(kbuild-file)
 
-_dst := $(if $(destination-y),$(destination-y),$(_dst))
+old-kbuild-file := $(srctree)/$(subst uapi/,,$(obj))/Kbuild
+ifneq ($(wildcard $(old-kbuild-file)),)
+include $(old-kbuild-file)
+endif
 
 include scripts/Kbuild.include
 
-install       := $(INSTALL_HDR_PATH)/$(_dst)
+installdir    := $(INSTALL_HDR_PATH)/$(subst uapi/,,$(_dst))
 
 header-y      := $(sort $(header-y))
 subdirs       := $(patsubst %/,%,$(filter %/, $(header-y)))
 header-y      := $(filter-out %/, $(header-y))
 
 # files used to track state of install/check
-install-file  := $(install)/.install
-check-file    := $(install)/.check
+install-file  := $(installdir)/.install
+check-file    := $(installdir)/.check
 
 # generic-y list all files an architecture uses from asm-generic
 # Use this to build a list of headers which require a wrapper
 wrapper-files := $(filter $(header-y), $(generic-y))
 
+srcdir        := $(srctree)/$(obj)
+gendir        := $(objtree)/$(gen)
+
+oldsrcdir     := $(srctree)/$(subst /uapi,,$(obj))
+
 # all headers files for this dir
 header-y      := $(filter-out $(generic-y), $(header-y))
 all-files     := $(header-y) $(genhdr-y) $(wrapper-files)
-input-files   := $(addprefix $(srctree)/$(obj)/,$(header-y)) \
-                 $(addprefix $(objtree)/$(gen)/,$(genhdr-y))
-output-files  := $(addprefix $(install)/, $(all-files))
+output-files  := $(addprefix $(installdir)/, $(all-files))
+
+input-files   := $(foreach hdr, $(header-y), \
+		   $(or \
+			$(wildcard $(srcdir)/$(hdr)), \
+			$(wildcard $(oldsrcdir)/$(hdr)), \
+			$(error Missing UAPI file $(srcdir)/$(hdr)) \
+		   )) \
+		 $(foreach hdr, $(genhdr-y), \
+		   $(or \
+			$(wildcard $(gendir)/$(hdr)), \
+			$(error Missing generated UAPI file $(gendir)/$(hdr)) \
+		   ))
 
 # Work out what needs to be removed
-oldheaders    := $(patsubst $(install)/%,%,$(wildcard $(install)/*.h))
+oldheaders    := $(patsubst $(installdir)/%,%,$(wildcard $(installdir)/*.h))
 unwanted      := $(filter-out $(all-files),$(oldheaders))
 
 # Prefix unwanted with full paths to $(INSTALL_HDR_PATH)
-unwanted-file := $(addprefix $(install)/, $(unwanted))
+unwanted-file := $(addprefix $(installdir)/, $(unwanted))
 
 printdir = $(patsubst $(INSTALL_HDR_PATH)/%/,%,$(dir $@))
 
 quiet_cmd_install = INSTALL $(printdir) ($(words $(all-files))\
                             file$(if $(word 2, $(all-files)),s))
       cmd_install = \
-        $(PERL) $< $(srctree)/$(obj) $(install) $(SRCARCH) $(header-y); \
-        $(PERL) $< $(objtree)/$(gen) $(install) $(SRCARCH) $(genhdr-y); \
+        $(PERL) $< $(installdir) $(SRCARCH) $(input-files); \
         for F in $(wrapper-files); do                                   \
-                echo "\#include <asm-generic/$$F>" > $(install)/$$F;    \
+                echo "\#include <asm-generic/$$F>" > $(installdir)/$$F;    \
         done;                                                           \
         touch $@
 
@@ -67,7 +84,7 @@ quiet_cmd_check = CHECK   $(printdir) ($(words $(all-files)) files)
 # Headers list can be pretty long, xargs helps to avoid
 # the "Argument list too long" error.
       cmd_check = for f in $(all-files); do                          \
-                  echo "$(install)/$${f}"; done                      \
+                  echo "$(installdir)/$${f}"; done                      \
                   | xargs                                            \
                   $(PERL) $< $(INSTALL_HDR_PATH)/include $(SRCARCH); \
 	          touch $@
diff --git a/scripts/headers_install.pl b/scripts/headers_install.pl
index 48462be328bb..239d22d4207b 100644
--- a/scripts/headers_install.pl
+++ b/scripts/headers_install.pl
@@ -4,8 +4,7 @@
 # user space and copy the files to their destination.
 #
 # Usage: headers_install.pl readdir installdir arch [files...]
-# readdir:    dir to open files
-# installdir: dir to install the files
+# installdir: dir to install the files to
 # arch:       current architecture
 #             arch is used to force a reinstallation when the arch
 #             changes because kbuild then detect a command line change.
@@ -18,15 +17,18 @@
 
 use strict;
 
-my ($readdir, $installdir, $arch, @files) = @ARGV;
+my ($installdir, $arch, @files) = @ARGV;
 
 my $unifdef = "scripts/unifdef -U__KERNEL__ -D__EXPORTED_HEADERS__";
 
-foreach my $file (@files) {
+foreach my $filename (@files) {
+	my $file = $filename;
+	$file =~ s!^.*/!!;
+
 	my $tmpfile = "$installdir/$file.tmp";
 
-	open(my $in, '<', "$readdir/$file")
-	    or die "$readdir/$file: $!\n";
+	open(my $in, '<', $filename)
+	    or die "$filename: $!\n";
 	open(my $out, '>', $tmpfile)
 	    or die "$tmpfile: $!\n";
 	while (my $line = <$in>) {
-- 
cgit v1.2.3


From b1e0d8b70fa31821ebca3965f2ef8619d7c5e316 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Tue, 2 Oct 2012 16:42:36 +0200
Subject: kbuild: Fix gcc -x syntax

The correct syntax for gcc -x is "gcc -x assembler", not
"gcc -xassembler". Even though the latter happens to work, the former
is what is documented in the manual page and thus what gcc wrappers
such as icecream do expect.

This isn't a cosmetic change. The missing space prevents icecream from
recognizing compilation tasks it can't handle, leading to silent kernel
miscompilations.

Besides me, credits go to Michael Matz and Dirk Mueller for
investigating the miscompilation issue and tracking it down to this
incorrect -x parameter syntax.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: stable@vger.kernel.org
Cc: Bernhard Walle <bernhard@bwalle.de>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Michal Marek <mmarek@suse.cz>
---
 arch/mips/Makefile                         |  2 +-
 arch/mips/kernel/Makefile                  |  2 +-
 arch/x86/Makefile                          |  2 +-
 scripts/Kbuild.include                     | 12 ++++++------
 scripts/gcc-version.sh                     |  6 +++---
 scripts/gcc-x86_32-has-stack-protector.sh  |  2 +-
 scripts/gcc-x86_64-has-stack-protector.sh  |  2 +-
 scripts/kconfig/check.sh                   |  2 +-
 scripts/kconfig/lxdialog/check-lxdialog.sh |  2 +-
 tools/perf/Makefile                        |  2 +-
 tools/power/cpupower/Makefile              |  2 +-
 11 files changed, 18 insertions(+), 18 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/mips/Makefile b/arch/mips/Makefile
index 764e37a9dbb3..654b1ad39f05 100644
--- a/arch/mips/Makefile
+++ b/arch/mips/Makefile
@@ -225,7 +225,7 @@ KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)
 LDFLAGS			+= -m $(ld-emul)
 
 ifdef CONFIG_MIPS
-CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -xc /dev/null | \
+CHECKFLAGS += $(shell $(CC) $(KBUILD_CFLAGS) -dM -E -x c /dev/null | \
 	egrep -vw '__GNUC_(|MINOR_|PATCHLEVEL_)_' | \
 	sed -e "s/^\#define /-D'/" -e "s/ /'='/" -e "s/$$/'/")
 ifdef CONFIG_64BIT
diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile
index fdaf65e1a99d..c6136cb4cd40 100644
--- a/arch/mips/kernel/Makefile
+++ b/arch/mips/kernel/Makefile
@@ -104,7 +104,7 @@ obj-$(CONFIG_MIPS_MACHINE)	+= mips_machine.o
 
 obj-$(CONFIG_OF)		+= prom.o
 
-CFLAGS_cpu-bugs64.o	= $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -xc /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi)
+CFLAGS_cpu-bugs64.o	= $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -x c /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi)
 
 obj-$(CONFIG_HAVE_STD_PC_SERIAL_PORT)	+= 8250-platform.o
 
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b0c5276861ec..cb8ac935df82 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -88,7 +88,7 @@ endif
 ifdef CONFIG_X86_X32
 	x32_ld_ok := $(call try-run,\
 			/bin/echo -e '1: .quad 1b' | \
-			$(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" - && \
+			$(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" - && \
 			$(OBJCOPY) -O elf32-x86-64 "$$TMP" "$$TMPO" && \
 			$(LD) -m elf32_x86_64 "$$TMPO" -o "$$TMP",y,n)
         ifeq ($(x32_ld_ok),y)
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 6a3ee981931d..8bb8d3a9f01c 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -98,24 +98,24 @@ try-run = $(shell set -e;		\
 # Usage: cflags-y += $(call as-option,-Wa$(comma)-isa=foo,)
 
 as-option = $(call try-run,\
-	$(CC) $(KBUILD_CFLAGS) $(1) -c -xassembler /dev/null -o "$$TMP",$(1),$(2))
+	$(CC) $(KBUILD_CFLAGS) $(1) -c -x assembler /dev/null -o "$$TMP",$(1),$(2))
 
 # as-instr
 # Usage: cflags-y += $(call as-instr,instr,option1,option2)
 
 as-instr = $(call try-run,\
-	printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -xassembler -o "$$TMP" -,$(2),$(3))
+	printf "%b\n" "$(1)" | $(CC) $(KBUILD_AFLAGS) -c -x assembler -o "$$TMP" -,$(2),$(3))
 
 # cc-option
 # Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
 
 cc-option = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",$(1),$(2))
+	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
 
 # cc-option-yn
 # Usage: flag := $(call cc-option-yn,-march=winchip-c6)
 cc-option-yn = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -xc /dev/null -o "$$TMP",y,n)
+	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",y,n)
 
 # cc-option-align
 # Prefix align with either -falign or -malign
@@ -125,7 +125,7 @@ cc-option-align = $(subst -functions=0,,\
 # cc-disable-warning
 # Usage: cflags-y += $(call cc-disable-warning,unused-but-set-variable)
 cc-disable-warning = $(call try-run,\
-	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -xc /dev/null -o "$$TMP",-Wno-$(strip $(1)))
+	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) -W$(strip $(1)) -c -x c /dev/null -o "$$TMP",-Wno-$(strip $(1)))
 
 # cc-version
 # Usage gcc-ver := $(call cc-version)
@@ -143,7 +143,7 @@ cc-ifversion = $(shell [ $(call cc-version, $(CC)) $(1) $(2) ] && echo $(3))
 # cc-ldoption
 # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both)
 cc-ldoption = $(call try-run,\
-	$(CC) $(1) -nostdlib -xc /dev/null -o "$$TMP",$(1),$(2))
+	$(CC) $(1) -nostdlib -x c /dev/null -o "$$TMP",$(1),$(2))
 
 # ld-option
 # Usage: LDFLAGS += $(call ld-option, -X)
diff --git a/scripts/gcc-version.sh b/scripts/gcc-version.sh
index debecb5561c4..7f2126df91f2 100644
--- a/scripts/gcc-version.sh
+++ b/scripts/gcc-version.sh
@@ -22,10 +22,10 @@ if [ ${#compiler} -eq 0 ]; then
 	exit 1
 fi
 
-MAJOR=$(echo __GNUC__ | $compiler -E -xc - | tail -n 1)
-MINOR=$(echo __GNUC_MINOR__ | $compiler -E -xc - | tail -n 1)
+MAJOR=$(echo __GNUC__ | $compiler -E -x c - | tail -n 1)
+MINOR=$(echo __GNUC_MINOR__ | $compiler -E -x c - | tail -n 1)
 if [ "x$with_patchlevel" != "x" ] ; then
-	PATCHLEVEL=$(echo __GNUC_PATCHLEVEL__ | $compiler -E -xc - | tail -n 1)
+	PATCHLEVEL=$(echo __GNUC_PATCHLEVEL__ | $compiler -E -x c - | tail -n 1)
 	printf "%02d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL
 else
 	printf "%02d%02d\\n" $MAJOR $MINOR
diff --git a/scripts/gcc-x86_32-has-stack-protector.sh b/scripts/gcc-x86_32-has-stack-protector.sh
index 29493dc4528d..12dbd0b11ea4 100644
--- a/scripts/gcc-x86_32-has-stack-protector.sh
+++ b/scripts/gcc-x86_32-has-stack-protector.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
+echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
 if [ "$?" -eq "0" ] ; then
 	echo y
 else
diff --git a/scripts/gcc-x86_64-has-stack-protector.sh b/scripts/gcc-x86_64-has-stack-protector.sh
index afaec618b395..973e8c141567 100644
--- a/scripts/gcc-x86_64-has-stack-protector.sh
+++ b/scripts/gcc-x86_64-has-stack-protector.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 
-echo "int foo(void) { char X[200]; return 3; }" | $* -S -xc -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
+echo "int foo(void) { char X[200]; return 3; }" | $* -S -x c -c -O0 -mcmodel=kernel -fstack-protector - -o - 2> /dev/null | grep -q "%gs"
 if [ "$?" -eq "0" ] ; then
 	echo y
 else
diff --git a/scripts/kconfig/check.sh b/scripts/kconfig/check.sh
index fa59cbf9d62c..854d9c7c675c 100755
--- a/scripts/kconfig/check.sh
+++ b/scripts/kconfig/check.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 # Needed for systems without gettext
-$* -xc -o /dev/null - > /dev/null 2>&1 << EOF
+$* -x c -o /dev/null - > /dev/null 2>&1 << EOF
 #include <libintl.h>
 int main()
 {
diff --git a/scripts/kconfig/lxdialog/check-lxdialog.sh b/scripts/kconfig/lxdialog/check-lxdialog.sh
index e3b12c010417..c8e8a7154753 100644
--- a/scripts/kconfig/lxdialog/check-lxdialog.sh
+++ b/scripts/kconfig/lxdialog/check-lxdialog.sh
@@ -38,7 +38,7 @@ trap "rm -f $tmp" 0 1 2 3 15
 
 # Check if we can link to ncurses
 check() {
-        $cc -xc - -o $tmp 2>/dev/null <<'EOF'
+        $cc -x c - -o $tmp 2>/dev/null <<'EOF'
 #include CURSES_LOC
 main() {}
 EOF
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 77f124fe57ad..d9776568e8cd 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -62,7 +62,7 @@ ifeq ($(ARCH),x86_64)
 	ARCH := x86
 	IS_X86_64 := 0
 	ifeq (, $(findstring m32,$(EXTRA_CFLAGS)))
-		IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -xc - | tail -n 1)
+		IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1)
 	endif
 	ifeq (${IS_X86_64}, 1)
 		RAW_ARCH := x86_64
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index a93e06cfcc2a..cf397bd26d0c 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -111,7 +111,7 @@ GMO_FILES = ${shell for HLANG in ${LANGUAGES}; do echo $(OUTPUT)po/$$HLANG.gmo;
 export CROSS CC AR STRIP RANLIB CFLAGS LDFLAGS LIB_OBJS
 
 # check if compiler option is supported
-cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -xc /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
+cc-supports = ${shell if $(CC) ${1} -S -o /dev/null -x c /dev/null > /dev/null 2>&1; then echo "$(1)"; fi;}
 
 # use '-Os' optimization if available, else use -O2
 OPTIMIZATION := $(call cc-supports,-Os,-O2)
-- 
cgit v1.2.3


From e7a570ff7dff9af6e54ff5e580a61ec7652137a0 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Wed, 5 Sep 2012 12:04:14 +0800
Subject: asm-generic: Add default clkdev.h

Ease the deployment of clkdev by providing a default asm/clkdev.h for
use if the arch does not have an include/asm/clkdev.h.

Due to limitations in Kbuild we manually add clkdev.h to all
architectures that don't have one rather than having the header appear
by default.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Reviewed-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/alpha/include/asm/Kbuild      |  2 ++
 arch/avr32/include/asm/Kbuild      |  2 ++
 arch/cris/include/asm/Kbuild       |  2 ++
 arch/frv/include/asm/Kbuild        |  1 +
 arch/h8300/include/asm/Kbuild      |  2 ++
 arch/hexagon/include/asm/Kbuild    |  1 +
 arch/ia64/include/asm/Kbuild       |  1 +
 arch/m32r/include/asm/Kbuild       |  2 ++
 arch/m68k/include/asm/Kbuild       |  1 +
 arch/microblaze/include/asm/Kbuild |  1 +
 arch/mn10300/include/asm/Kbuild    |  2 ++
 arch/openrisc/include/asm/Kbuild   |  1 +
 arch/parisc/include/asm/Kbuild     |  1 +
 arch/powerpc/include/asm/Kbuild    |  1 +
 arch/s390/include/asm/Kbuild       |  2 ++
 arch/score/include/asm/Kbuild      |  2 ++
 arch/sparc/include/asm/Kbuild      |  1 +
 arch/tile/include/asm/Kbuild       |  1 +
 arch/um/include/asm/Kbuild         |  2 +-
 arch/unicore32/include/asm/Kbuild  |  1 +
 arch/x86/include/asm/Kbuild        |  2 ++
 arch/xtensa/include/asm/Kbuild     |  2 ++
 include/asm-generic/clkdev.h       | 28 ++++++++++++++++++++++++++++
 23 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 include/asm-generic/clkdev.h

(limited to 'arch/x86')

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index e423defed91e..d97d66334e6f 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -1,5 +1,7 @@
 include include/asm-generic/Kbuild.asm
 
+generic-y += clkdev.h
+
 header-y += compiler.h
 header-y += console.h
 header-y += fpu.h
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index 3136628ba8d2..e3ba7bca06fa 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -1,3 +1,5 @@
 include include/asm-generic/Kbuild.asm
 
+generic-y	+= clkdev.h
+
 header-y	+= cachectl.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index 04d02a51c5e9..a8eab26a1ec7 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -7,3 +7,5 @@ header-y += ethernet.h
 header-y += etraxgpio.h
 header-y += rs485.h
 header-y += sync_serial.h
+
+generic-y += clkdev.h
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 5be6663cfee5..13cd044aabdf 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -2,3 +2,4 @@ include include/asm-generic/Kbuild.asm
 
 header-y += registers.h
 header-y += termios.h
+generic-y += clkdev.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index c68e1680da01..0e152a93c125 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y	+= clkdev.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index 06906427c0ac..3364b6966d26 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -7,6 +7,7 @@ header-y += user.h
 generic-y += auxvec.h
 generic-y += bug.h
 generic-y += bugs.h
+generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += current.h
 generic-y += device.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index d4eb9383f5f6..58f3d14a6cd4 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -13,3 +13,4 @@ header-y += ptrace_offsets.h
 header-y += rse.h
 header-y += ucontext.h
 header-y += ustack.h
+generic-y += clkdev.h
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index c68e1680da01..0e152a93c125 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y	+= clkdev.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index a74e5d95c384..bfe675f0faee 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -2,6 +2,7 @@ include include/asm-generic/Kbuild.asm
 header-y += cachectl.h
 
 generic-y += bitsperlong.h
+generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += device.h
 generic-y += emergency-restart.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index db5294c30caf..48510f6cec8f 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -1,3 +1,4 @@
 include include/asm-generic/Kbuild.asm
 
 header-y  += elf.h
+generic-y += clkdev.h
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index c68e1680da01..0d20f5526dd8 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y += clkdev.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 0922959663a0..7140b6b26441 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -11,6 +11,7 @@ generic-y += bug.h
 generic-y += bugs.h
 generic-y += cacheflush.h
 generic-y += checksum.h
+generic-y += clkdev.h
 generic-y += cmpxchg.h
 generic-y += cmpxchg-local.h
 generic-y += cputime.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 4383707d9801..0587f62e5b76 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -1,4 +1,5 @@
 include include/asm-generic/Kbuild.asm
 
 header-y += pdc.h
+generic-y += clkdev.h
 generic-y += word-at-a-time.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 7e313f1ed183..ace53dbde2cd 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -35,4 +35,5 @@ header-y += types.h
 header-y += ucontext.h
 header-y += unistd.h
 
+generic-y += clkdev.h
 generic-y += rwsem.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 287d7bbb6d36..f18fc796beef 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -13,3 +13,5 @@ header-y += tape390.h
 header-y += ucontext.h
 header-y += vtoc.h
 header-y += zcrypt.h
+
+generic-y += clkdev.h
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index b367abd4620f..ec697aeefd05 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -1,3 +1,5 @@
 include include/asm-generic/Kbuild.asm
 
 header-y +=
+
+generic-y += clkdev.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index 67f83e0a0d68..f80ff93f6f75 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -17,6 +17,7 @@ header-y += uctx.h
 header-y += utrap.h
 header-y += watchdog.h
 
+generic-y += clkdev.h
 generic-y += div64.h
 generic-y += local64.h
 generic-y += irq_regs.h
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index 5bd71994452d..ea2e8ea3eb61 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -8,6 +8,7 @@ header-y += hardwall.h
 
 generic-y += bug.h
 generic-y += bugs.h
+generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += div64.h
 generic-y += emergency-restart.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index fff24352255d..0f6e7b328265 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -1,4 +1,4 @@
 generic-y += bug.h cputime.h device.h emergency-restart.h futex.h hardirq.h
 generic-y += hw_irq.h irq_regs.h kdebug.h percpu.h sections.h topology.h xor.h
 generic-y += ftrace.h pci.h io.h param.h delay.h mutex.h current.h exec.h
-generic-y += switch_to.h
+generic-y += switch_to.h clkdev.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 34b789b71115..123c59a06c14 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -4,6 +4,7 @@ generic-y += atomic.h
 generic-y += auxvec.h
 generic-y += bitsperlong.h
 generic-y += bugs.h
+generic-y += clkdev.h
 generic-y += cputime.h
 generic-y += current.h
 generic-y += device.h
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index f9c0d3ba9e84..66e5f0ef0523 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -26,3 +26,5 @@ header-y += vsyscall.h
 genhdr-y += unistd_32.h
 genhdr-y += unistd_64.h
 genhdr-y += unistd_x32.h
+
+generic-y += clkdev.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index c68e1680da01..0d20f5526dd8 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -1 +1,3 @@
 include include/asm-generic/Kbuild.asm
+
+generic-y += clkdev.h
diff --git a/include/asm-generic/clkdev.h b/include/asm-generic/clkdev.h
new file mode 100644
index 000000000000..90a32a61dd21
--- /dev/null
+++ b/include/asm-generic/clkdev.h
@@ -0,0 +1,28 @@
+/*
+ *  include/asm-generic/clkdev.h
+ *
+ * Based on the ARM clkdev.h:
+ *  Copyright (C) 2008 Russell King.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Helper for the clk API to assist looking up a struct clk.
+ */
+#ifndef __ASM_CLKDEV_H
+#define __ASM_CLKDEV_H
+
+#include <linux/slab.h>
+
+struct clk;
+
+static inline int __clk_get(struct clk *clk) { return 1; }
+static inline void __clk_put(struct clk *clk) { }
+
+static inline struct clk_lookup_alloc *__clkdev_alloc(size_t size)
+{
+	return kzalloc(size, GFP_KERNEL);
+}
+
+#endif
-- 
cgit v1.2.3


From c9f97a27ceee84998999bf3341e6d5d207b05539 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Wed, 19 Sep 2012 09:40:30 +0300
Subject: crypto: x86/glue_helper - fix storing of new IV in CBC encryption

Glue_helper incorrectly XORs new IV over old IV at end of CBC encryption
function when it should store. This causes CBC encryption to give
incorrect output on multi-page encryption requests.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/glue_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 4854f0f31e4f..30b3927bd733 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -110,7 +110,7 @@ static unsigned int __glue_cbc_encrypt_128bit(const common_glue_func_t fn,
 		nbytes -= bsize;
 	} while (nbytes >= bsize);
 
-	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
+	*(u128 *)walk->iv = *iv;
 	return nbytes;
 }
 
-- 
cgit v1.2.3


From e717bf4e4fe8adc519f25c4ff93ee50ed0a36710 Mon Sep 17 00:00:00 2001
From: Vince Weaver <vincent.weaver@maine.edu>
Date: Wed, 26 Sep 2012 14:12:52 -0400
Subject: perf/x86: Add support for Intel Xeon-Phi Knights Corner PMU

The following patch adds perf_event support for the Xeon-Phi
PMU, as documented in the "Intel Xeon Phi Coprocessor (codename:
Knights Corner) Performance Monitoring Units" manual.

Even though it is a co-processor, a Phi runs a full Linux
environment and can support performance counters.

This is just barebones support, it does not add support for
interesting new features such as the SPFLT intruction that
allows starting/stopping events without entering the kernel.

The PMU internally is just like that of an original Pentium, but
a "P6-like" MSR interface is provided.  The interface is
different enough from a real P6 that it's not easy (or
practical) to re-use the code in  perf_event_p6.c

Acked-by: Lawrence F Meadows <lawrence.f.meadows@intel.com>
Acked-by: Cyrill Gorcunov <gorcunov@openvz.org>
Signed-off-by: Vince Weaver <vincent.weaver@maine.edu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Cc: eranian@gmail.com
Cc: Lawrence F <lawrence.f.meadows@intel.com>
Link: http://lkml.kernel.org/r/alpine.DEB.2.02.1209261405320.8398@vincent-weaver-1.um.maine.edu
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/msr-index.h       |   5 +
 arch/x86/kernel/cpu/Makefile           |   2 +-
 arch/x86/kernel/cpu/perf_event.h       |   2 +
 arch/x86/kernel/cpu/perf_event_intel.c |   2 +
 arch/x86/kernel/cpu/perf_event_knc.c   | 248 +++++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/perfctr-watchdog.c |   4 +
 6 files changed, 262 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/perf_event_knc.c

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 957ec87385af..07f96cb5cdb9 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -121,6 +121,11 @@
 #define MSR_P6_EVNTSEL0			0x00000186
 #define MSR_P6_EVNTSEL1			0x00000187
 
+#define MSR_KNC_PERFCTR0               0x00000020
+#define MSR_KNC_PERFCTR1               0x00000021
+#define MSR_KNC_EVNTSEL0               0x00000028
+#define MSR_KNC_EVNTSEL1               0x00000029
+
 /* AMD64 MSRs. Not complete. See the architecture manual for a more
    complete list. */
 
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index d30a6a9a0121..a0e067d3d96c 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -32,7 +32,7 @@ obj-$(CONFIG_PERF_EVENTS)		+= perf_event.o
 
 ifdef CONFIG_PERF_EVENTS
 obj-$(CONFIG_CPU_SUP_AMD)		+= perf_event_amd.o
-obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_p4.o
+obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_p6.o perf_event_knc.o perf_event_p4.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= perf_event_intel_uncore.o
 endif
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 8b6defe7eefc..271d25700297 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -626,6 +626,8 @@ int p4_pmu_init(void);
 
 int p6_pmu_init(void);
 
+int knc_pmu_init(void);
+
 #else /* CONFIG_CPU_SUP_INTEL */
 
 static inline void reserve_ds_buffers(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6bca492b8547..324bb523d9d9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1906,6 +1906,8 @@ __init int intel_pmu_init(void)
 		switch (boot_cpu_data.x86) {
 		case 0x6:
 			return p6_pmu_init();
+		case 0xb:
+			return knc_pmu_init();
 		case 0xf:
 			return p4_pmu_init();
 		}
diff --git a/arch/x86/kernel/cpu/perf_event_knc.c b/arch/x86/kernel/cpu/perf_event_knc.c
new file mode 100644
index 000000000000..7c46bfdbc373
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_knc.c
@@ -0,0 +1,248 @@
+/* Driver for Intel Xeon Phi "Knights Corner" PMU */
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "perf_event.h"
+
+static const u64 knc_perfmon_event_map[] =
+{
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x002a,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x0016,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x0028,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x0029,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x0012,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x002b,
+};
+
+static __initconst u64 knc_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		/* On Xeon Phi event "0" is a valid DATA_READ          */
+		/*   (L1 Data Cache Reads) Instruction.                */
+		/* We code this as ARCH_PERFMON_EVENTSEL_INT as this   */
+		/* bit will always be set in x86_pmu_hw_config().      */
+		[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+						/* DATA_READ           */
+		[ C(RESULT_MISS)   ] = 0x0003,	/* DATA_READ_MISS      */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0001,	/* DATA_WRITE          */
+		[ C(RESULT_MISS)   ] = 0x0004,	/* DATA_WRITE_MISS     */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0011,	/* L1_DATA_PF1         */
+		[ C(RESULT_MISS)   ] = 0x001c,	/* L1_DATA_PF1_MISS    */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x000c,	/* CODE_READ          */
+		[ C(RESULT_MISS)   ] = 0x000e,	/* CODE_CACHE_MISS    */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0,
+		[ C(RESULT_MISS)   ] = 0x10cb,	/* L2_READ_MISS */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x10cc,	/* L2_WRITE_HIT */
+		[ C(RESULT_MISS)   ] = 0,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x10fc,	/* L2_DATA_PF2      */
+		[ C(RESULT_MISS)   ] = 0x10fe,	/* L2_DATA_PF2_MISS */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = ARCH_PERFMON_EVENTSEL_INT,
+						/* DATA_READ */
+						/* see note on L1 OP_READ */
+		[ C(RESULT_MISS)   ] = 0x0002,	/* DATA_PAGE_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0001,	/* DATA_WRITE */
+		[ C(RESULT_MISS)   ] = 0x0002,	/* DATA_PAGE_WALK */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x000c,	/* CODE_READ */
+		[ C(RESULT_MISS)   ] = 0x000d,	/* CODE_PAGE_WALK */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0012,	/* BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x002b,	/* BRANCHES_MISPREDICTED */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
+
+static u64 knc_pmu_event_map(int hw_event)
+{
+	return knc_perfmon_event_map[hw_event];
+}
+
+static struct event_constraint knc_event_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xc3, 0x1),	/* HWP_L2HIT */
+	INTEL_EVENT_CONSTRAINT(0xc4, 0x1),	/* HWP_L2MISS */
+	INTEL_EVENT_CONSTRAINT(0xc8, 0x1),	/* L2_READ_HIT_E */
+	INTEL_EVENT_CONSTRAINT(0xc9, 0x1),	/* L2_READ_HIT_M */
+	INTEL_EVENT_CONSTRAINT(0xca, 0x1),	/* L2_READ_HIT_S */
+	INTEL_EVENT_CONSTRAINT(0xcb, 0x1),	/* L2_READ_MISS */
+	INTEL_EVENT_CONSTRAINT(0xcc, 0x1),	/* L2_WRITE_HIT */
+	INTEL_EVENT_CONSTRAINT(0xce, 0x1),	/* L2_STRONGLY_ORDERED_STREAMING_VSTORES_MISS */
+	INTEL_EVENT_CONSTRAINT(0xcf, 0x1),	/* L2_WEAKLY_ORDERED_STREAMING_VSTORE_MISS */
+	INTEL_EVENT_CONSTRAINT(0xd7, 0x1),	/* L2_VICTIM_REQ_WITH_DATA */
+	INTEL_EVENT_CONSTRAINT(0xe3, 0x1),	/* SNP_HITM_BUNIT */
+	INTEL_EVENT_CONSTRAINT(0xe6, 0x1),	/* SNP_HIT_L2 */
+	INTEL_EVENT_CONSTRAINT(0xe7, 0x1),	/* SNP_HITM_L2 */
+	INTEL_EVENT_CONSTRAINT(0xf1, 0x1),	/* L2_DATA_READ_MISS_CACHE_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf2, 0x1),	/* L2_DATA_WRITE_MISS_CACHE_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf6, 0x1),	/* L2_DATA_READ_MISS_MEM_FILL */
+	INTEL_EVENT_CONSTRAINT(0xf7, 0x1),	/* L2_DATA_WRITE_MISS_MEM_FILL */
+	INTEL_EVENT_CONSTRAINT(0xfc, 0x1),	/* L2_DATA_PF2 */
+	INTEL_EVENT_CONSTRAINT(0xfd, 0x1),	/* L2_DATA_PF2_DROP */
+	INTEL_EVENT_CONSTRAINT(0xfe, 0x1),	/* L2_DATA_PF2_MISS */
+	INTEL_EVENT_CONSTRAINT(0xff, 0x1),	/* L2_DATA_HIT_INFLIGHT_PF2 */
+	EVENT_CONSTRAINT_END
+};
+
+#define MSR_KNC_IA32_PERF_GLOBAL_STATUS		0x0000002d
+#define MSR_KNC_IA32_PERF_GLOBAL_OVF_CONTROL	0x0000002e
+#define MSR_KNC_IA32_PERF_GLOBAL_CTRL		0x0000002f
+
+#define KNC_ENABLE_COUNTER0			0x00000001
+#define KNC_ENABLE_COUNTER1			0x00000002
+
+static void knc_pmu_disable_all(void)
+{
+	u64 val;
+
+	rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+	val &= ~(KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+	wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static void knc_pmu_enable_all(int added)
+{
+	u64 val;
+
+	rdmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+	val |= (KNC_ENABLE_COUNTER0|KNC_ENABLE_COUNTER1);
+	wrmsrl(MSR_KNC_IA32_PERF_GLOBAL_CTRL, val);
+}
+
+static inline void
+knc_pmu_disable_event(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val &= ~ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+static void knc_pmu_enable_event(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
+	u64 val;
+
+	val = hwc->config;
+	if (cpuc->enabled)
+		val |= ARCH_PERFMON_EVENTSEL_ENABLE;
+
+	(void)wrmsrl_safe(hwc->config_base + hwc->idx, val);
+}
+
+PMU_FORMAT_ATTR(event,	"config:0-7"	);
+PMU_FORMAT_ATTR(umask,	"config:8-15"	);
+PMU_FORMAT_ATTR(edge,	"config:18"	);
+PMU_FORMAT_ATTR(inv,	"config:23"	);
+PMU_FORMAT_ATTR(cmask,	"config:24-31"	);
+
+static struct attribute *intel_knc_formats_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_umask.attr,
+	&format_attr_edge.attr,
+	&format_attr_inv.attr,
+	&format_attr_cmask.attr,
+	NULL,
+};
+
+static __initconst struct x86_pmu knc_pmu = {
+	.name			= "knc",
+	.handle_irq		= x86_pmu_handle_irq,
+	.disable_all		= knc_pmu_disable_all,
+	.enable_all		= knc_pmu_enable_all,
+	.enable			= knc_pmu_enable_event,
+	.disable		= knc_pmu_disable_event,
+	.hw_config		= x86_pmu_hw_config,
+	.schedule_events	= x86_schedule_events,
+	.eventsel		= MSR_KNC_EVNTSEL0,
+	.perfctr		= MSR_KNC_PERFCTR0,
+	.event_map		= knc_pmu_event_map,
+	.max_events             = ARRAY_SIZE(knc_perfmon_event_map),
+	.apic			= 1,
+	.max_period		= (1ULL << 31) - 1,
+	.version		= 0,
+	.num_counters		= 2,
+	/* in theory 40 bits, early silicon is buggy though */
+	.cntval_bits		= 32,
+	.cntval_mask		= (1ULL << 32) - 1,
+	.get_event_constraints	= x86_get_event_constraints,
+	.event_constraints	= knc_event_constraints,
+	.format_attrs		= intel_knc_formats_attr,
+};
+
+__init int knc_pmu_init(void)
+{
+	x86_pmu = knc_pmu;
+
+	memcpy(hw_cache_event_ids, knc_hw_cache_event_ids, 
+		sizeof(hw_cache_event_ids));
+
+	return 0;
+}
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 966512b2cacf..2e8caf03f593 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -56,6 +56,8 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
 		switch (boot_cpu_data.x86) {
 		case 6:
 			return msr - MSR_P6_PERFCTR0;
+		case 11:
+			return msr - MSR_KNC_PERFCTR0;
 		case 15:
 			return msr - MSR_P4_BPU_PERFCTR0;
 		}
@@ -82,6 +84,8 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
 		switch (boot_cpu_data.x86) {
 		case 6:
 			return msr - MSR_P6_EVNTSEL0;
+		case 11:
+			return msr - MSR_KNC_EVNTSEL0;
 		case 15:
 			return msr - MSR_P4_BSU_ESCR0;
 		}
-- 
cgit v1.2.3


From 34b6f01a79bd65fbd06511d2cb7b28e33a506246 Mon Sep 17 00:00:00 2001
From: Olaf Hering <olaf@aepfle.de>
Date: Mon, 1 Oct 2012 21:18:01 +0200
Subject: xen pv-on-hvm: add pfn_is_ram helper for kdump

Register pfn_is_ram helper speed up reading /proc/vmcore in the kdump
kernel. See commit message of 997c136f518c ("fs/proc/vmcore.c: add hook
to read_from_oldmem() to check for non-ram pages") for details.

It makes use of a new hvmop HVMOP_get_mem_type which was introduced in
xen 4.2 (23298:26413986e6e0) and backported to 4.1.1.

The new function is currently only enabled for reading /proc/vmcore.
Later it will be used also for the kexec kernel. Since that requires
more changes in the generic kernel make it static for the time being.

Signed-off-by: Olaf Hering <olaf@aepfle.de>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/mmu.c                 | 41 ++++++++++++++++++++++++++++++++++++++
 include/xen/interface/hvm/hvm_op.h | 19 ++++++++++++++++++
 2 files changed, 60 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5a16824cc2b3..963cb2df636a 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -47,6 +47,7 @@
 #include <linux/gfp.h>
 #include <linux/memblock.h>
 #include <linux/seq_file.h>
+#include <linux/crash_dump.h>
 
 #include <trace/events/xen.h>
 
@@ -2381,6 +2382,43 @@ void xen_destroy_contiguous_region(unsigned long vstart, unsigned int order)
 EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region);
 
 #ifdef CONFIG_XEN_PVHVM
+#ifdef CONFIG_PROC_VMCORE
+/*
+ * This function is used in two contexts:
+ * - the kdump kernel has to check whether a pfn of the crashed kernel
+ *   was a ballooned page. vmcore is using this function to decide
+ *   whether to access a pfn of the crashed kernel.
+ * - the kexec kernel has to check whether a pfn was ballooned by the
+ *   previous kernel. If the pfn is ballooned, handle it properly.
+ * Returns 0 if the pfn is not backed by a RAM page, the caller may
+ * handle the pfn special in this case.
+ */
+static int xen_oldmem_pfn_is_ram(unsigned long pfn)
+{
+	struct xen_hvm_get_mem_type a = {
+		.domid = DOMID_SELF,
+		.pfn = pfn,
+	};
+	int ram;
+
+	if (HYPERVISOR_hvm_op(HVMOP_get_mem_type, &a))
+		return -ENXIO;
+
+	switch (a.mem_type) {
+		case HVMMEM_mmio_dm:
+			ram = 0;
+			break;
+		case HVMMEM_ram_rw:
+		case HVMMEM_ram_ro:
+		default:
+			ram = 1;
+			break;
+	}
+
+	return ram;
+}
+#endif
+
 static void xen_hvm_exit_mmap(struct mm_struct *mm)
 {
 	struct xen_hvm_pagetable_dying a;
@@ -2411,6 +2449,9 @@ void __init xen_hvm_init_mmu_ops(void)
 {
 	if (is_pagetable_dying_supported())
 		pv_mmu_ops.exit_mmap = xen_hvm_exit_mmap;
+#ifdef CONFIG_PROC_VMCORE
+	register_oldmem_pfn_is_ram(&xen_oldmem_pfn_is_ram);
+#endif
 }
 #endif
 
diff --git a/include/xen/interface/hvm/hvm_op.h b/include/xen/interface/hvm/hvm_op.h
index a4827f46ee97..956a04682865 100644
--- a/include/xen/interface/hvm/hvm_op.h
+++ b/include/xen/interface/hvm/hvm_op.h
@@ -43,4 +43,23 @@ struct xen_hvm_pagetable_dying {
 typedef struct xen_hvm_pagetable_dying xen_hvm_pagetable_dying_t;
 DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_pagetable_dying_t);
  
+enum hvmmem_type_t {
+    HVMMEM_ram_rw,             /* Normal read/write guest RAM */
+    HVMMEM_ram_ro,             /* Read-only; writes are discarded */
+    HVMMEM_mmio_dm,            /* Reads and write go to the device model */
+};
+
+#define HVMOP_get_mem_type    15
+/* Return hvmmem_type_t for the specified pfn. */
+struct xen_hvm_get_mem_type {
+    /* Domain to be queried. */
+    domid_t domid;
+    /* OUT variable. */
+    uint16_t mem_type;
+    uint16_t pad[2]; /* align next field on 8-byte boundary */
+    /* IN variable. */
+    uint64_t pfn;
+};
+DEFINE_GUEST_HANDLE_STRUCT(xen_hvm_get_mem_type);
+
 #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */
-- 
cgit v1.2.3


From 2e132b12f78d88672711ae1d87624951de1089ca Mon Sep 17 00:00:00 2001
From: Robert Richter <robert.richter@amd.com>
Date: Wed, 12 Sep 2012 12:59:44 +0200
Subject: perf/AMD/IBS: Add sysfs support

Add sysfs format entries for AMD IBS PMUs:

 # find /sys/bus/event_source/devices/ibs_*/format
 /sys/bus/event_source/devices/ibs_fetch/format
 /sys/bus/event_source/devices/ibs_fetch/format/rand_en
 /sys/bus/event_source/devices/ibs_op/format
 /sys/bus/event_source/devices/ibs_op/format/cnt_ctl

This allows to specify following IBS options:

 $ perf record -e ibs_fetch/rand_en=1/GH ...
 $ perf record -e ibs_op/cnt_ctl=1/GH ...

Option cnt_ctl is only enabled if the IBS_CAPS_OPCNT bit is set in IBS
cpuid feature flags (AMD family 10h RevC and above).

Signed-off-by: Robert Richter <robert.richter@amd.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1347447584-28405-1-git-send-email-robert.richter@amd.com
[ Added small readability improvements. ]
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_amd_ibs.c | 61 +++++++++++++++++++++++++-------
 1 file changed, 49 insertions(+), 12 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
index eebd5ffe1bba..6336bcbd0618 100644
--- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c
+++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c
@@ -41,17 +41,22 @@ struct cpu_perf_ibs {
 };
 
 struct perf_ibs {
-	struct pmu	pmu;
-	unsigned int	msr;
-	u64		config_mask;
-	u64		cnt_mask;
-	u64		enable_mask;
-	u64		valid_mask;
-	u64		max_period;
-	unsigned long	offset_mask[1];
-	int		offset_max;
-	struct cpu_perf_ibs __percpu *pcpu;
-	u64		(*get_count)(u64 config);
+	struct pmu			pmu;
+	unsigned int			msr;
+	u64				config_mask;
+	u64				cnt_mask;
+	u64				enable_mask;
+	u64				valid_mask;
+	u64				max_period;
+	unsigned long			offset_mask[1];
+	int				offset_max;
+	struct cpu_perf_ibs __percpu	*pcpu;
+
+	struct attribute		**format_attrs;
+	struct attribute_group		format_group;
+	const struct attribute_group	*attr_groups[2];
+
+	u64				(*get_count)(u64 config);
 };
 
 struct perf_ibs_data {
@@ -446,6 +451,19 @@ static void perf_ibs_del(struct perf_event *event, int flags)
 
 static void perf_ibs_read(struct perf_event *event) { }
 
+PMU_FORMAT_ATTR(rand_en,	"config:57");
+PMU_FORMAT_ATTR(cnt_ctl,	"config:19");
+
+static struct attribute *ibs_fetch_format_attrs[] = {
+	&format_attr_rand_en.attr,
+	NULL,
+};
+
+static struct attribute *ibs_op_format_attrs[] = {
+	NULL,	/* &format_attr_cnt_ctl.attr if IBS_CAPS_OPCNT */
+	NULL,
+};
+
 static struct perf_ibs perf_ibs_fetch = {
 	.pmu = {
 		.task_ctx_nr	= perf_invalid_context,
@@ -465,6 +483,7 @@ static struct perf_ibs perf_ibs_fetch = {
 	.max_period		= IBS_FETCH_MAX_CNT << 4,
 	.offset_mask		= { MSR_AMD64_IBSFETCH_REG_MASK },
 	.offset_max		= MSR_AMD64_IBSFETCH_REG_COUNT,
+	.format_attrs		= ibs_fetch_format_attrs,
 
 	.get_count		= get_ibs_fetch_count,
 };
@@ -488,6 +507,7 @@ static struct perf_ibs perf_ibs_op = {
 	.max_period		= IBS_OP_MAX_CNT << 4,
 	.offset_mask		= { MSR_AMD64_IBSOP_REG_MASK },
 	.offset_max		= MSR_AMD64_IBSOP_REG_COUNT,
+	.format_attrs		= ibs_op_format_attrs,
 
 	.get_count		= get_ibs_op_count,
 };
@@ -597,6 +617,17 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 
 	perf_ibs->pcpu = pcpu;
 
+	/* register attributes */
+	if (perf_ibs->format_attrs[0]) {
+		memset(&perf_ibs->format_group, 0, sizeof(perf_ibs->format_group));
+		perf_ibs->format_group.name	= "format";
+		perf_ibs->format_group.attrs	= perf_ibs->format_attrs;
+
+		memset(&perf_ibs->attr_groups, 0, sizeof(perf_ibs->attr_groups));
+		perf_ibs->attr_groups[0]	= &perf_ibs->format_group;
+		perf_ibs->pmu.attr_groups	= perf_ibs->attr_groups;
+	}
+
 	ret = perf_pmu_register(&perf_ibs->pmu, name, -1);
 	if (ret) {
 		perf_ibs->pcpu = NULL;
@@ -608,13 +639,19 @@ static __init int perf_ibs_pmu_init(struct perf_ibs *perf_ibs, char *name)
 
 static __init int perf_event_ibs_init(void)
 {
+	struct attribute **attr = ibs_op_format_attrs;
+
 	if (!ibs_caps)
 		return -ENODEV;	/* ibs not supported by the cpu */
 
 	perf_ibs_pmu_init(&perf_ibs_fetch, "ibs_fetch");
-	if (ibs_caps & IBS_CAPS_OPCNT)
+
+	if (ibs_caps & IBS_CAPS_OPCNT) {
 		perf_ibs_op.config_mask |= IBS_OP_CNT_CTL;
+		*attr++ = &format_attr_cnt_ctl.attr;
+	}
 	perf_ibs_pmu_init(&perf_ibs_op, "ibs_op");
+
 	register_nmi_handler(NMI_LOCAL, perf_ibs_nmi_handler, 0, "perf_ibs");
 	printk(KERN_INFO "perf: AMD IBS detected (0x%08x)\n", ibs_caps);
 
-- 
cgit v1.2.3


From 75fdd155eaf755aa183ca9844a9a178b7a0e3959 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Thu, 4 Oct 2012 17:11:42 -0700
Subject: sections: fix section conflicts in arch/x86

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/apic.h          | 2 +-
 arch/x86/kernel/apic/apic_numachip.c | 4 ++--
 arch/x86/kernel/rtc.c                | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h
index f34261296ffb..338803422239 100644
--- a/arch/x86/include/asm/apic.h
+++ b/arch/x86/include/asm/apic.h
@@ -409,7 +409,7 @@ extern struct apic *apic;
  * to enforce the order with in them.
  */
 #define apic_driver(sym)					\
-	static struct apic *__apicdrivers_##sym __used		\
+	static const struct apic *__apicdrivers_##sym __used		\
 	__aligned(sizeof(struct apic *))			\
 	__section(.apicdrivers) = { &sym }
 
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c
index bc552cff2578..a65829ac2b9a 100644
--- a/arch/x86/kernel/apic/apic_numachip.c
+++ b/arch/x86/kernel/apic/apic_numachip.c
@@ -30,7 +30,7 @@
 
 static int numachip_system __read_mostly;
 
-static struct apic apic_numachip __read_mostly;
+static const struct apic apic_numachip __read_mostly;
 
 static unsigned int get_apic_id(unsigned long x)
 {
@@ -199,7 +199,7 @@ static int numachip_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
 	return 0;
 }
 
-static struct apic apic_numachip __refconst = {
+static const struct apic apic_numachip __refconst = {
 
 	.name				= "NumaConnect system",
 	.probe				= numachip_probe,
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index af6db6ec5b2a..4929c1be0ac0 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -225,7 +225,7 @@ static struct platform_device rtc_device = {
 static __init int add_rtc_cmos(void)
 {
 #ifdef CONFIG_PNP
-	static const char *ids[] __initconst =
+	static const char * const  const ids[] __initconst =
 	    { "PNP0b00", "PNP0b01", "PNP0b02", };
 	struct pnp_dev *dev;
 	struct pnp_id *id;
-- 
cgit v1.2.3


From 751f409db6216ebd134a94f6dcd97779933a5106 Mon Sep 17 00:00:00 2001
From: Denys Vlasenko <vda.linux@googlemail.com>
Date: Thu, 4 Oct 2012 17:15:31 -0700
Subject: compat: move compat_siginfo_t definition to asm/compat.h

This is a preparatory patch for the introduction of NT_SIGINFO elf note.

Make the location of compat_siginfo_t uniform across eight architectures
which have it.  Now it can be pulled in by including asm/compat.h or
linux/compat.h.

Most of the copies are verbatim.  compat_uid[32]_t had to be replaced by
__compat_uid[32]_t.  compat_uptr_t had to be moved up before
compat_siginfo_t in asm/compat.h on a several architectures (tile already
had it moved up).  compat_sigval_t had to be relocated from linux/compat.h
to asm/compat.h.

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Amerigo Wang <amwang@redhat.com>
Cc: "Jonathan M. Foote" <jmfoote@cert.org>
Cc: Roland McGrath <roland@hack.frob.com>
Cc: Pedro Alves <palves@redhat.com>
Cc: Fengguang Wu <fengguang.wu@intel.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/include/asm/compat.h       | 60 +++++++++++++++++++++++++++-
 arch/arm64/kernel/signal32.c          | 53 -------------------------
 arch/mips/include/asm/compat-signal.h | 62 -----------------------------
 arch/mips/include/asm/compat.h        | 69 +++++++++++++++++++++++++++++++-
 arch/parisc/include/asm/compat.h      | 59 ++++++++++++++++++++++++++-
 arch/parisc/kernel/signal32.h         | 52 ------------------------
 arch/powerpc/include/asm/compat.h     | 60 +++++++++++++++++++++++++++-
 arch/powerpc/include/asm/siginfo.h    |  1 -
 arch/powerpc/kernel/ppc32.h           | 51 ------------------------
 arch/s390/include/asm/compat.h        | 75 ++++++++++++++++++++++++++++++++++-
 arch/s390/kernel/compat_linux.h       | 68 -------------------------------
 arch/sparc/include/asm/compat.h       | 61 +++++++++++++++++++++++++++-
 arch/sparc/include/asm/siginfo.h      |  1 -
 arch/sparc/kernel/signal32.c          | 52 ------------------------
 arch/tile/include/asm/compat.h        | 62 +++++++++++++++++++++++++++++
 arch/tile/kernel/compat_signal.c      | 57 --------------------------
 arch/x86/include/asm/compat.h         | 74 +++++++++++++++++++++++++++++++++-
 arch/x86/include/asm/ia32.h           | 67 -------------------------------
 include/linux/compat.h                |  5 ---
 19 files changed, 513 insertions(+), 476 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h
index a670a33ad736..37e610dc084e 100644
--- a/arch/arm64/include/asm/compat.h
+++ b/arch/arm64/include/asm/compat.h
@@ -55,6 +55,7 @@ typedef s64		compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -130,6 +131,64 @@ typedef u32		compat_old_sigset_t;
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		/* The padding is the same size as AArch64. */
+		int _pad[128/sizeof(int) - 3];
+
+		/* kill() */
+		struct {
+			compat_pid_t _pid;	/* sender's pid */
+			__compat_uid32_t _uid;	/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;	/* timer id */
+			int _overrun;		/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;       /* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			compat_pid_t _pid;	/* sender's pid */
+			__compat_uid32_t _uid;	/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			compat_pid_t _pid;	/* which child */
+			__compat_uid32_t _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			compat_uptr_t _addr; /* faulting insn/memory ref. */
+			short _addr_lsb; /* LSB of the reported address */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			compat_long_t _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -139,7 +198,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c
index ac74c2f261e3..0790a87a4346 100644
--- a/arch/arm64/kernel/signal32.c
+++ b/arch/arm64/kernel/signal32.c
@@ -30,59 +30,6 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
-typedef struct compat_siginfo {
-	int si_signo;
-	int si_errno;
-	int si_code;
-
-	union {
-		/* The padding is the same size as AArch64. */
-		int _pad[SI_PAD_SIZE];
-
-		/* kill() */
-		struct {
-			compat_pid_t _pid;	/* sender's pid */
-			__compat_uid32_t _uid;	/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;	/* timer id */
-			int _overrun;		/* overrun count */
-			compat_sigval_t _sigval;	/* same as below */
-			int _sys_private;       /* not to be passed to user */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			compat_pid_t _pid;	/* sender's pid */
-			__compat_uid32_t _uid;	/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			compat_pid_t _pid;	/* which child */
-			__compat_uid32_t _uid;	/* sender's uid */
-			int _status;		/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-		struct {
-			compat_uptr_t _addr; /* faulting insn/memory ref. */
-			short _addr_lsb; /* LSB of the reported address */
-		} _sigfault;
-
-		/* SIGPOLL */
-		struct {
-			compat_long_t _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-	} _sifields;
-} compat_siginfo_t;
-
 struct compat_sigaction {
 	compat_uptr_t			sa_handler;
 	compat_ulong_t			sa_flags;
diff --git a/arch/mips/include/asm/compat-signal.h b/arch/mips/include/asm/compat-signal.h
index 368a99e5c3e1..6599a901b63e 100644
--- a/arch/mips/include/asm/compat-signal.h
+++ b/arch/mips/include/asm/compat-signal.h
@@ -10,68 +10,6 @@
 
 #include <asm/uaccess.h>
 
-#define SI_PAD_SIZE32   ((SI_MAX_SIZE/sizeof(int)) - 3)
-
-typedef struct compat_siginfo {
-	int si_signo;
-	int si_code;
-	int si_errno;
-
-	union {
-		int _pad[SI_PAD_SIZE32];
-
-		/* kill() */
-		struct {
-			compat_pid_t _pid;	/* sender's pid */
-			compat_uid_t _uid;	/* sender's uid */
-		} _kill;
-
-		/* SIGCHLD */
-		struct {
-			compat_pid_t _pid;	/* which child */
-			compat_uid_t _uid;	/* sender's uid */
-			int _status;		/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* IRIX SIGCHLD */
-		struct {
-			compat_pid_t _pid;	/* which child */
-			compat_clock_t _utime;
-			int _status;		/* exit code */
-			compat_clock_t _stime;
-		} _irix_sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-		struct {
-			s32 _addr; /* faulting insn/memory ref. */
-		} _sigfault;
-
-		/* SIGPOLL, SIGXFSZ (To do ...)  */
-		struct {
-			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-
-		/* POSIX.1b timers */
-		struct {
-			timer_t _tid;		/* timer id */
-			int _overrun;		/* overrun count */
-			compat_sigval_t _sigval;/* same as below */
-			int _sys_private;       /* not to be passed to user */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			compat_pid_t _pid;	/* sender's pid */
-			compat_uid_t _uid;	/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-	} _sifields;
-} compat_siginfo_t;
-
 static inline int __copy_conv_sigset_to_user(compat_sigset_t __user *d,
 	const sigset_t *s)
 {
diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
index b77df0366ee6..58277e0e9cd4 100644
--- a/arch/mips/include/asm/compat.h
+++ b/arch/mips/include/asm/compat.h
@@ -43,6 +43,7 @@ typedef s64		compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -124,6 +125,73 @@ typedef u32		compat_old_sigset_t;	/* at least 32 bits */
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+#define SI_PAD_SIZE32	(128/sizeof(int) - 3)
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_code;
+	int si_errno;
+
+	union {
+		int _pad[SI_PAD_SIZE32];
+
+		/* kill() */
+		struct {
+			compat_pid_t _pid;	/* sender's pid */
+			__compat_uid_t _uid;	/* sender's uid */
+		} _kill;
+
+		/* SIGCHLD */
+		struct {
+			compat_pid_t _pid;	/* which child */
+			__compat_uid_t _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* IRIX SIGCHLD */
+		struct {
+			compat_pid_t _pid;	/* which child */
+			compat_clock_t _utime;
+			int _status;		/* exit code */
+			compat_clock_t _stime;
+		} _irix_sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			s32 _addr; /* faulting insn/memory ref. */
+		} _sigfault;
+
+		/* SIGPOLL, SIGXFSZ (To do ...)  */
+		struct {
+			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+
+		/* POSIX.1b timers */
+		struct {
+			timer_t _tid;		/* timer id */
+			int _overrun;		/* overrun count */
+			compat_sigval_t _sigval;/* same as below */
+			int _sys_private;       /* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			compat_pid_t _pid;	/* sender's pid */
+			__compat_uid_t _uid;	/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -133,7 +201,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
index 760f331d4fa3..db7a662691a8 100644
--- a/arch/parisc/include/asm/compat.h
+++ b/arch/parisc/include/asm/compat.h
@@ -36,6 +36,7 @@ typedef s64	compat_s64;
 typedef u32	compat_uint_t;
 typedef u32	compat_ulong_t;
 typedef u64	compat_u64;
+typedef u32	compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t		tv_sec;
@@ -127,6 +128,63 @@ typedef u32		compat_old_sigset_t;	/* at least 32 bits */
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[128/sizeof(int) - 3];
+
+		/* kill() */
+		struct {
+			unsigned int _pid;      /* sender's pid */
+			unsigned int _uid;      /* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;            /* timer id */
+			int _overrun;           /* overrun count */
+			char _pad[sizeof(unsigned int) - sizeof(int)];
+			compat_sigval_t _sigval;        /* same as below */
+			int _sys_private;       /* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			unsigned int _pid;      /* sender's pid */
+			unsigned int _uid;      /* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			unsigned int _pid;      /* which child */
+			unsigned int _uid;      /* sender's uid */
+			int _status;            /* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			unsigned int _addr;     /* faulting insn/memory ref. */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int _band;      /* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -136,7 +194,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h
index c7800846422c..08a88b5349a2 100644
--- a/arch/parisc/kernel/signal32.h
+++ b/arch/parisc/kernel/signal32.h
@@ -55,58 +55,6 @@ struct k_sigaction32 {
 	struct compat_sigaction sa;
 };
 
-typedef struct compat_siginfo {
-        int si_signo;
-        int si_errno;
-        int si_code;
-
-        union {
-                int _pad[((128/sizeof(int)) - 3)];
-
-                /* kill() */
-                struct {
-                        unsigned int _pid;      /* sender's pid */
-                        unsigned int _uid;      /* sender's uid */
-                } _kill;
-
-                /* POSIX.1b timers */
-                struct {
-                        compat_timer_t _tid;            /* timer id */
-                        int _overrun;           /* overrun count */
-                        char _pad[sizeof(unsigned int) - sizeof(int)];
-                        compat_sigval_t _sigval;        /* same as below */
-                        int _sys_private;       /* not to be passed to user */
-                } _timer;
-
-                /* POSIX.1b signals */
-                struct {
-                        unsigned int _pid;      /* sender's pid */
-                        unsigned int _uid;      /* sender's uid */
-                        compat_sigval_t _sigval;
-                } _rt;
-
-                /* SIGCHLD */
-                struct {
-                        unsigned int _pid;      /* which child */
-                        unsigned int _uid;      /* sender's uid */
-                        int _status;            /* exit code */
-                        compat_clock_t _utime;
-                        compat_clock_t _stime;
-                } _sigchld;
-
-                /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-                struct {
-                        unsigned int _addr;     /* faulting insn/memory ref. */
-                } _sigfault;
-
-                /* SIGPOLL */
-                struct {
-                        int _band;      /* POLL_IN, POLL_OUT, POLL_MSG */
-                        int _fd;
-                } _sigpoll;
-        } _sifields;
-} compat_siginfo_t;
-
 int copy_siginfo_to_user32 (compat_siginfo_t __user *to, siginfo_t *from);
 int copy_siginfo_from_user32 (siginfo_t *to, compat_siginfo_t __user *from);
 
diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
index 88e602f6430d..84fdf6857c31 100644
--- a/arch/powerpc/include/asm/compat.h
+++ b/arch/powerpc/include/asm/compat.h
@@ -38,6 +38,7 @@ typedef s64		compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -114,6 +115,64 @@ typedef u32		compat_old_sigset_t;
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+#define SI_PAD_SIZE32	(128/sizeof(int) - 3)
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[SI_PAD_SIZE32];
+
+		/* kill() */
+		struct {
+			compat_pid_t _pid;		/* sender's pid */
+			__compat_uid_t _uid;		/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;		/* timer id */
+			int _overrun;			/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			compat_pid_t _pid;		/* sender's pid */
+			__compat_uid_t _uid;		/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			compat_pid_t _pid;		/* which child */
+			__compat_uid_t _uid;		/* sender's uid */
+			int _status;			/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
+		struct {
+			unsigned int _addr; /* faulting insn/memory ref. */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -123,7 +182,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/powerpc/include/asm/siginfo.h b/arch/powerpc/include/asm/siginfo.h
index 49495b0534ed..ccce3ef5cd86 100644
--- a/arch/powerpc/include/asm/siginfo.h
+++ b/arch/powerpc/include/asm/siginfo.h
@@ -10,7 +10,6 @@
 
 #ifdef __powerpc64__
 #    define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
-#    define SI_PAD_SIZE32		((SI_MAX_SIZE/sizeof(int)) - 3)
 #endif
 
 #include <asm-generic/siginfo.h>
diff --git a/arch/powerpc/kernel/ppc32.h b/arch/powerpc/kernel/ppc32.h
index dc16aefe1dd0..02fb0ee26093 100644
--- a/arch/powerpc/kernel/ppc32.h
+++ b/arch/powerpc/kernel/ppc32.h
@@ -16,57 +16,6 @@
 
 /* These are here to support 32-bit syscalls on a 64-bit kernel. */
 
-typedef struct compat_siginfo {
-	int si_signo;
-	int si_errno;
-	int si_code;
-
-	union {
-		int _pad[SI_PAD_SIZE32];
-
-		/* kill() */
-		struct {
-			compat_pid_t _pid;		/* sender's pid */
-			compat_uid_t _uid;		/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;			/* timer id */
-			int _overrun;			/* overrun count */
-			compat_sigval_t _sigval;		/* same as below */
-			int _sys_private;		/* not to be passed to user */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			compat_pid_t _pid;		/* sender's pid */
-			compat_uid_t _uid;		/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			compat_pid_t _pid;		/* which child */
-			compat_uid_t _uid;		/* sender's uid */
-			int _status;			/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
-		struct {
-			unsigned int _addr; /* faulting insn/memory ref. */
-		} _sigfault;
-
-		/* SIGPOLL */
-		struct {
-			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-	} _sifields;
-} compat_siginfo_t;
-
 #define __old_sigaction32	old_sigaction32
 
 struct __old_sigaction32 {
diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
index 234f1d859cea..a34a9d612fc0 100644
--- a/arch/s390/include/asm/compat.h
+++ b/arch/s390/include/asm/compat.h
@@ -65,6 +65,7 @@ typedef s64		compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -144,6 +145,79 @@ typedef u32		compat_old_sigset_t;	/* at least 32 bits */
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+typedef struct compat_siginfo {
+	int	si_signo;
+	int	si_errno;
+	int	si_code;
+
+	union {
+		int _pad[128/sizeof(int) - 3];
+
+		/* kill() */
+		struct {
+			pid_t	_pid;	/* sender's pid */
+			uid_t	_uid;	/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;		/* timer id */
+			int _overrun;			/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			pid_t			_pid;	/* sender's pid */
+			uid_t			_uid;	/* sender's uid */
+			compat_sigval_t		_sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			pid_t			_pid;	/* which child */
+			uid_t			_uid;	/* sender's uid */
+			int			_status;/* exit code */
+			compat_clock_t		_utime;
+			compat_clock_t		_stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			__u32	_addr;	/* faulting insn/memory ref. - pointer */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int	_band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int	_fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
+/*
+ * How these fields are to be accessed.
+ */
+#define si_pid		_sifields._kill._pid
+#define si_uid		_sifields._kill._uid
+#define si_status	_sifields._sigchld._status
+#define si_utime	_sifields._sigchld._utime
+#define si_stime	_sifields._sigchld._stime
+#define si_value	_sifields._rt._sigval
+#define si_int		_sifields._rt._sigval.sival_int
+#define si_ptr		_sifields._rt._sigval.sival_ptr
+#define si_addr		_sifields._sigfault._addr
+#define si_band		_sifields._sigpoll._band
+#define si_fd		_sifields._sigpoll._fd
+#define si_tid		_sifields._timer._tid
+#define si_overrun	_sifields._timer._overrun
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -153,7 +227,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/s390/kernel/compat_linux.h b/arch/s390/kernel/compat_linux.h
index 9635d759c2b9..90887bd98cf0 100644
--- a/arch/s390/kernel/compat_linux.h
+++ b/arch/s390/kernel/compat_linux.h
@@ -23,74 +23,6 @@ struct old_sigaction32 {
        __u32			sa_flags;
        __u32			sa_restorer;	/* Another 32 bit pointer */
 };
- 
-typedef struct compat_siginfo {
-	int	si_signo;
-	int	si_errno;
-	int	si_code;
-
-	union {
-		int _pad[((128/sizeof(int)) - 3)];
-
-		/* kill() */
-		struct {
-			pid_t	_pid;	/* sender's pid */
-			uid_t	_uid;	/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;		/* timer id */
-			int _overrun;		/* overrun count */
-			compat_sigval_t _sigval;	/* same as below */
-			int _sys_private;       /* not to be passed to user */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			pid_t			_pid;	/* sender's pid */
-			uid_t			_uid;	/* sender's uid */
-			compat_sigval_t		_sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			pid_t			_pid;	/* which child */
-			uid_t			_uid;	/* sender's uid */
-			int			_status;/* exit code */
-			compat_clock_t		_utime;
-			compat_clock_t		_stime;
-		} _sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-		struct {
-			__u32	_addr;	/* faulting insn/memory ref. - pointer */
-		} _sigfault;
-                          
-		/* SIGPOLL */
-		struct {
-			int	_band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int	_fd;
-		} _sigpoll;
-	} _sifields;
-} compat_siginfo_t;
-
-/*
- * How these fields are to be accessed.
- */
-#define si_pid		_sifields._kill._pid
-#define si_uid		_sifields._kill._uid
-#define si_status	_sifields._sigchld._status
-#define si_utime	_sifields._sigchld._utime
-#define si_stime	_sifields._sigchld._stime
-#define si_value	_sifields._rt._sigval
-#define si_int		_sifields._rt._sigval.sival_int
-#define si_ptr		_sifields._rt._sigval.sival_ptr
-#define si_addr		_sifields._sigfault._addr
-#define si_band		_sifields._sigpoll._band
-#define si_fd		_sifields._sigpoll._fd    
-#define si_tid		_sifields._timer._tid
-#define si_overrun	_sifields._timer._overrun
 
 /* asm/sigcontext.h */
 typedef union
diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
index b8be20d42a0a..cef99fbc0a21 100644
--- a/arch/sparc/include/asm/compat.h
+++ b/arch/sparc/include/asm/compat.h
@@ -36,6 +36,7 @@ typedef s64		compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64		compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -147,6 +148,65 @@ typedef u32		compat_old_sigset_t;
 
 typedef u32		compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+#define SI_PAD_SIZE32	(128/sizeof(int) - 3)
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[SI_PAD_SIZE32];
+
+		/* kill() */
+		struct {
+			compat_pid_t _pid;		/* sender's pid */
+			unsigned int _uid;		/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;		/* timer id */
+			int _overrun;			/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			compat_pid_t _pid;		/* sender's pid */
+			unsigned int _uid;		/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			compat_pid_t _pid;		/* which child */
+			unsigned int _uid;		/* sender's uid */
+			int _status;			/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
+		struct {
+			u32 _addr; /* faulting insn/memory ref. */
+			int _trapno;
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -156,7 +216,6 @@ typedef u32		compat_sigset_word;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/sparc/include/asm/siginfo.h b/arch/sparc/include/asm/siginfo.h
index 215900fce21b..dbc182c438b4 100644
--- a/arch/sparc/include/asm/siginfo.h
+++ b/arch/sparc/include/asm/siginfo.h
@@ -3,7 +3,6 @@
 
 #if defined(__sparc__) && defined(__arch64__)
 
-#define SI_PAD_SIZE32	((SI_MAX_SIZE/sizeof(int)) - 3)
 #define __ARCH_SI_PREAMBLE_SIZE	(4 * sizeof(int))
 #define __ARCH_SI_BAND_T int
 
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index a53e0a5fd3a3..53e48f721ce3 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -54,58 +54,6 @@ struct signal_frame32 {
 	/* __siginfo_rwin_t * */u32 rwin_save;
 } __attribute__((aligned(8)));
 
-typedef struct compat_siginfo{
-	int si_signo;
-	int si_errno;
-	int si_code;
-
-	union {
-		int _pad[SI_PAD_SIZE32];
-
-		/* kill() */
-		struct {
-			compat_pid_t _pid;		/* sender's pid */
-			unsigned int _uid;		/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;			/* timer id */
-			int _overrun;			/* overrun count */
-			compat_sigval_t _sigval;		/* same as below */
-			int _sys_private;		/* not to be passed to user */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			compat_pid_t _pid;		/* sender's pid */
-			unsigned int _uid;		/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			compat_pid_t _pid;		/* which child */
-			unsigned int _uid;		/* sender's uid */
-			int _status;			/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGEMT */
-		struct {
-			u32 _addr; /* faulting insn/memory ref. */
-			int _trapno;
-		} _sigfault;
-
-		/* SIGPOLL */
-		struct {
-			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-	} _sifields;
-}compat_siginfo_t;
-
 struct rt_signal_frame32 {
 	struct sparc_stackf32	ss;
 	compat_siginfo_t	info;
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index 6e74450ff0a1..3063e6fc8daa 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -110,6 +110,68 @@ struct compat_flock64 {
 
 typedef u32               compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+#define COMPAT_SI_PAD_SIZE	(128/sizeof(int) - 3)
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[COMPAT_SI_PAD_SIZE];
+
+		/* kill() */
+		struct {
+			unsigned int _pid;	/* sender's pid */
+			unsigned int _uid;	/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;	/* timer id */
+			int _overrun;		/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+			int _overrun_incr;	/* amount to add to overrun */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			unsigned int _pid;	/* sender's pid */
+			unsigned int _uid;	/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			unsigned int _pid;	/* which child */
+			unsigned int _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			unsigned int _addr;	/* faulting insn/memory ref. */
+#ifdef __ARCH_SI_TRAPNO
+			int _trapno;	/* TRAP # which caused the signal */
+#endif
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index 474571b84085..7bc0859a9f5e 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -55,63 +55,6 @@ struct compat_ucontext {
 	sigset_t	  uc_sigmask;	/* mask last for extensibility */
 };
 
-#define COMPAT_SI_PAD_SIZE	((SI_MAX_SIZE - 3 * sizeof(int)) / sizeof(int))
-
-struct compat_siginfo {
-	int si_signo;
-	int si_errno;
-	int si_code;
-
-	union {
-		int _pad[COMPAT_SI_PAD_SIZE];
-
-		/* kill() */
-		struct {
-			unsigned int _pid;	/* sender's pid */
-			unsigned int _uid;	/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;	/* timer id */
-			int _overrun;		/* overrun count */
-			compat_sigval_t _sigval;	/* same as below */
-			int _sys_private;	/* not to be passed to user */
-			int _overrun_incr;	/* amount to add to overrun */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			unsigned int _pid;	/* sender's pid */
-			unsigned int _uid;	/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			unsigned int _pid;	/* which child */
-			unsigned int _uid;	/* sender's uid */
-			int _status;		/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-		struct {
-			unsigned int _addr;	/* faulting insn/memory ref. */
-#ifdef __ARCH_SI_TRAPNO
-			int _trapno;	/* TRAP # which caused the signal */
-#endif
-		} _sigfault;
-
-		/* SIGPOLL */
-		struct {
-			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-	} _sifields;
-};
-
 struct compat_rt_sigframe {
 	unsigned char save_area[C_ABI_SAVE_AREA_SIZE]; /* caller save area */
 	struct compat_siginfo info;
diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
index fedf32b73e65..59c6c401f79f 100644
--- a/arch/x86/include/asm/compat.h
+++ b/arch/x86/include/asm/compat.h
@@ -41,6 +41,7 @@ typedef s64 __attribute__((aligned(4))) compat_s64;
 typedef u32		compat_uint_t;
 typedef u32		compat_ulong_t;
 typedef u64 __attribute__((aligned(4))) compat_u64;
+typedef u32		compat_uptr_t;
 
 struct compat_timespec {
 	compat_time_t	tv_sec;
@@ -124,6 +125,78 @@ typedef u32		compat_old_sigset_t;	/* at least 32 bits */
 
 typedef u32               compat_sigset_word;
 
+typedef union compat_sigval {
+	compat_int_t	sival_int;
+	compat_uptr_t	sival_ptr;
+} compat_sigval_t;
+
+typedef struct compat_siginfo {
+	int si_signo;
+	int si_errno;
+	int si_code;
+
+	union {
+		int _pad[128/sizeof(int) - 3];
+
+		/* kill() */
+		struct {
+			unsigned int _pid;	/* sender's pid */
+			unsigned int _uid;	/* sender's uid */
+		} _kill;
+
+		/* POSIX.1b timers */
+		struct {
+			compat_timer_t _tid;	/* timer id */
+			int _overrun;		/* overrun count */
+			compat_sigval_t _sigval;	/* same as below */
+			int _sys_private;	/* not to be passed to user */
+			int _overrun_incr;	/* amount to add to overrun */
+		} _timer;
+
+		/* POSIX.1b signals */
+		struct {
+			unsigned int _pid;	/* sender's pid */
+			unsigned int _uid;	/* sender's uid */
+			compat_sigval_t _sigval;
+		} _rt;
+
+		/* SIGCHLD */
+		struct {
+			unsigned int _pid;	/* which child */
+			unsigned int _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_clock_t _utime;
+			compat_clock_t _stime;
+		} _sigchld;
+
+		/* SIGCHLD (x32 version) */
+		struct {
+			unsigned int _pid;	/* which child */
+			unsigned int _uid;	/* sender's uid */
+			int _status;		/* exit code */
+			compat_s64 _utime;
+			compat_s64 _stime;
+		} _sigchld_x32;
+
+		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
+		struct {
+			unsigned int _addr;	/* faulting insn/memory ref. */
+		} _sigfault;
+
+		/* SIGPOLL */
+		struct {
+			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
+			int _fd;
+		} _sigpoll;
+
+		struct {
+			unsigned int _call_addr; /* calling insn */
+			int _syscall;	/* triggering system call number */
+			unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
+		} _sigsys;
+	} _sifields;
+} compat_siginfo_t;
+
 #define COMPAT_OFF_T_MAX	0x7fffffff
 #define COMPAT_LOFF_T_MAX	0x7fffffffffffffffL
 
@@ -209,7 +282,6 @@ typedef struct user_regs_struct32 compat_elf_gregset_t;
  * as pointers because the syscall entry code will have
  * appropriately converted them already.
  */
-typedef	u32		compat_uptr_t;
 
 static inline void __user *compat_ptr(compat_uptr_t uptr)
 {
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h
index b04cbdb138cd..e6232773ce49 100644
--- a/arch/x86/include/asm/ia32.h
+++ b/arch/x86/include/asm/ia32.h
@@ -86,73 +86,6 @@ struct stat64 {
 	unsigned long long	st_ino;
 } __attribute__((packed));
 
-typedef struct compat_siginfo {
-	int si_signo;
-	int si_errno;
-	int si_code;
-
-	union {
-		int _pad[((128 / sizeof(int)) - 3)];
-
-		/* kill() */
-		struct {
-			unsigned int _pid;	/* sender's pid */
-			unsigned int _uid;	/* sender's uid */
-		} _kill;
-
-		/* POSIX.1b timers */
-		struct {
-			compat_timer_t _tid;	/* timer id */
-			int _overrun;		/* overrun count */
-			compat_sigval_t _sigval;	/* same as below */
-			int _sys_private;	/* not to be passed to user */
-			int _overrun_incr;	/* amount to add to overrun */
-		} _timer;
-
-		/* POSIX.1b signals */
-		struct {
-			unsigned int _pid;	/* sender's pid */
-			unsigned int _uid;	/* sender's uid */
-			compat_sigval_t _sigval;
-		} _rt;
-
-		/* SIGCHLD */
-		struct {
-			unsigned int _pid;	/* which child */
-			unsigned int _uid;	/* sender's uid */
-			int _status;		/* exit code */
-			compat_clock_t _utime;
-			compat_clock_t _stime;
-		} _sigchld;
-
-		/* SIGCHLD (x32 version) */
-		struct {
-			unsigned int _pid;	/* which child */
-			unsigned int _uid;	/* sender's uid */
-			int _status;		/* exit code */
-			compat_s64 _utime;
-			compat_s64 _stime;
-		} _sigchld_x32;
-
-		/* SIGILL, SIGFPE, SIGSEGV, SIGBUS */
-		struct {
-			unsigned int _addr;	/* faulting insn/memory ref. */
-		} _sigfault;
-
-		/* SIGPOLL */
-		struct {
-			int _band;	/* POLL_IN, POLL_OUT, POLL_MSG */
-			int _fd;
-		} _sigpoll;
-
-		struct {
-			unsigned int _call_addr; /* calling insn */
-			int _syscall;	/* triggering system call number */
-			unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
-		} _sigsys;
-	} _sifields;
-} compat_siginfo_t;
-
 #define IA32_STACK_TOP IA32_PAGE_OFFSET
 
 #ifdef __KERNEL__
diff --git a/include/linux/compat.h b/include/linux/compat.h
index fd4e29956d1c..3f53d002c7c5 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -160,11 +160,6 @@ struct compat_ustat {
 	char			f_fpack[6];
 };
 
-typedef union compat_sigval {
-	compat_int_t	sival_int;
-	compat_uptr_t	sival_ptr;
-} compat_sigval_t;
-
 #define COMPAT_SIGEV_PAD_SIZE	((SIGEV_MAX_SIZE/sizeof(int)) - 3)
 
 typedef struct compat_sigevent {
-- 
cgit v1.2.3


From af1839eb4bd4fe079a125eb199205fceb6ae19e6 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 8 Oct 2012 16:28:08 -0700
Subject: Kconfig: clean up the long arch list for the UID16 config option

Introduce HAVE_UID16 config option and select it in corresponding
architecture Kconfig files.  UID16 now only depends on HAVE_UID16.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Mikael Starvik <starvik@axis.com>
Cc: Jesper Nilsson <jesper.nilsson@axis.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/Kconfig       | 1 +
 arch/arm64/Kconfig     | 1 +
 arch/blackfin/Kconfig  | 1 +
 arch/cris/Kconfig      | 1 +
 arch/frv/Kconfig       | 1 +
 arch/h8300/Kconfig     | 1 +
 arch/m68k/Kconfig      | 1 +
 arch/s390/Kconfig      | 1 +
 arch/sh/Kconfig        | 1 +
 arch/sparc/Kconfig     | 2 ++
 arch/um/Kconfig.common | 1 +
 arch/x86/Kconfig       | 2 ++
 init/Kconfig           | 6 ++++--
 13 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 6d2f7f5c0036..5f5439672932 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -39,6 +39,7 @@ config ARM
 	select HARDIRQS_SW_RESEND
 	select GENERIC_IRQ_PROBE
 	select GENERIC_IRQ_SHOW
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select HARDIRQS_SW_RESEND
 	select CPU_PM if (SUSPEND || CPU_IDLE)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 767ba5685454..e61acae0d891 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -193,6 +193,7 @@ config COMPAT
 	bool "Kernel support for 32-bit EL0"
 	depends on !ARM64_64K_PAGES
 	select COMPAT_BINFMT_ELF
+	select HAVE_UID16
 	help
 	  This option enables support for a 32-bit EL0 running under a 64-bit
 	  kernel at EL1. AArch32-specific components such as system calls,
diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig
index c7092e6057c5..2169889c73ec 100644
--- a/arch/blackfin/Kconfig
+++ b/arch/blackfin/Kconfig
@@ -33,6 +33,7 @@ config BLACKFIN
 	select HAVE_PERF_EVENTS
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_WANT_OPTIONAL_GPIOLIB
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_ATOMIC64
diff --git a/arch/cris/Kconfig b/arch/cris/Kconfig
index 72bd5ae50a89..a118163b04ee 100644
--- a/arch/cris/Kconfig
+++ b/arch/cris/Kconfig
@@ -42,6 +42,7 @@ config CRIS
 	select HAVE_IDE
 	select GENERIC_ATOMIC64
 	select HAVE_GENERIC_HARDIRQS
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_IOMAP
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig
index 971c0a19facb..cc5709d18350 100644
--- a/arch/frv/Kconfig
+++ b/arch/frv/Kconfig
@@ -5,6 +5,7 @@ config FRV
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
+	select HAVE_UID16
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index 5e8a0d9a09ce..90462eb23d02 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -3,6 +3,7 @@ config H8300
 	default y
 	select HAVE_IDE
 	select HAVE_GENERIC_HARDIRQS
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index b22df9410dce..3e2b2f66db60 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -6,6 +6,7 @@ config M68K
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_SHOW
 	select GENERIC_ATOMIC64
+	select HAVE_UID16
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS
 	select GENERIC_CPU_DEVICES
 	select GENERIC_STRNCPY_FROM_USER if MMU
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c8af429991d9..baba37cfcf84 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -126,6 +126,7 @@ config S390
 	select ARCH_INLINE_WRITE_UNLOCK_BH
 	select ARCH_INLINE_WRITE_UNLOCK_IRQ
 	select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+	select HAVE_UID16 if 32BIT
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 36f5141e8041..f0c85e424777 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -21,6 +21,7 @@ config SUPERH
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_XZ
 	select HAVE_KERNEL_LZO
+	select HAVE_UID16
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 67f1f6f5f4e1..e66481015d3b 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -42,6 +42,7 @@ config SPARC32
 	def_bool !64BIT
 	select GENERIC_ATOMIC64
 	select CLZ_TAB
+	select HAVE_UID16
 
 config SPARC64
 	def_bool 64BIT
@@ -571,6 +572,7 @@ config COMPAT
 	depends on SPARC64
 	default y
 	select COMPAT_BINFMT_ELF
+	select HAVE_UID16
 	select ARCH_WANT_OLD_COMPAT_IPC
 
 config SYSVIPC_COMPAT
diff --git a/arch/um/Kconfig.common b/arch/um/Kconfig.common
index cb837c223922..648121b037d5 100644
--- a/arch/um/Kconfig.common
+++ b/arch/um/Kconfig.common
@@ -7,6 +7,7 @@ config UML
 	bool
 	default y
 	select HAVE_GENERIC_HARDIRQS
+	select HAVE_UID16
 	select GENERIC_IRQ_SHOW
 	select GENERIC_CPU_DEVICES
 	select GENERIC_IO
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b72777ff32a9..fd5d7c2c2daa 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -10,6 +10,7 @@ config X86_32
 	def_bool y
 	depends on !64BIT
 	select CLKSRC_I8253
+	select HAVE_UID16
 
 config X86_64
 	def_bool y
@@ -2168,6 +2169,7 @@ config IA32_EMULATION
 	bool "IA32 Emulation"
 	depends on X86_64
 	select COMPAT_BINFMT_ELF
+	select HAVE_UID16
 	---help---
 	  Include code to run legacy 32-bit programs under a
 	  64-bit kernel. You should likely turn this on, unless you're
diff --git a/init/Kconfig b/init/Kconfig
index ed6334dd5e71..38bab420bd9b 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1125,10 +1125,12 @@ menuconfig EXPERT
           environments which can tolerate a "non-standard" kernel.
           Only use this if you really know what you are doing.
 
+config HAVE_UID16
+	bool
+
 config UID16
 	bool "Enable 16-bit UID system calls" if EXPERT
-	depends on ARM || BLACKFIN || CRIS || FRV || H8300 || X86_32 || M68K || (S390 && !64BIT) || SUPERH || SPARC32 || (SPARC64 && COMPAT) || UML || (X86_64 && IA32_EMULATION) \
-		|| AARCH32_EMULATION
+	depends on HAVE_UID16
 	default y
 	help
 	  This enables the legacy 16-bit UID syscall wrappers.
-- 
cgit v1.2.3


From b69ec42b1b194cc88f04b3fbcda8d3f93182d6c3 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 8 Oct 2012 16:28:11 -0700
Subject: Kconfig: clean up the long arch list for the DEBUG_KMEMLEAK config
 option

Introduce HAVE_DEBUG_KMEMLEAK config option and select it in corresponding
architecture Kconfig files.  DEBUG_KMEMLEAK now only depends on
HAVE_DEBUG_KMEMLEAK.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Russell King <linux@arm.linux.org.uk>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: Paul Mundt <lethal@linux-sh.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/Kconfig        | 1 +
 arch/arm64/Kconfig      | 1 +
 arch/microblaze/Kconfig | 1 +
 arch/mips/Kconfig       | 1 +
 arch/powerpc/Kconfig    | 1 +
 arch/s390/Kconfig       | 1 +
 arch/sh/Kconfig         | 1 +
 arch/sparc/Kconfig      | 1 +
 arch/tile/Kconfig       | 1 +
 arch/x86/Kconfig        | 1 +
 lib/Kconfig.debug       | 8 ++++----
 11 files changed, 14 insertions(+), 4 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 5f5439672932..2867a7742306 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -25,6 +25,7 @@ config ARM
 	select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select HAVE_GENERIC_DMA_COHERENT
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_LZO
 	select HAVE_KERNEL_LZMA
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e61acae0d891..5dc9273781d6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -10,6 +10,7 @@ config ARM64
 	select GENERIC_TIME_VSYSCALL
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_TRACEHOOK
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DMA_ATTRS
 	select HAVE_GENERIC_DMA_COHERENT
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 6133bed2b855..53fd94ab60f0 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -16,6 +16,7 @@ config MICROBLAZE
 	select OF
 	select OF_EARLY_FLATTREE
 	select ARCH_WANT_IPC_PARSE_VERSION
+	select HAVE_DEBUG_KMEMLEAK
 	select IRQ_DOMAIN
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index faf65286574e..335115e5bdd9 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -17,6 +17,7 @@ config MIPS
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_DEBUG_KMEMLEAK
 	select ARCH_BINFMT_ELF_RANDOMIZE_PIE
 	select RTC_LIB if !MACH_LOONGSON
 	select GENERIC_ATOMIC64 if !64BIT
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4ce0be32d153..6a798a70a6d1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -113,6 +113,7 @@ config PPC
 	select HAVE_DMA_API_DEBUG
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_OPROFILE
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_SYSCALL_WRAPPERS if PPC64
 	select GENERIC_ATOMIC64 if PPC32
 	select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index baba37cfcf84..8c6d7986f6d2 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -80,6 +80,7 @@ config S390
 	select HAVE_IRQ_WORK
 	select HAVE_PERF_EVENTS
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index f0c85e424777..cfbf3e3c982b 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -16,6 +16,7 @@ config SUPERH
 	select ARCH_HAVE_CUSTOM_GPIO_H
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
 	select PERF_USE_VMALLOC
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_KERNEL_GZIP
 	select HAVE_KERNEL_BZIP2
 	select HAVE_KERNEL_LZMA
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index e66481015d3b..274d6cf0ada2 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -60,6 +60,7 @@ config SPARC64
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_SYSCALL_TRACEPOINTS
+	select HAVE_DEBUG_KMEMLEAK
 	select RTC_DRV_CMOS
 	select RTC_DRV_BQ4802
 	select RTC_DRV_SUN4V
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index c9a3c1fe7297..9a0d77d3ba14 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -9,6 +9,7 @@ config TILE
 	select GENERIC_FIND_FIRST_BIT
 	select USE_GENERIC_SMP_HELPERS
 	select CC_OPTIMIZE_FOR_SIZE
+	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_GENERIC_HARDIRQS
 	select GENERIC_IRQ_PROBE
 	select GENERIC_PENDING_IRQ if SMP
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index fd5d7c2c2daa..3fea1848d955 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -66,6 +66,7 @@ config X86
 	select HAVE_PERF_EVENTS_NMI
 	select HAVE_PERF_REGS
 	select HAVE_PERF_USER_STACK_DUMP
+	select HAVE_DEBUG_KMEMLEAK
 	select ANON_INODES
 	select HAVE_ALIGNED_STRUCT_PAGE if SLUB && !M386
 	select HAVE_CMPXCHG_LOCAL if !M386
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7fba3a98967f..736db3990506 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -450,12 +450,12 @@ config SLUB_STATS
 	  out which slabs are relevant to a particular load.
 	  Try running: slabinfo -DA
 
+config HAVE_DEBUG_KMEMLEAK
+	bool
+
 config DEBUG_KMEMLEAK
 	bool "Kernel memory leak detector"
-	depends on DEBUG_KERNEL && EXPERIMENTAL && \
-		(X86 || ARM || PPC || MIPS || S390 || SPARC64 || SUPERH || \
-		 MICROBLAZE || TILE || ARM64)
-
+	depends on DEBUG_KERNEL && EXPERIMENTAL && HAVE_DEBUG_KMEMLEAK
 	select DEBUG_FS
 	select STACKTRACE if STACKTRACE_SUPPORT
 	select KALLSYMS
-- 
cgit v1.2.3


From 7ac57a89de958fbb5271dc504d0c25e34dbeec32 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Mon, 8 Oct 2012 16:28:16 -0700
Subject: Kconfig: clean up the "#if defined(arch)" list for exception-trace
 sysctl entry

Introduce SYSCTL_EXCEPTION_TRACE config option and selec it in the
architectures requiring support for the "exception-trace" debug_table
entry in kernel/sysctl.c.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/Kconfig   | 1 +
 arch/powerpc/Kconfig | 1 +
 arch/s390/Kconfig    | 1 +
 arch/sparc/Kconfig   | 1 +
 arch/tile/Kconfig    | 1 +
 arch/x86/Kconfig     | 1 +
 init/Kconfig         | 5 +++++
 kernel/sysctl.c      | 3 +--
 8 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index a30856058742..7ff68c946073 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -28,6 +28,7 @@ config ARM64
 	select PERF_USE_VMALLOC
 	select RTC_LIB
 	select SPARSE_IRQ
+	select SYSCTL_EXCEPTION_TRACE
 	help
 	  ARM 64-bit (AArch64) Linux support.
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6a798a70a6d1..df7edb887a04 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -99,6 +99,7 @@ config PPC
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_TRACER
 	select HAVE_FUNCTION_GRAPH_TRACER
+	select SYSCTL_EXCEPTION_TRACE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select HAVE_IDE
 	select HAVE_IOREMAP_PROT
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 8c6d7986f6d2..ceff7aef2477 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -68,6 +68,7 @@ config S390
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_SYSCALL_TRACEPOINTS
+	select SYSCTL_EXCEPTION_TRACE
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_REGS_AND_STACK_ACCESS_API
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index 700a01adec3a..e184075877d7 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -18,6 +18,7 @@ config SPARC
 	select HAVE_OPROFILE
 	select HAVE_ARCH_KGDB if !SMP || SPARC64
 	select HAVE_ARCH_TRACEHOOK
+	select SYSCTL_EXCEPTION_TRACE
 	select ARCH_WANT_OPTIONAL_GPIOLIB
 	select RTC_CLASS
 	select RTC_DRV_M48T59
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index df69d4296b4b..dc46490adca0 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -7,6 +7,7 @@ config TILE
 	select HAVE_DMA_API_DEBUG
 	select HAVE_KVM if !TILEGX
 	select GENERIC_FIND_FIRST_BIT
+	select SYSCTL_EXCEPTION_TRACE
 	select USE_GENERIC_SMP_HELPERS
 	select CC_OPTIMIZE_FOR_SIZE
 	select HAVE_DEBUG_KMEMLEAK
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 3fea1848d955..6119d6c7002e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -47,6 +47,7 @@ config X86
 	select HAVE_FUNCTION_GRAPH_FP_TEST
 	select HAVE_FUNCTION_TRACE_MCOUNT_TEST
 	select HAVE_SYSCALL_TRACEPOINTS
+	select SYSCTL_EXCEPTION_TRACE
 	select HAVE_KVM
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_TRACEHOOK
diff --git a/init/Kconfig b/init/Kconfig
index 38bab420bd9b..4c93533da42c 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1152,6 +1152,11 @@ config SYSCTL_SYSCALL
 
 	  If unsure say N here.
 
+config SYSCTL_EXCEPTION_TRACE
+	bool
+	help
+	  Enable support for /proc/sys/debug/exception-trace.
+
 config KALLSYMS
 	 bool "Load all symbols for debugging/ksymoops" if EXPERT
 	 default y
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index c2a2f8084bad..26f65eaa01f9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1549,8 +1549,7 @@ static struct ctl_table fs_table[] = {
 };
 
 static struct ctl_table debug_table[] = {
-#if defined(CONFIG_X86) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) || \
-    defined(CONFIG_S390) || defined(CONFIG_TILE) || defined(CONFIG_ARM64)
+#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
 	{
 		.procname	= "exception-trace",
 		.data		= &show_unhandled_signals,
-- 
cgit v1.2.3


From b1a86e15dc0304366f50ba1720834bc419c801b1 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 8 Oct 2012 16:28:23 -0700
Subject: x86, pat: remove the dependency on 'vm_pgoff' in track/untrack pfn
 vma routines

'pfn' argument for track_pfn_vma_new() can be used for reserving the
attribute for the pfn range.  No need to depend on 'vm_pgoff'

Similarly, untrack_pfn_vma() can depend on the 'pfn' argument if it is
non-zero or can use follow_phys() to get the starting value of the pfn
range.

Also the non zero 'size' argument can be used instead of recomputing it
from vma.

This cleanup also prepares the ground for the track/untrack pfn vma
routines to take over the ownership of setting PAT specific vm_flag in the
'vma'.

[khlebnikov@openvz.org: Clear pfn to paddr conversion]
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pat.c | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 3d68ef6d2266..de36c886cd38 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -704,21 +704,18 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 			unsigned long pfn, unsigned long size)
 {
+	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
 	unsigned long flags;
-	resource_size_t paddr;
-	unsigned long vma_size = vma->vm_end - vma->vm_start;
 
-	if (is_linear_pfn_mapping(vma)) {
-		/* reserve the whole chunk starting from vm_pgoff */
-		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
-		return reserve_pfn_range(paddr, vma_size, prot, 0);
-	}
+	/* reserve the whole chunk starting from paddr */
+	if (is_linear_pfn_mapping(vma))
+		return reserve_pfn_range(paddr, size, prot, 0);
 
 	if (!pat_enabled)
 		return 0;
 
 	/* for vm_insert_pfn and friends, we set prot based on lookup */
-	flags = lookup_memtype(pfn << PAGE_SHIFT);
+	flags = lookup_memtype(paddr);
 	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
 			 flags);
 
@@ -728,20 +725,28 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 /*
  * untrack_pfn_vma is called while unmapping a pfnmap for a region.
  * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case size can be zero).
+ * can be for the entire vma (in which case pfn, size are zero).
  */
 void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
 			unsigned long size)
 {
 	resource_size_t paddr;
-	unsigned long vma_size = vma->vm_end - vma->vm_start;
+	unsigned long prot;
 
-	if (is_linear_pfn_mapping(vma)) {
-		/* free the whole chunk starting from vm_pgoff */
-		paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
-		free_pfn_range(paddr, vma_size);
+	if (!is_linear_pfn_mapping(vma))
 		return;
+
+	/* free the chunk starting from pfn or the whole chunk */
+	paddr = (resource_size_t)pfn << PAGE_SHIFT;
+	if (!paddr && !size) {
+		if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
+			WARN_ON_ONCE(1);
+			return;
+		}
+
+		size = vma->vm_end - vma->vm_start;
 	}
+	free_pfn_range(paddr, size);
 }
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
-- 
cgit v1.2.3


From 5180da410db6369d1f95c9014da1c9bc33fb043e Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Mon, 8 Oct 2012 16:28:29 -0700
Subject: x86, pat: separate the pfn attribute tracking for remap_pfn_range and
 vm_insert_pfn

With PAT enabled, vm_insert_pfn() looks up the existing pfn memory
attribute and uses it.  Expectation is that the driver reserves the
memory attributes for the pfn before calling vm_insert_pfn().

remap_pfn_range() (when called for the whole vma) will setup a new
attribute (based on the prot argument) for the specified pfn range.
This addresses the legacy usage which typically calls remap_pfn_range()
with a desired memory attribute.  For ranges smaller than the vma size
(which is typically not the case), remap_pfn_range() will use the
existing memory attribute for the pfn range.

Expose two different API's for these different behaviors.
track_pfn_insert() for tracking the pfn attribute set by vm_insert_pfn()
and track_pfn_remap() for the remap_pfn_range().

This cleanup also prepares the ground for the track/untrack pfn vma
routines to take over the ownership of setting PAT specific vm_flag in
the 'vma'.

[khlebnikov@openvz.org: Clear checks in track_pfn_remap()]
[akpm@linux-foundation.org: tweak a few comments]
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pat.c             | 47 +++++++++++++++++++++++++++---------
 include/asm-generic/pgtable.h | 55 +++++++++++++++++++++++++------------------
 mm/memory.c                   | 13 ++++------
 3 files changed, 73 insertions(+), 42 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index de36c886cd38..74a702674e86 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -664,13 +664,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
 }
 
 /*
- * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * track_pfn_copy is called when vma that is covering the pfnmap gets
  * copied through copy_page_range().
  *
  * If the vma has a linear pfn mapping for the entire range, we get the prot
  * from pte and reserve the entire vma range with single reserve_pfn_range call.
  */
-int track_pfn_vma_copy(struct vm_area_struct *vma)
+int track_pfn_copy(struct vm_area_struct *vma)
 {
 	resource_size_t paddr;
 	unsigned long prot;
@@ -694,15 +694,12 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
 }
 
 /*
- * track_pfn_vma_new is called when a _new_ pfn mapping is being established
- * for physical range indicated by pfn and size.
- *
  * prot is passed in as a parameter for the new mapping. If the vma has a
  * linear pfn mapping for the entire range reserve the entire vma range with
  * single reserve_pfn_range call.
  */
-int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
-			unsigned long pfn, unsigned long size)
+int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+		    unsigned long pfn, unsigned long size)
 {
 	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
 	unsigned long flags;
@@ -714,8 +711,36 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 	if (!pat_enabled)
 		return 0;
 
-	/* for vm_insert_pfn and friends, we set prot based on lookup */
+	/*
+	 * For anything smaller than the vma size we set prot based on the
+	 * lookup.
+	 */
 	flags = lookup_memtype(paddr);
+
+	/* Check memtype for the remaining pages */
+	while (size > PAGE_SIZE) {
+		size -= PAGE_SIZE;
+		paddr += PAGE_SIZE;
+		if (flags != lookup_memtype(paddr))
+			return -EINVAL;
+	}
+
+	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
+			 flags);
+
+	return 0;
+}
+
+int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
+		     unsigned long pfn)
+{
+	unsigned long flags;
+
+	if (!pat_enabled)
+		return 0;
+
+	/* Set prot based on lookup */
+	flags = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT);
 	*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
 			 flags);
 
@@ -723,12 +748,12 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
 }
 
 /*
- * untrack_pfn_vma is called while unmapping a pfnmap for a region.
+ * untrack_pfn is called while unmapping a pfnmap for a region.
  * untrack can be called for a specific region indicated by pfn and size or
  * can be for the entire vma (in which case pfn, size are zero).
  */
-void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
-			unsigned long size)
+void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+		 unsigned long size)
 {
 	resource_size_t paddr;
 	unsigned long prot;
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index ff4947b7a976..d4d4592c97fc 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -381,48 +381,57 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
 
 #ifndef __HAVE_PFNMAP_TRACKING
 /*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * track_pfn_vma_new is called when a _new_ pfn mapping is being established
- * for physical range indicated by pfn and size.
+ * Interfaces that can be used by architecture code to keep track of
+ * memory type of pfn mappings specified by the remap_pfn_range,
+ * vm_insert_pfn.
+ */
+
+/*
+ * track_pfn_remap is called when a _new_ pfn mapping is being established
+ * by remap_pfn_range() for physical range indicated by pfn and size.
  */
-static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
-					unsigned long pfn, unsigned long size)
+static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+				  unsigned long pfn, unsigned long size)
 {
 	return 0;
 }
 
 /*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
- * track_pfn_vma_copy is called when vma that is covering the pfnmap gets
+ * track_pfn_insert is called when a _new_ single pfn is established
+ * by vm_insert_pfn().
+ */
+static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
+				   unsigned long pfn)
+{
+	return 0;
+}
+
+/*
+ * track_pfn_copy is called when vma that is covering the pfnmap gets
  * copied through copy_page_range().
  */
-static inline int track_pfn_vma_copy(struct vm_area_struct *vma)
+static inline int track_pfn_copy(struct vm_area_struct *vma)
 {
 	return 0;
 }
 
 /*
- * Interface that can be used by architecture code to keep track of
- * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn)
- *
  * untrack_pfn_vma is called while unmapping a pfnmap for a region.
  * untrack can be called for a specific region indicated by pfn and size or
- * can be for the entire vma (in which case size can be zero).
+ * can be for the entire vma (in which case pfn, size are zero).
  */
-static inline void untrack_pfn_vma(struct vm_area_struct *vma,
-					unsigned long pfn, unsigned long size)
+static inline void untrack_pfn(struct vm_area_struct *vma,
+			       unsigned long pfn, unsigned long size)
 {
 }
 #else
-extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
-				unsigned long pfn, unsigned long size);
-extern int track_pfn_vma_copy(struct vm_area_struct *vma);
-extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn,
-				unsigned long size);
+extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
+			   unsigned long pfn, unsigned long size);
+extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
+			    unsigned long pfn);
+extern int track_pfn_copy(struct vm_area_struct *vma);
+extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
+			unsigned long size);
 #endif
 
 #ifdef CONFIG_MMU
diff --git a/mm/memory.c b/mm/memory.c
index 57361708d1a5..6bef278ad303 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1060,7 +1060,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		 * We do not free on error cases below as remove_vma
 		 * gets called on error from higher level routine
 		 */
-		ret = track_pfn_vma_copy(vma);
+		ret = track_pfn_copy(vma);
 		if (ret)
 			return ret;
 	}
@@ -1328,7 +1328,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 		uprobe_munmap(vma, start, end);
 
 	if (unlikely(is_pfn_mapping(vma)))
-		untrack_pfn_vma(vma, 0, 0);
+		untrack_pfn(vma, 0, 0);
 
 	if (start != end) {
 		if (unlikely(is_vm_hugetlb_page(vma))) {
@@ -2162,14 +2162,11 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
-	if (track_pfn_vma_new(vma, &pgprot, pfn, PAGE_SIZE))
+	if (track_pfn_insert(vma, &pgprot, pfn))
 		return -EINVAL;
 
 	ret = insert_pfn(vma, addr, pfn, pgprot);
 
-	if (ret)
-		untrack_pfn_vma(vma, pfn, PAGE_SIZE);
-
 	return ret;
 }
 EXPORT_SYMBOL(vm_insert_pfn);
@@ -2311,7 +2308,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 
 	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 
-	err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size));
+	err = track_pfn_remap(vma, &prot, pfn, PAGE_ALIGN(size));
 	if (err) {
 		/*
 		 * To indicate that track_pfn related cleanup is not
@@ -2335,7 +2332,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	} while (pgd++, addr = next, addr != end);
 
 	if (err)
-		untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
+		untrack_pfn(vma, pfn, PAGE_ALIGN(size));
 
 	return err;
 }
-- 
cgit v1.2.3


From b3b9c2932c32e0692018ed5f12f3fd8c70eea8ce Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Mon, 8 Oct 2012 16:28:34 -0700
Subject: mm, x86, pat: rework linear pfn-mmap tracking

Replace the generic vma-flag VM_PFN_AT_MMAP with x86-only VM_PAT.

We can toss mapping address from remap_pfn_range() into
track_pfn_vma_new(), and collect all PAT-related logic together in
arch/x86/.

This patch also restores orignal frustration-free is_cow_mapping() check
in remap_pfn_range(), as it was before commit v2.6.28-rc8-88-g3c8bb73
("x86: PAT: store vm_pgoff for all linear_over_vma_region mappings - v3")

is_linear_pfn_mapping() checks can be removed from mm/huge_memory.c,
because it already handled by VM_PFNMAP in VM_NO_THP bit-mask.

[suresh.b.siddha@intel.com: Reset the VM_PAT flag as part of untrack_pfn_vma()]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Venkatesh Pallipadi <venki@google.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Venkatesh Pallipadi <venki@google.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pat.c             | 17 ++++++++++++-----
 include/asm-generic/pgtable.h |  6 ++++--
 include/linux/mm.h            | 20 +-------------------
 mm/huge_memory.c              | 19 +++----------------
 mm/memory.c                   | 26 ++++++++++----------------
 5 files changed, 30 insertions(+), 58 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 74a702674e86..0eb572eda406 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -677,7 +677,7 @@ int track_pfn_copy(struct vm_area_struct *vma)
 	unsigned long vma_size = vma->vm_end - vma->vm_start;
 	pgprot_t pgprot;
 
-	if (is_linear_pfn_mapping(vma)) {
+	if (vma->vm_flags & VM_PAT) {
 		/*
 		 * reserve the whole chunk covered by vma. We need the
 		 * starting address and protection from pte.
@@ -699,14 +699,20 @@ int track_pfn_copy(struct vm_area_struct *vma)
  * single reserve_pfn_range call.
  */
 int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-		    unsigned long pfn, unsigned long size)
+		    unsigned long pfn, unsigned long addr, unsigned long size)
 {
 	resource_size_t paddr = (resource_size_t)pfn << PAGE_SHIFT;
 	unsigned long flags;
 
 	/* reserve the whole chunk starting from paddr */
-	if (is_linear_pfn_mapping(vma))
-		return reserve_pfn_range(paddr, size, prot, 0);
+	if (addr == vma->vm_start && size == (vma->vm_end - vma->vm_start)) {
+		int ret;
+
+		ret = reserve_pfn_range(paddr, size, prot, 0);
+		if (!ret)
+			vma->vm_flags |= VM_PAT;
+		return ret;
+	}
 
 	if (!pat_enabled)
 		return 0;
@@ -758,7 +764,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 	resource_size_t paddr;
 	unsigned long prot;
 
-	if (!is_linear_pfn_mapping(vma))
+	if (!(vma->vm_flags & VM_PAT))
 		return;
 
 	/* free the chunk starting from pfn or the whole chunk */
@@ -772,6 +778,7 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
 		size = vma->vm_end - vma->vm_start;
 	}
 	free_pfn_range(paddr, size);
+	vma->vm_flags &= ~VM_PAT;
 }
 
 pgprot_t pgprot_writecombine(pgprot_t prot)
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index d4d4592c97fc..c9a612069c8e 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -391,7 +391,8 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm,
  * by remap_pfn_range() for physical range indicated by pfn and size.
  */
 static inline int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-				  unsigned long pfn, unsigned long size)
+				  unsigned long pfn, unsigned long addr,
+				  unsigned long size)
 {
 	return 0;
 }
@@ -426,7 +427,8 @@ static inline void untrack_pfn(struct vm_area_struct *vma,
 }
 #else
 extern int track_pfn_remap(struct vm_area_struct *vma, pgprot_t *prot,
-			   unsigned long pfn, unsigned long size);
+			   unsigned long pfn, unsigned long addr,
+			   unsigned long size);
 extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
 			    unsigned long pfn);
 extern int track_pfn_copy(struct vm_area_struct *vma);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 311be906b57d..75d1632d3477 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -117,7 +117,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_CAN_NONLINEAR 0x08000000	/* Has ->fault & does nonlinear pages */
 #define VM_MIXEDMAP	0x10000000	/* Can contain "struct page" and pure PFN pages */
 #define VM_SAO		0x20000000	/* Strong Access Ordering (powerpc) */
-#define VM_PFN_AT_MMAP	0x40000000	/* PFNMAP vma that is fully mapped at mmap time */
+#define VM_PAT		0x40000000	/* PAT reserves whole VMA at once (x86) */
 #define VM_MERGEABLE	0x80000000	/* KSM may merge identical pages */
 
 /* Bits set in the VMA until the stack is in its final location */
@@ -158,24 +158,6 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
 #define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
 
-/*
- * This interface is used by x86 PAT code to identify a pfn mapping that is
- * linear over entire vma. This is to optimize PAT code that deals with
- * marking the physical region with a particular prot. This is not for generic
- * mm use. Note also that this check will not work if the pfn mapping is
- * linear for a vma starting at physical address 0. In which case PAT code
- * falls back to slow path of reserving physical range page by page.
- */
-static inline int is_linear_pfn_mapping(struct vm_area_struct *vma)
-{
-	return !!(vma->vm_flags & VM_PFN_AT_MMAP);
-}
-
-static inline int is_pfn_mapping(struct vm_area_struct *vma)
-{
-	return !!(vma->vm_flags & VM_PFNMAP);
-}
-
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
  * ->fault function. The vma's ->fault is responsible for returning a bitmask
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 141dbb695097..73cb22ee9665 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1655,11 +1655,7 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma)
 	if (vma->vm_ops)
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
-	/*
-	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
-	 * true too, verify it here.
-	 */
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+	VM_BUG_ON(vma->vm_flags & VM_NO_THP);
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 	hend = vma->vm_end & HPAGE_PMD_MASK;
 	if (hstart < hend)
@@ -1912,11 +1908,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 		goto out;
 	if (is_vma_temporary_stack(vma))
 		goto out;
-	/*
-	 * If is_pfn_mapping() is true is_learn_pfn_mapping() must be
-	 * true too, verify it here.
-	 */
-	VM_BUG_ON(is_linear_pfn_mapping(vma) || vma->vm_flags & VM_NO_THP);
+	VM_BUG_ON(vma->vm_flags & VM_NO_THP);
 
 	pgd = pgd_offset(mm, address);
 	if (!pgd_present(*pgd))
@@ -2154,12 +2146,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
 			goto skip;
 		if (is_vma_temporary_stack(vma))
 			goto skip;
-		/*
-		 * If is_pfn_mapping() is true is_learn_pfn_mapping()
-		 * must be true too, verify it here.
-		 */
-		VM_BUG_ON(is_linear_pfn_mapping(vma) ||
-			  vma->vm_flags & VM_NO_THP);
+		VM_BUG_ON(vma->vm_flags & VM_NO_THP);
 
 		hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
 		hend = vma->vm_end & HPAGE_PMD_MASK;
diff --git a/mm/memory.c b/mm/memory.c
index 6bef278ad303..655e1429388a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1055,7 +1055,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
-	if (unlikely(is_pfn_mapping(vma))) {
+	if (unlikely(vma->vm_flags & VM_PFNMAP)) {
 		/*
 		 * We do not free on error cases below as remove_vma
 		 * gets called on error from higher level routine
@@ -1327,7 +1327,7 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 	if (vma->vm_file)
 		uprobe_munmap(vma, start, end);
 
-	if (unlikely(is_pfn_mapping(vma)))
+	if (unlikely(vma->vm_flags & VM_PFNMAP))
 		untrack_pfn(vma, 0, 0);
 
 	if (start != end) {
@@ -2299,26 +2299,20 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * There's a horrible special case to handle copy-on-write
 	 * behaviour that some programs depend on. We mark the "original"
 	 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
+	 * See vm_normal_page() for details.
 	 */
-	if (addr == vma->vm_start && end == vma->vm_end) {
+	if (is_cow_mapping(vma->vm_flags)) {
+		if (addr != vma->vm_start || end != vma->vm_end)
+			return -EINVAL;
 		vma->vm_pgoff = pfn;
-		vma->vm_flags |= VM_PFN_AT_MMAP;
-	} else if (is_cow_mapping(vma->vm_flags))
+	}
+
+	err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
+	if (err)
 		return -EINVAL;
 
 	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
 
-	err = track_pfn_remap(vma, &prot, pfn, PAGE_ALIGN(size));
-	if (err) {
-		/*
-		 * To indicate that track_pfn related cleanup is not
-		 * needed from higher level routine calling unmap_vmas
-		 */
-		vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP);
-		vma->vm_flags &= ~VM_PFN_AT_MMAP;
-		return -EINVAL;
-	}
-
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
 	pgd = pgd_offset(mm, addr);
-- 
cgit v1.2.3


From 314e51b9851b4f4e8ab302243ff5a6fc6147f379 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Mon, 8 Oct 2012 16:29:02 -0700
Subject: mm: kill vma flag VM_RESERVED and mm->reserved_vm counter

A long time ago, in v2.4, VM_RESERVED kept swapout process off VMA,
currently it lost original meaning but still has some effects:

 | effect                 | alternative flags
-+------------------------+---------------------------------------------
1| account as reserved_vm | VM_IO
2| skip in core dump      | VM_IO, VM_DONTDUMP
3| do not merge or expand | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP
4| do not mlock           | VM_IO, VM_DONTEXPAND, VM_HUGETLB, VM_PFNMAP

This patch removes reserved_vm counter from mm_struct.  Seems like nobody
cares about it, it does not exported into userspace directly, it only
reduces total_vm showed in proc.

Thus VM_RESERVED can be replaced with VM_IO or pair VM_DONTEXPAND | VM_DONTDUMP.

remap_pfn_range() and io_remap_pfn_range() set VM_IO|VM_DONTEXPAND|VM_DONTDUMP.
remap_vmalloc_range() set VM_DONTEXPAND | VM_DONTDUMP.

[akpm@linux-foundation.org: drivers/vfio/pci/vfio_pci.c fixup]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Carsten Otte <cotte@de.ibm.com>
Cc: Chris Metcalf <cmetcalf@tilera.com>
Cc: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Eric Paris <eparis@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Morris <james.l.morris@oracle.com>
Cc: Jason Baron <jbaron@redhat.com>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Robert Richter <robert.richter@amd.com>
Cc: Suresh Siddha <suresh.b.siddha@intel.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Venkatesh Pallipadi <venki@google.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/vm/unevictable-lru.txt             |  4 ++--
 arch/alpha/kernel/pci-sysfs.c                    |  2 +-
 arch/ia64/kernel/perfmon.c                       |  2 +-
 arch/ia64/mm/init.c                              |  3 ++-
 arch/powerpc/kvm/book3s_hv.c                     |  2 +-
 arch/sparc/kernel/pci.c                          |  2 +-
 arch/unicore32/kernel/process.c                  |  2 +-
 arch/x86/xen/mmu.c                               |  3 +--
 drivers/char/mbcs.c                              |  2 +-
 drivers/char/mem.c                               |  2 +-
 drivers/char/mspec.c                             |  2 +-
 drivers/gpu/drm/drm_gem.c                        |  2 +-
 drivers/gpu/drm/drm_vm.c                         | 10 ++--------
 drivers/gpu/drm/exynos/exynos_drm_gem.c          |  2 +-
 drivers/gpu/drm/gma500/framebuffer.c             |  3 +--
 drivers/gpu/drm/ttm/ttm_bo_vm.c                  |  4 ++--
 drivers/gpu/drm/udl/udl_fb.c                     |  2 +-
 drivers/infiniband/hw/ehca/ehca_uverbs.c         |  4 ++--
 drivers/infiniband/hw/ipath/ipath_file_ops.c     |  2 +-
 drivers/infiniband/hw/qib/qib_file_ops.c         |  2 +-
 drivers/media/pci/meye/meye.c                    |  2 +-
 drivers/media/platform/omap/omap_vout.c          |  2 +-
 drivers/media/platform/vino.c                    |  2 +-
 drivers/media/usb/sn9c102/sn9c102_core.c         |  3 +--
 drivers/media/usb/usbvision/usbvision-video.c    |  3 +--
 drivers/media/v4l2-core/videobuf-dma-sg.c        |  2 +-
 drivers/media/v4l2-core/videobuf-vmalloc.c       |  2 +-
 drivers/media/v4l2-core/videobuf2-memops.c       |  2 +-
 drivers/misc/carma/carma-fpga.c                  |  2 --
 drivers/misc/sgi-gru/grufile.c                   |  5 ++---
 drivers/mtd/mtdchar.c                            |  2 +-
 drivers/scsi/sg.c                                |  2 +-
 drivers/staging/omapdrm/omap_gem_dmabuf.c        |  2 +-
 drivers/staging/tidspbridge/rmgr/drv_interface.c |  2 +-
 drivers/uio/uio.c                                |  4 +---
 drivers/usb/mon/mon_bin.c                        |  2 +-
 drivers/video/68328fb.c                          |  2 +-
 drivers/video/aty/atyfb_base.c                   |  3 +--
 drivers/video/fb-puv3.c                          |  3 +--
 drivers/video/fb_defio.c                         |  2 +-
 drivers/video/fbmem.c                            |  3 +--
 drivers/video/gbefb.c                            |  2 +-
 drivers/video/omap2/omapfb/omapfb-main.c         |  2 +-
 drivers/video/sbuslib.c                          |  5 ++---
 drivers/video/smscufx.c                          |  1 -
 drivers/video/udlfb.c                            |  1 -
 drivers/video/vermilion/vermilion.c              |  1 -
 drivers/video/vfb.c                              |  1 -
 drivers/xen/gntalloc.c                           |  2 +-
 drivers/xen/gntdev.c                             |  2 +-
 drivers/xen/privcmd.c                            |  3 ++-
 fs/binfmt_elf.c                                  |  2 +-
 fs/binfmt_elf_fdpic.c                            |  2 +-
 fs/hugetlbfs/inode.c                             |  2 +-
 fs/proc/task_mmu.c                               |  2 +-
 include/linux/mempolicy.h                        |  2 +-
 include/linux/mm.h                               |  3 +--
 include/linux/mm_types.h                         |  1 -
 kernel/events/core.c                             |  2 +-
 mm/ksm.c                                         |  3 +--
 mm/memory.c                                      | 11 +++++------
 mm/mlock.c                                       |  2 +-
 mm/mmap.c                                        |  2 --
 mm/nommu.c                                       |  2 +-
 mm/vmalloc.c                                     |  3 +--
 security/selinux/selinuxfs.c                     |  2 +-
 sound/core/pcm_native.c                          |  6 +++---
 sound/usb/usx2y/us122l.c                         |  2 +-
 sound/usb/usx2y/usX2Yhwdep.c                     |  2 +-
 sound/usb/usx2y/usx2yhwdeppcm.c                  |  2 +-
 70 files changed, 77 insertions(+), 105 deletions(-)

(limited to 'arch/x86')

diff --git a/Documentation/vm/unevictable-lru.txt b/Documentation/vm/unevictable-lru.txt
index fa206cccf89f..323ff5dba1cc 100644
--- a/Documentation/vm/unevictable-lru.txt
+++ b/Documentation/vm/unevictable-lru.txt
@@ -371,8 +371,8 @@ mlock_fixup() filters several classes of "special" VMAs:
    mlock_fixup() will call make_pages_present() in the hugetlbfs VMA range to
    allocate the huge pages and populate the ptes.
 
-3) VMAs with VM_DONTEXPAND or VM_RESERVED are generally userspace mappings of
-   kernel pages, such as the VDSO page, relay channel pages, etc.  These pages
+3) VMAs with VM_DONTEXPAND are generally userspace mappings of kernel pages,
+   such as the VDSO page, relay channel pages, etc. These pages
    are inherently unevictable and are not managed on the LRU lists.
    mlock_fixup() treats these VMAs the same as hugetlbfs VMAs.  It calls
    make_pages_present() to populate the ptes.
diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
index 53649c7d0068..b51f7b4818cd 100644
--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -26,7 +26,7 @@ static int hose_mmap_page_range(struct pci_controller *hose,
 		base = sparse ? hose->sparse_io_base : hose->dense_io_base;
 
 	vma->vm_pgoff += base >> PAGE_SHIFT;
-	vma->vm_flags |= (VM_IO | VM_RESERVED);
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 
 	return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
 				  vma->vm_end - vma->vm_start,
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index f388b4e18a37..ea39eba61ef5 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2307,7 +2307,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 	 */
 	vma->vm_mm	     = mm;
 	vma->vm_file	     = get_file(filp);
-	vma->vm_flags	     = VM_READ| VM_MAYREAD |VM_RESERVED;
+	vma->vm_flags	     = VM_READ|VM_MAYREAD|VM_DONTEXPAND|VM_DONTDUMP;
 	vma->vm_page_prot    = PAGE_READONLY; /* XXX may need to change */
 
 	/*
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 0eab454867a2..082e383c1b6f 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -138,7 +138,8 @@ ia64_init_addr_space (void)
 			vma->vm_mm = current->mm;
 			vma->vm_end = PAGE_SIZE;
 			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
-			vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO | VM_RESERVED;
+			vma->vm_flags = VM_READ | VM_MAYREAD | VM_IO |
+					VM_DONTEXPAND | VM_DONTDUMP;
 			down_write(&current->mm->mmap_sem);
 			if (insert_vm_struct(current->mm, vma)) {
 				up_write(&current->mm->mmap_sem);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 83e929e66f9d..721d4603a235 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1183,7 +1183,7 @@ static const struct vm_operations_struct kvm_rma_vm_ops = {
 
 static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &kvm_rma_vm_ops;
 	return 0;
 }
diff --git a/arch/sparc/kernel/pci.c b/arch/sparc/kernel/pci.c
index acc8c838ff72..75b31bcdeadf 100644
--- a/arch/sparc/kernel/pci.c
+++ b/arch/sparc/kernel/pci.c
@@ -779,7 +779,7 @@ static int __pci_mmap_make_offset(struct pci_dev *pdev,
 static void __pci_mmap_set_flags(struct pci_dev *dev, struct vm_area_struct *vma,
 					    enum pci_mmap_state mmap_state)
 {
-	vma->vm_flags |= (VM_IO | VM_RESERVED);
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 }
 
 /* Set vm_page_prot of VMA, as appropriate for this architecture, for a pci
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index b6f0458c3143..b008586dad75 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -380,7 +380,7 @@ int vectors_user_mapping(void)
 	return install_special_mapping(mm, 0xffff0000, PAGE_SIZE,
 				       VM_READ | VM_EXEC |
 				       VM_MAYREAD | VM_MAYEXEC |
-				       VM_RESERVED,
+				       VM_DONTEXPAND | VM_DONTDUMP,
 				       NULL);
 }
 
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 5a16824cc2b3..fd28d86fe3d2 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -2451,8 +2451,7 @@ int xen_remap_domain_mfn_range(struct vm_area_struct *vma,
 
 	prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
 
-	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_RESERVED | VM_IO)) ==
-				(VM_PFNMAP | VM_RESERVED | VM_IO)));
+	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
 
 	rmd.mfn = mfn;
 	rmd.prot = prot;
diff --git a/drivers/char/mbcs.c b/drivers/char/mbcs.c
index 0c7d340b9ab9..f74e892711dd 100644
--- a/drivers/char/mbcs.c
+++ b/drivers/char/mbcs.c
@@ -507,7 +507,7 @@ static int mbcs_gscr_mmap(struct file *fp, struct vm_area_struct *vma)
 
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
-	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
+	/* Remap-pfn-range will mark the range VM_IO */
 	if (remap_pfn_range(vma,
 			    vma->vm_start,
 			    __pa(soft->gscr_addr) >> PAGE_SHIFT,
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index e5eedfa24c91..0537903c985b 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -322,7 +322,7 @@ static int mmap_mem(struct file *file, struct vm_area_struct *vma)
 
 	vma->vm_ops = &mmap_mem_ops;
 
-	/* Remap-pfn-range will mark the range VM_IO and VM_RESERVED */
+	/* Remap-pfn-range will mark the range VM_IO */
 	if (remap_pfn_range(vma,
 			    vma->vm_start,
 			    vma->vm_pgoff,
diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c
index 845f97fd1832..e1f60f968fdd 100644
--- a/drivers/char/mspec.c
+++ b/drivers/char/mspec.c
@@ -286,7 +286,7 @@ mspec_mmap(struct file *file, struct vm_area_struct *vma,
 	atomic_set(&vdata->refcnt, 1);
 	vma->vm_private_data = vdata;
 
-	vma->vm_flags |= (VM_IO | VM_RESERVED | VM_PFNMAP | VM_DONTEXPAND);
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	if (vdata->type == MSPEC_FETCHOP || vdata->type == MSPEC_UNCACHED)
 		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 	vma->vm_ops = &mspec_vm_ops;
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index 92177d5aedee..24efae464e2c 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -706,7 +706,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 		goto out_unlock;
 	}
 
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = obj->dev->driver->gem_vm_ops;
 	vma->vm_private_data = map->handle;
 	vma->vm_page_prot =  pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index 23a824e6a22a..db7bd292410b 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -514,8 +514,7 @@ static int drm_mmap_dma(struct file *filp, struct vm_area_struct *vma)
 
 	vma->vm_ops = &drm_vm_dma_ops;
 
-	vma->vm_flags |= VM_RESERVED;	/* Don't swap */
-	vma->vm_flags |= VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 
 	drm_vm_open_locked(dev, vma);
 	return 0;
@@ -643,21 +642,16 @@ int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma)
 	case _DRM_SHM:
 		vma->vm_ops = &drm_vm_shm_ops;
 		vma->vm_private_data = (void *)map;
-		/* Don't let this area swap.  Change when
-		   DRM_KERNEL advisory is supported. */
-		vma->vm_flags |= VM_RESERVED;
 		break;
 	case _DRM_SCATTER_GATHER:
 		vma->vm_ops = &drm_vm_sg_ops;
 		vma->vm_private_data = (void *)map;
-		vma->vm_flags |= VM_RESERVED;
 		vma->vm_page_prot = drm_dma_prot(map->type, vma);
 		break;
 	default:
 		return -EINVAL;	/* This should never happen. */
 	}
-	vma->vm_flags |= VM_RESERVED;	/* Don't swap */
-	vma->vm_flags |= VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 
 	drm_vm_open_locked(dev, vma);
 	return 0;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index fcdbe46914f7..d2545560664f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -500,7 +500,7 @@ static int exynos_drm_gem_mmap_buffer(struct file *filp,
 
 	DRM_DEBUG_KMS("%s\n", __FILE__);
 
-	vma->vm_flags |= (VM_IO | VM_RESERVED);
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 
 	update_vm_cache_attr(exynos_gem_obj, vma);
 
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 884ba73ac6ce..afded54dbb10 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -178,8 +178,7 @@ static int psbfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	 */
 	vma->vm_ops = &psbfb_vm_ops;
 	vma->vm_private_data = (void *)psbfb;
-	vma->vm_flags |= VM_RESERVED | VM_IO |
-					VM_MIXEDMAP | VM_DONTEXPAND;
+	vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index a877813571a4..3ba72dbdc4bd 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -285,7 +285,7 @@ int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
 	 */
 
 	vma->vm_private_data = bo;
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
+	vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	return 0;
 out_unref:
 	ttm_bo_unref(&bo);
@@ -300,7 +300,7 @@ int ttm_fbdev_mmap(struct vm_area_struct *vma, struct ttm_buffer_object *bo)
 
 	vma->vm_ops = &ttm_bo_vm_ops;
 	vma->vm_private_data = ttm_bo_reference(bo);
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
+	vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND;
 	return 0;
 }
 EXPORT_SYMBOL(ttm_fbdev_mmap);
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index 67df842fbb33..69a2b16f42a6 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -243,7 +243,7 @@ static int udl_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 			size = 0;
 	}
 
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 	return 0;
 }
 
diff --git a/drivers/infiniband/hw/ehca/ehca_uverbs.c b/drivers/infiniband/hw/ehca/ehca_uverbs.c
index 45ee89b65c23..1a1d5d99fcf9 100644
--- a/drivers/infiniband/hw/ehca/ehca_uverbs.c
+++ b/drivers/infiniband/hw/ehca/ehca_uverbs.c
@@ -117,7 +117,7 @@ static int ehca_mmap_fw(struct vm_area_struct *vma, struct h_galpas *galpas,
 	physical = galpas->user.fw_handle;
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 	ehca_gen_dbg("vsize=%llx physical=%llx", vsize, physical);
-	/* VM_IO | VM_RESERVED are set by remap_pfn_range() */
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 	ret = remap_4k_pfn(vma, vma->vm_start, physical >> EHCA_PAGESHIFT,
 			   vma->vm_page_prot);
 	if (unlikely(ret)) {
@@ -139,7 +139,7 @@ static int ehca_mmap_queue(struct vm_area_struct *vma, struct ipz_queue *queue,
 	u64 start, ofs;
 	struct page *page;
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	start = vma->vm_start;
 	for (ofs = 0; ofs < queue->queue_length; ofs += PAGE_SIZE) {
 		u64 virt_addr = (u64)ipz_qeit_calc(queue, ofs);
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index 736d9edbdbe7..3eb7e454849b 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -1225,7 +1225,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
 
 	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
 	vma->vm_ops = &ipath_file_vm_ops;
-	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	ret = 1;
 
 bail:
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index faa44cb08071..959a5c4ff812 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -971,7 +971,7 @@ static int mmap_kvaddr(struct vm_area_struct *vma, u64 pgaddr,
 
 	vma->vm_pgoff = (unsigned long) addr >> PAGE_SHIFT;
 	vma->vm_ops = &qib_file_vm_ops;
-	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	ret = 1;
 
 bail:
diff --git a/drivers/media/pci/meye/meye.c b/drivers/media/pci/meye/meye.c
index 7bc775219f97..e5a76da86081 100644
--- a/drivers/media/pci/meye/meye.c
+++ b/drivers/media/pci/meye/meye.c
@@ -1647,7 +1647,7 @@ static int meye_mmap(struct file *file, struct vm_area_struct *vma)
 
 	vma->vm_ops = &meye_vm_ops;
 	vma->vm_flags &= ~VM_IO;	/* not I/O memory */
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data = (void *) (offset / gbufsize);
 	meye_vm_open(vma);
 
diff --git a/drivers/media/platform/omap/omap_vout.c b/drivers/media/platform/omap/omap_vout.c
index 66ac21d466af..134016f0e660 100644
--- a/drivers/media/platform/omap/omap_vout.c
+++ b/drivers/media/platform/omap/omap_vout.c
@@ -911,7 +911,7 @@ static int omap_vout_mmap(struct file *file, struct vm_area_struct *vma)
 
 	q->bufs[i]->baddr = vma->vm_start;
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	vma->vm_ops = &omap_vout_vm_ops;
 	vma->vm_private_data = (void *) vout;
diff --git a/drivers/media/platform/vino.c b/drivers/media/platform/vino.c
index 790d96cffeea..70b0bf4b2900 100644
--- a/drivers/media/platform/vino.c
+++ b/drivers/media/platform/vino.c
@@ -3950,7 +3950,7 @@ found:
 
 	fb->map_count = 1;
 
-	vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_flags &= ~VM_IO;
 	vma->vm_private_data = fb;
 	vma->vm_file = file;
diff --git a/drivers/media/usb/sn9c102/sn9c102_core.c b/drivers/media/usb/sn9c102/sn9c102_core.c
index 19ea780b16ff..5bfc8e2f018f 100644
--- a/drivers/media/usb/sn9c102/sn9c102_core.c
+++ b/drivers/media/usb/sn9c102/sn9c102_core.c
@@ -2126,8 +2126,7 @@ static int sn9c102_mmap(struct file* filp, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
-	vma->vm_flags |= VM_IO;
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 
 	pos = cam->frame[i].bufmem;
 	while (size > 0) { /* size is page-aligned */
diff --git a/drivers/media/usb/usbvision/usbvision-video.c b/drivers/media/usb/usbvision/usbvision-video.c
index f67018ed3795..5c36a57e6590 100644
--- a/drivers/media/usb/usbvision/usbvision-video.c
+++ b/drivers/media/usb/usbvision/usbvision-video.c
@@ -1108,8 +1108,7 @@ static int usbvision_mmap(struct file *file, struct vm_area_struct *vma)
 	}
 
 	/* VM_IO is eventually going to replace PageReserved altogether */
-	vma->vm_flags |= VM_IO;
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
+	vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 
 	pos = usbvision->frame[i].data;
 	while (size > 0) {
diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
index f300deafd268..828e7c10bd70 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -582,7 +582,7 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 	map->count    = 1;
 	map->q        = q;
 	vma->vm_ops   = &videobuf_vm_ops;
-	vma->vm_flags |= VM_DONTEXPAND | VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_flags &= ~VM_IO; /* using shared anonymous pages */
 	vma->vm_private_data = map;
 	dprintk(1, "mmap %p: q=%p %08lx-%08lx pgoff %08lx bufs %d-%d\n",
diff --git a/drivers/media/v4l2-core/videobuf-vmalloc.c b/drivers/media/v4l2-core/videobuf-vmalloc.c
index df142580e44c..2ff7fcc77b11 100644
--- a/drivers/media/v4l2-core/videobuf-vmalloc.c
+++ b/drivers/media/v4l2-core/videobuf-vmalloc.c
@@ -270,7 +270,7 @@ static int __videobuf_mmap_mapper(struct videobuf_queue *q,
 	}
 
 	vma->vm_ops          = &videobuf_vm_ops;
-	vma->vm_flags       |= VM_DONTEXPAND | VM_RESERVED;
+	vma->vm_flags       |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data = map;
 
 	dprintk(1, "mmap %p: q=%p %08lx-%08lx (%lx) pgoff %08lx buf %d\n",
diff --git a/drivers/media/v4l2-core/videobuf2-memops.c b/drivers/media/v4l2-core/videobuf2-memops.c
index 504cd4cbe29e..051ea3571b20 100644
--- a/drivers/media/v4l2-core/videobuf2-memops.c
+++ b/drivers/media/v4l2-core/videobuf2-memops.c
@@ -163,7 +163,7 @@ int vb2_mmap_pfn_range(struct vm_area_struct *vma, unsigned long paddr,
 		return ret;
 	}
 
-	vma->vm_flags		|= VM_DONTEXPAND | VM_RESERVED;
+	vma->vm_flags		|= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data	= priv;
 	vma->vm_ops		= vm_ops;
 
diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c
index 0c43297ed9ac..8835eabb3b87 100644
--- a/drivers/misc/carma/carma-fpga.c
+++ b/drivers/misc/carma/carma-fpga.c
@@ -1243,8 +1243,6 @@ static int data_mmap(struct file *filp, struct vm_area_struct *vma)
 		return -EINVAL;
 	}
 
-	/* IO memory (stop cacheing) */
-	vma->vm_flags |= VM_IO | VM_RESERVED;
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	return io_remap_pfn_range(vma, vma->vm_start, addr, vsize,
diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c
index ecafa4ba238b..492c8cac69ac 100644
--- a/drivers/misc/sgi-gru/grufile.c
+++ b/drivers/misc/sgi-gru/grufile.c
@@ -108,9 +108,8 @@ static int gru_file_mmap(struct file *file, struct vm_area_struct *vma)
 				vma->vm_end & (GRU_GSEG_PAGESIZE - 1))
 		return -EINVAL;
 
-	vma->vm_flags |=
-	    (VM_IO | VM_DONTCOPY | VM_LOCKED | VM_DONTEXPAND | VM_PFNMAP |
-			VM_RESERVED);
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_LOCKED |
+			 VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_page_prot = PAGE_SHARED;
 	vma->vm_ops = &gru_vm_ops;
 
diff --git a/drivers/mtd/mtdchar.c b/drivers/mtd/mtdchar.c
index a6e74514e662..73ae81a629f2 100644
--- a/drivers/mtd/mtdchar.c
+++ b/drivers/mtd/mtdchar.c
@@ -1182,7 +1182,7 @@ static int mtdchar_mmap(struct file *file, struct vm_area_struct *vma)
 			return -EINVAL;
 		if (set_vm_offset(vma, off) < 0)
 			return -EINVAL;
-		vma->vm_flags |= VM_IO | VM_RESERVED;
+		vma->vm_flags |= VM_IO | VM_DONTEXPAND | VM_DONTDUMP;
 
 #ifdef pgprot_noncached
 		if (file->f_flags & O_DSYNC || off >= __pa(high_memory))
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 9c5c5f2b3962..be2c9a6561ff 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1257,7 +1257,7 @@ sg_mmap(struct file *filp, struct vm_area_struct *vma)
 	}
 
 	sfp->mmap_called = 1;
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data = sfp;
 	vma->vm_ops = &sg_mmap_vm_ops;
 	return 0;
diff --git a/drivers/staging/omapdrm/omap_gem_dmabuf.c b/drivers/staging/omapdrm/omap_gem_dmabuf.c
index 42728e0cc194..c6f3ef6f57b9 100644
--- a/drivers/staging/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/staging/omapdrm/omap_gem_dmabuf.c
@@ -160,7 +160,7 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer,
 		goto out_unlock;
 	}
 
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = obj->dev->driver->gem_vm_ops;
 	vma->vm_private_data = obj;
 	vma->vm_page_prot =  pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
diff --git a/drivers/staging/tidspbridge/rmgr/drv_interface.c b/drivers/staging/tidspbridge/rmgr/drv_interface.c
index bddea1d3b2c3..701a11ac676d 100644
--- a/drivers/staging/tidspbridge/rmgr/drv_interface.c
+++ b/drivers/staging/tidspbridge/rmgr/drv_interface.c
@@ -261,7 +261,7 @@ static int bridge_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	u32 status;
 
-	vma->vm_flags |= VM_RESERVED | VM_IO;
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	dev_dbg(bridge, "%s: vm filp %p start %lx end %lx page_prot %ulx "
diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index a783d533a1a6..5110f367f1f1 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -653,8 +653,6 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
 	if (mi < 0)
 		return -EINVAL;
 
-	vma->vm_flags |= VM_IO | VM_RESERVED;
-
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	return remap_pfn_range(vma,
@@ -666,7 +664,7 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
 
 static int uio_mmap_logical(struct vm_area_struct *vma)
 {
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &uio_vm_ops;
 	uio_vma_open(vma);
 	return 0;
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 91cd85076a44..9a62e89d6dc0 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -1247,7 +1247,7 @@ static int mon_bin_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	/* don't do anything here: "fault" will set up page table entries */
 	vma->vm_ops = &mon_bin_vm_ops;
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_private_data = filp->private_data;
 	mon_bin_vma_open(vma);
 	return 0;
diff --git a/drivers/video/68328fb.c b/drivers/video/68328fb.c
index a425d65d5ba2..fa44fbed397d 100644
--- a/drivers/video/68328fb.c
+++ b/drivers/video/68328fb.c
@@ -400,7 +400,7 @@ static int mc68x328fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 #ifndef MMU
 	/* this is uClinux (no MMU) specific code */
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_start = videomemory;
 
 	return 0;
diff --git a/drivers/video/aty/atyfb_base.c b/drivers/video/aty/atyfb_base.c
index 3f2e8c13f1ca..868932f904ef 100644
--- a/drivers/video/aty/atyfb_base.c
+++ b/drivers/video/aty/atyfb_base.c
@@ -1942,8 +1942,7 @@ static int atyfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	off = vma->vm_pgoff << PAGE_SHIFT;
 	size = vma->vm_end - vma->vm_start;
 
-	/* To stop the swapper from even considering these pages. */
-	vma->vm_flags |= (VM_IO | VM_RESERVED);
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 
 	if (((vma->vm_pgoff == 0) && (size == info->fix.smem_len)) ||
 	    ((off == info->fix.smem_len) && (size == PAGE_SIZE)))
diff --git a/drivers/video/fb-puv3.c b/drivers/video/fb-puv3.c
index 60a787fa32cf..7d106f1f4906 100644
--- a/drivers/video/fb-puv3.c
+++ b/drivers/video/fb-puv3.c
@@ -653,9 +653,8 @@ int unifb_mmap(struct fb_info *info,
 				vma->vm_page_prot))
 		return -EAGAIN;
 
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 	return 0;
-
 }
 
 static struct fb_ops unifb_ops = {
diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
index 64cda560c488..88cad6b8b479 100644
--- a/drivers/video/fb_defio.c
+++ b/drivers/video/fb_defio.c
@@ -166,7 +166,7 @@ static const struct address_space_operations fb_deferred_io_aops = {
 static int fb_deferred_io_mmap(struct fb_info *info, struct vm_area_struct *vma)
 {
 	vma->vm_ops = &fb_deferred_io_vm_ops;
-	vma->vm_flags |= ( VM_RESERVED | VM_DONTEXPAND );
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	if (!(info->flags & FBINFO_VIRTFB))
 		vma->vm_flags |= VM_IO;
 	vma->vm_private_data = info;
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 0dff12a1daef..3ff0105a496a 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1410,8 +1410,7 @@ fb_mmap(struct file *file, struct vm_area_struct * vma)
 		return -EINVAL;
 	off += start;
 	vma->vm_pgoff = off >> PAGE_SHIFT;
-	/* This is an IO map - tell maydump to skip this VMA */
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by io_remap_pfn_range()*/
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	fb_pgprotect(file, vma, off);
 	if (io_remap_pfn_range(vma, vma->vm_start, off >> PAGE_SHIFT,
diff --git a/drivers/video/gbefb.c b/drivers/video/gbefb.c
index 7e7b7a9ba274..05e2a8a99d8f 100644
--- a/drivers/video/gbefb.c
+++ b/drivers/video/gbefb.c
@@ -1024,7 +1024,7 @@ static int gbefb_mmap(struct fb_info *info,
 	pgprot_val(vma->vm_page_prot) =
 		pgprot_fb(pgprot_val(vma->vm_page_prot));
 
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 
 	/* look for the starting tile */
 	tile = &gbe_tiles.cpu[offset >> TILE_SHIFT];
diff --git a/drivers/video/omap2/omapfb/omapfb-main.c b/drivers/video/omap2/omapfb/omapfb-main.c
index 3c39aa8de928..15373f4aee19 100644
--- a/drivers/video/omap2/omapfb/omapfb-main.c
+++ b/drivers/video/omap2/omapfb/omapfb-main.c
@@ -1128,7 +1128,7 @@ static int omapfb_mmap(struct fb_info *fbi, struct vm_area_struct *vma)
 	DBG("user mmap region start %lx, len %d, off %lx\n", start, len, off);
 
 	vma->vm_pgoff = off >> PAGE_SHIFT;
-	vma->vm_flags |= VM_IO | VM_RESERVED;
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
 	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	vma->vm_ops = &mmap_user_ops;
 	vma->vm_private_data = rg;
diff --git a/drivers/video/sbuslib.c b/drivers/video/sbuslib.c
index 3c1de981a18c..296afae442f4 100644
--- a/drivers/video/sbuslib.c
+++ b/drivers/video/sbuslib.c
@@ -57,9 +57,8 @@ int sbusfb_mmap_helper(struct sbus_mmap_map *map,
 
 	off = vma->vm_pgoff << PAGE_SHIFT;
 
-	/* To stop the swapper from even considering these pages */
-	vma->vm_flags |= (VM_IO | VM_RESERVED);
-	
+	/* VM_IO | VM_DONTEXPAND | VM_DONTDUMP are set by remap_pfn_range() */
+
 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
 
 	/* Each page, see which map applies */
diff --git a/drivers/video/smscufx.c b/drivers/video/smscufx.c
index 5533a32c6ca1..97bd6620c364 100644
--- a/drivers/video/smscufx.c
+++ b/drivers/video/smscufx.c
@@ -803,7 +803,6 @@ static int ufx_ops_mmap(struct fb_info *info, struct vm_area_struct *vma)
 			size = 0;
 	}
 
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
 	return 0;
 }
 
diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c
index 8af64148294b..f45eba3d6150 100644
--- a/drivers/video/udlfb.c
+++ b/drivers/video/udlfb.c
@@ -345,7 +345,6 @@ static int dlfb_ops_mmap(struct fb_info *info, struct vm_area_struct *vma)
 			size = 0;
 	}
 
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
 	return 0;
 }
 
diff --git a/drivers/video/vermilion/vermilion.c b/drivers/video/vermilion/vermilion.c
index 970e43d13f52..89aef343e295 100644
--- a/drivers/video/vermilion/vermilion.c
+++ b/drivers/video/vermilion/vermilion.c
@@ -1018,7 +1018,6 @@ static int vmlfb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 	offset += vinfo->vram_start;
 	pgprot_val(vma->vm_page_prot) |= _PAGE_PCD;
 	pgprot_val(vma->vm_page_prot) &= ~_PAGE_PWT;
-	vma->vm_flags |= VM_RESERVED | VM_IO;
 	if (remap_pfn_range(vma, vma->vm_start, offset >> PAGE_SHIFT,
 						size, vma->vm_page_prot))
 		return -EAGAIN;
diff --git a/drivers/video/vfb.c b/drivers/video/vfb.c
index 501a922aa9dc..c7f692525b88 100644
--- a/drivers/video/vfb.c
+++ b/drivers/video/vfb.c
@@ -439,7 +439,6 @@ static int vfb_mmap(struct fb_info *info,
 			size = 0;
 	}
 
-	vma->vm_flags |= VM_RESERVED;	/* avoid to swap out this VMA */
 	return 0;
 
 }
diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
index 934985d14c24..4097987b330e 100644
--- a/drivers/xen/gntalloc.c
+++ b/drivers/xen/gntalloc.c
@@ -535,7 +535,7 @@ static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
 
 	vma->vm_private_data = vm_priv;
 
-	vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 
 	vma->vm_ops = &gntalloc_vmops;
 
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
index 5df9fd847b2e..610bfc6be177 100644
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -720,7 +720,7 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
 
 	vma->vm_ops = &gntdev_vmops;
 
-	vma->vm_flags |= VM_RESERVED|VM_DONTEXPAND;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 
 	if (use_ptemod)
 		vma->vm_flags |= VM_DONTCOPY;
diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c
index ef6389580b8c..8adb9cc267f9 100644
--- a/drivers/xen/privcmd.c
+++ b/drivers/xen/privcmd.c
@@ -455,7 +455,8 @@ static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	/* DONTCOPY is essential for Xen because copy_page_range doesn't know
 	 * how to recreate these mappings */
-	vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY | VM_PFNMAP;
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTCOPY |
+			 VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &privcmd_vm_ops;
 	vma->vm_private_data = NULL;
 
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 2b72d26e2e4b..e800dec958c3 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1135,7 +1135,7 @@ static unsigned long vma_dump_size(struct vm_area_struct *vma,
 	}
 
 	/* Do not dump I/O mapped devices or special mappings */
-	if (vma->vm_flags & (VM_IO | VM_RESERVED))
+	if (vma->vm_flags & VM_IO)
 		return 0;
 
 	/* By default, dump shared memory if mapped from an anonymous file. */
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 08d812b32282..262db114ff01 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1205,7 +1205,7 @@ static int maydump(struct vm_area_struct *vma, unsigned long mm_flags)
 	int dump_ok;
 
 	/* Do not dump I/O mapped devices or special mappings */
-	if (vma->vm_flags & (VM_IO | VM_RESERVED)) {
+	if (vma->vm_flags & VM_IO) {
 		kdcore("%08lx: %08lx: no (IO)", vma->vm_start, vma->vm_flags);
 		return 0;
 	}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9460120a5170..0a0ab8e21b19 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -110,7 +110,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
 	 * way when do_mmap_pgoff unwinds (may be important on powerpc
 	 * and ia64).
 	 */
-	vma->vm_flags |= VM_HUGETLB | VM_RESERVED;
+	vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &hugetlb_vm_ops;
 
 	if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT))
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4540b8f76f16..79827ce03e3b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -54,7 +54,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm)
 		"VmPTE:\t%8lu kB\n"
 		"VmSwap:\t%8lu kB\n",
 		hiwater_vm << (PAGE_SHIFT-10),
-		(total_vm - mm->reserved_vm) << (PAGE_SHIFT-10),
+		total_vm << (PAGE_SHIFT-10),
 		mm->locked_vm << (PAGE_SHIFT-10),
 		mm->pinned_vm << (PAGE_SHIFT-10),
 		hiwater_rss << (PAGE_SHIFT-10),
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 95b738c7abff..ba7a0ff19d39 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -239,7 +239,7 @@ extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
 {
-	if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
+	if (vma->vm_flags & (VM_IO | VM_HUGETLB | VM_PFNMAP))
 		return 0;
 	/*
 	 * Migration allocates pages in the highest zone. If we cannot
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc08d558e058..0514fe9d3c84 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -96,7 +96,6 @@ extern unsigned int kobjsize(const void *objp);
 
 #define VM_DONTCOPY	0x00020000      /* Do not copy this vma on fork */
 #define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
-#define VM_RESERVED	0x00080000	/* Count as reserved_vm like IO */
 #define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
@@ -148,7 +147,7 @@ extern unsigned int kobjsize(const void *objp);
  * Special vmas that are non-mergable, non-mlock()able.
  * Note: mm/huge_memory.c VM_NO_THP depends on this definition.
  */
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP)
 
 /*
  * mapping from the currently active vm_flags protection bits (the
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 58d3173eb365..a57a43f5ca7c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -349,7 +349,6 @@ struct mm_struct {
 	unsigned long shared_vm;	/* Shared pages (files) */
 	unsigned long exec_vm;		/* VM_EXEC & ~VM_WRITE */
 	unsigned long stack_vm;		/* VM_GROWSUP/DOWN */
-	unsigned long reserved_vm;	/* VM_RESERVED|VM_IO pages */
 	unsigned long def_flags;
 	unsigned long nr_ptes;		/* Page table pages */
 	unsigned long start_code, end_code, start_data, end_data;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f16f3c58f11a..cda3ebd49e86 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -3671,7 +3671,7 @@ unlock:
 		atomic_inc(&event->mmap_count);
 	mutex_unlock(&event->mmap_mutex);
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &perf_mmap_vmops;
 
 	return ret;
diff --git a/mm/ksm.c b/mm/ksm.c
index f9ccb16559ee..9638620a7530 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1469,8 +1469,7 @@ int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
 		 */
 		if (*vm_flags & (VM_MERGEABLE | VM_SHARED  | VM_MAYSHARE   |
 				 VM_PFNMAP    | VM_IO      | VM_DONTEXPAND |
-				 VM_RESERVED  | VM_HUGETLB |
-				 VM_NONLINEAR | VM_MIXEDMAP))
+				 VM_HUGETLB | VM_NONLINEAR | VM_MIXEDMAP))
 			return 0;		/* just ignore the advice */
 
 #ifdef VM_SAO
diff --git a/mm/memory.c b/mm/memory.c
index 7b1e4feaec06..e09c04813186 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2297,14 +2297,13 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * rest of the world about it:
 	 *   VM_IO tells people not to look at these pages
 	 *	(accesses can have side effects).
-	 *   VM_RESERVED is specified all over the place, because
-	 *	in 2.4 it kept swapout's vma scan off this vma; but
-	 *	in 2.6 the LRU scan won't even find its pages, so this
-	 *	flag means no more than count its pages in reserved_vm,
-	 * 	and omit it from core dump, even when VM_IO turned off.
 	 *   VM_PFNMAP tells the core MM that the base pages are just
 	 *	raw PFN mappings, and do not have a "struct page" associated
 	 *	with them.
+	 *   VM_DONTEXPAND
+	 *      Disable vma merging and expanding with mremap().
+	 *   VM_DONTDUMP
+	 *      Omit vma from core dump, even when VM_IO turned off.
 	 *
 	 * There's a horrible special case to handle copy-on-write
 	 * behaviour that some programs depend on. We mark the "original"
@@ -2321,7 +2320,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	if (err)
 		return -EINVAL;
 
-	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
diff --git a/mm/mlock.c b/mm/mlock.c
index ef726e8aa8e9..a948be4b7ba7 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -227,7 +227,7 @@ long mlock_vma_pages_range(struct vm_area_struct *vma,
 	if (vma->vm_flags & (VM_IO | VM_PFNMAP))
 		goto no_mlock;
 
-	if (!((vma->vm_flags & (VM_DONTEXPAND | VM_RESERVED)) ||
+	if (!((vma->vm_flags & VM_DONTEXPAND) ||
 			is_vm_hugetlb_page(vma) ||
 			vma == get_gate_vma(current->mm))) {
 
diff --git a/mm/mmap.c b/mm/mmap.c
index c1ad2e78ea58..a76042dc806d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -945,8 +945,6 @@ void vm_stat_account(struct mm_struct *mm, unsigned long flags,
 			mm->exec_vm += pages;
 	} else if (flags & stack_flags)
 		mm->stack_vm += pages;
-	if (flags & (VM_RESERVED|VM_IO))
-		mm->reserved_vm += pages;
 }
 #endif /* CONFIG_PROC_FS */
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 9c4a7b63a4df..12e84e69dd06 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1811,7 +1811,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	if (addr != (pfn << PAGE_SHIFT))
 		return -EINVAL;
 
-	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	return 0;
 }
 EXPORT_SYMBOL(remap_pfn_range);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 2bb90b1d241c..8de704679bfc 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2163,8 +2163,7 @@ int remap_vmalloc_range(struct vm_area_struct *vma, void *addr,
 		usize -= PAGE_SIZE;
 	} while (usize > 0);
 
-	/* Prevent "things" like memory migration? VM_flags need a cleanup... */
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 
 	return 0;
 }
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c
index 55af8c5b57e6..3a6e8731646c 100644
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -485,7 +485,7 @@ static int sel_mmap_policy(struct file *filp, struct vm_area_struct *vma)
 			return -EACCES;
 	}
 
-	vma->vm_flags |= VM_RESERVED;
+	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = &sel_mmap_policy_ops;
 
 	return 0;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 20554eff5a21..5e12e5bacbba 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -3039,7 +3039,7 @@ static int snd_pcm_mmap_status(struct snd_pcm_substream *substream, struct file
 		return -EINVAL;
 	area->vm_ops = &snd_pcm_vm_ops_status;
 	area->vm_private_data = substream;
-	area->vm_flags |= VM_RESERVED;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	return 0;
 }
 
@@ -3076,7 +3076,7 @@ static int snd_pcm_mmap_control(struct snd_pcm_substream *substream, struct file
 		return -EINVAL;
 	area->vm_ops = &snd_pcm_vm_ops_control;
 	area->vm_private_data = substream;
-	area->vm_flags |= VM_RESERVED;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	return 0;
 }
 #else /* ! coherent mmap */
@@ -3170,7 +3170,7 @@ static const struct vm_operations_struct snd_pcm_vm_ops_data_fault = {
 int snd_pcm_lib_default_mmap(struct snd_pcm_substream *substream,
 			     struct vm_area_struct *area)
 {
-	area->vm_flags |= VM_RESERVED;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 #ifdef ARCH_HAS_DMA_MMAP_COHERENT
 	if (!substream->ops->page &&
 	    substream->dma_buffer.dev.type == SNDRV_DMA_TYPE_DEV)
diff --git a/sound/usb/usx2y/us122l.c b/sound/usb/usx2y/us122l.c
index c4fd3b1d9592..d0323a693ba2 100644
--- a/sound/usb/usx2y/us122l.c
+++ b/sound/usb/usx2y/us122l.c
@@ -262,7 +262,7 @@ static int usb_stream_hwdep_mmap(struct snd_hwdep *hw,
 	}
 
 	area->vm_ops = &usb_stream_hwdep_vm_ops;
-	area->vm_flags |= VM_RESERVED;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	area->vm_private_data = us122l;
 	atomic_inc(&us122l->mmap_count);
 out:
diff --git a/sound/usb/usx2y/usX2Yhwdep.c b/sound/usb/usx2y/usX2Yhwdep.c
index 04aafb43a13c..0b34dbc8f302 100644
--- a/sound/usb/usx2y/usX2Yhwdep.c
+++ b/sound/usb/usx2y/usX2Yhwdep.c
@@ -82,7 +82,7 @@ static int snd_us428ctls_mmap(struct snd_hwdep * hw, struct file *filp, struct v
 		us428->us428ctls_sharedmem->CtlSnapShotLast = -2;
 	}
 	area->vm_ops = &us428ctls_vm_ops;
-	area->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	area->vm_private_data = hw->private_data;
 	return 0;
 }
diff --git a/sound/usb/usx2y/usx2yhwdeppcm.c b/sound/usb/usx2y/usx2yhwdeppcm.c
index 8e40b6e67e9e..cc56007791e0 100644
--- a/sound/usb/usx2y/usx2yhwdeppcm.c
+++ b/sound/usb/usx2y/usx2yhwdeppcm.c
@@ -723,7 +723,7 @@ static int snd_usX2Y_hwdep_pcm_mmap(struct snd_hwdep * hw, struct file *filp, st
 		return -ENODEV;
 	}
 	area->vm_ops = &snd_usX2Y_hwdep_pcm_vm_ops;
-	area->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+	area->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
 	area->vm_private_data = hw->private_data;
 	return 0;
 }
-- 
cgit v1.2.3


From 5d3a551c28c6669dc43be40d8fafafbc2ec8f42b Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Mon, 8 Oct 2012 16:29:32 -0700
Subject: mm: hugetlb: add arch hook for clearing page flags before entering
 pool

The core page allocator ensures that page flags are zeroed when freeing
pages via free_pages_check.  A number of architectures (ARM, PPC, MIPS)
rely on this property to treat new pages as dirty with respect to the data
cache and perform the appropriate flushing before mapping the pages into
userspace.

This can lead to cache synchronisation problems when using hugepages,
since the allocator keeps its own pool of pages above the usual page
allocator and does not reset the page flags when freeing a page into the
pool.

This patch adds a new architecture hook, arch_clear_hugepage_flags, so
that architectures which rely on the page flags being in a particular
state for fresh allocations can adjust the flags accordingly when a page
is freed into the pool.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Cc: Michal Hocko <mhocko@suse.cz>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/asm/hugetlb.h    | 4 ++++
 arch/mips/include/asm/hugetlb.h    | 4 ++++
 arch/powerpc/include/asm/hugetlb.h | 4 ++++
 arch/s390/include/asm/hugetlb.h    | 1 +
 arch/sh/include/asm/hugetlb.h      | 6 ++++++
 arch/sparc/include/asm/hugetlb.h   | 4 ++++
 arch/tile/include/asm/hugetlb.h    | 4 ++++
 arch/x86/include/asm/hugetlb.h     | 4 ++++
 mm/hugetlb.c                       | 1 +
 9 files changed, 32 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index da55c63728e0..94eaa5bd5d0c 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -77,4 +77,8 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #endif /* _ASM_IA64_HUGETLB_H */
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index 58d36889f09b..bd94946a18f3 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -112,4 +112,8 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #endif /* __ASM_HUGETLB_H */
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index dfdb95bc59a5..62e11a32c4c2 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -151,6 +151,10 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #else /* ! CONFIG_HUGETLB_PAGE */
 static inline void flush_hugetlb_page(struct vm_area_struct *vma,
 				      unsigned long vmaddr)
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 2d6e6e380564..fc322421b1cc 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -33,6 +33,7 @@ static inline int prepare_hugepage_range(struct file *file,
 }
 
 #define hugetlb_prefault_arch_hook(mm)		do { } while (0)
+#define arch_clear_hugepage_flags(page)		do { } while (0)
 
 int arch_prepare_hugepage(struct page *page);
 void arch_release_hugepage(struct page *page);
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index 967068fb79ac..b3808c7d67b2 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -1,6 +1,7 @@
 #ifndef _ASM_SH_HUGETLB_H
 #define _ASM_SH_HUGETLB_H
 
+#include <asm/cacheflush.h>
 #include <asm/page.h>
 
 
@@ -89,4 +90,9 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_dcache_clean, &page->flags);
+}
+
 #endif /* _ASM_SH_HUGETLB_H */
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 177061064ee6..e7927c9758a1 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -82,4 +82,8 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #endif /* _ASM_SPARC64_HUGETLB_H */
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index b2042380a5aa..0f885af2b621 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -106,6 +106,10 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #ifdef CONFIG_HUGETLB_SUPER_PAGES
 static inline pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
 				       struct page *page, int writable)
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index 439a9acc132d..bdd35dbd0605 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -90,4 +90,8 @@ static inline void arch_release_hugepage(struct page *page)
 {
 }
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+}
+
 #endif /* _ASM_X86_HUGETLB_H */
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index bc727122dd44..f1bb534254f6 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -637,6 +637,7 @@ static void free_huge_page(struct page *page)
 		h->surplus_huge_pages--;
 		h->surplus_huge_pages_node[nid]--;
 	} else {
+		arch_clear_hugepage_flags(page);
 		enqueue_huge_page(h, page);
 	}
 	spin_unlock(&hugetlb_lock);
-- 
cgit v1.2.3


From 15626062f4a98279c59a2a5208c496cf65cbf8c0 Mon Sep 17 00:00:00 2001
From: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Date: Mon, 8 Oct 2012 16:30:04 -0700
Subject: thp, x86: introduce HAVE_ARCH_TRANSPARENT_HUGEPAGE

Cleanup patch in preparation for transparent hugepage support on s390.
Adding new architectures to the TRANSPARENT_HUGEPAGE config option can
make the "depends" line rather ugly, like "depends on (X86 || (S390 &&
64BIT)) && MMU".

This patch adds a HAVE_ARCH_TRANSPARENT_HUGEPAGE instead.  x86 already has
MMU "def_bool y", so the MMU check is superfluous there and
HAVE_ARCH_TRANSPARENT_HUGEPAGE can be selected in arch/x86/Kconfig.

Signed-off-by: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/Kconfig     | 3 +++
 arch/x86/Kconfig | 1 +
 mm/Kconfig       | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/Kconfig b/arch/Kconfig
index a62965d057f6..550cce4dd648 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -313,4 +313,7 @@ config HAVE_IRQ_TIME_ACCOUNTING
 	  Archs need to ensure they use a high enough resolution clock to
 	  support irq time accounting and then call enable_sched_clock_irqtime().
 
+config HAVE_ARCH_TRANSPARENT_HUGEPAGE
+	bool
+
 source "kernel/gcov/Kconfig"
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6119d6c7002e..1ae94bcae5d9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -88,6 +88,7 @@ config X86
 	select IRQ_FORCED_THREADING
 	select USE_GENERIC_SMP_HELPERS if SMP
 	select HAVE_BPF_JIT if X86_64
+	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select CLKEVT_I8253
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select GENERIC_IOMAP
diff --git a/mm/Kconfig b/mm/Kconfig
index d5c8019c6627..3322342a1ffb 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -318,7 +318,7 @@ config NOMMU_INITIAL_TRIM_EXCESS
 
 config TRANSPARENT_HUGEPAGE
 	bool "Transparent Hugepage Support"
-	depends on X86 && MMU
+	depends on HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select COMPACTION
 	help
 	  Transparent Hugepages allows the kernel to use huge pages and
-- 
cgit v1.2.3


From 9d9e6f9703bbd642f3f2f807e6aaa642a4cbcec9 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Mon, 8 Oct 2012 16:31:20 -0700
Subject: rbtree: remove prior augmented rbtree implementation

convert arch/x86/mm/pat_rbtree.c to the proposed augmented rbtree api
and remove the old augmented rbtree implementation.

Signed-off-by: Michel Lespinasse <walken@google.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pat_rbtree.c | 65 +++++++++++++++++++++++++++++++-------------
 include/linux/rbtree.h   |  8 ------
 lib/rbtree.c             | 71 ------------------------------------------------
 3 files changed, 46 insertions(+), 98 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 8acaddd0fb21..7e1515bd4770 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -54,29 +54,57 @@ static u64 get_subtree_max_end(struct rb_node *node)
 	return ret;
 }
 
-/* Update 'subtree_max_end' for a node, based on node and its children */
-static void memtype_rb_augment_cb(struct rb_node *node, void *__unused)
+static u64 compute_subtree_max_end(struct memtype *data)
 {
-	struct memtype *data;
-	u64 max_end, child_max_end;
-
-	if (!node)
-		return;
-
-	data = container_of(node, struct memtype, rb);
-	max_end = data->end;
+	u64 max_end = data->end, child_max_end;
 
-	child_max_end = get_subtree_max_end(node->rb_right);
+	child_max_end = get_subtree_max_end(data->rb.rb_right);
 	if (child_max_end > max_end)
 		max_end = child_max_end;
 
-	child_max_end = get_subtree_max_end(node->rb_left);
+	child_max_end = get_subtree_max_end(data->rb.rb_left);
 	if (child_max_end > max_end)
 		max_end = child_max_end;
 
-	data->subtree_max_end = max_end;
+	return max_end;
+}
+
+/* Update 'subtree_max_end' for node and its parents */
+static void memtype_rb_propagate_cb(struct rb_node *node, struct rb_node *stop)
+{
+	while (node != stop) {
+		struct memtype *data = container_of(node, struct memtype, rb);
+		u64 subtree_max_end = compute_subtree_max_end(data);
+		if (data->subtree_max_end == subtree_max_end)
+			break;
+		data->subtree_max_end = subtree_max_end;
+		node = rb_parent(&data->rb);
+	}
+}
+
+static void memtype_rb_copy_cb(struct rb_node *old, struct rb_node *new)
+{
+	struct memtype *old_data = container_of(old, struct memtype, rb);
+	struct memtype *new_data = container_of(new, struct memtype, rb);
+
+	new_data->subtree_max_end = old_data->subtree_max_end;
 }
 
+/* Update 'subtree_max_end' after tree rotation. old and new are the
+ * former and current subtree roots */
+static void memtype_rb_rotate_cb(struct rb_node *old, struct rb_node *new)
+{
+	struct memtype *old_data = container_of(old, struct memtype, rb);
+	struct memtype *new_data = container_of(new, struct memtype, rb);
+
+	new_data->subtree_max_end = old_data->subtree_max_end;
+	old_data->subtree_max_end = compute_subtree_max_end(old_data);
+}
+
+static const struct rb_augment_callbacks memtype_rb_augment_cb = {
+	memtype_rb_propagate_cb, memtype_rb_copy_cb, memtype_rb_rotate_cb
+};
+
 /* Find the first (lowest start addr) overlapping range from rb tree */
 static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
 				u64 start, u64 end)
@@ -179,15 +207,17 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata)
 		struct memtype *data = container_of(*node, struct memtype, rb);
 
 		parent = *node;
+		if (data->subtree_max_end < newdata->end)
+			data->subtree_max_end = newdata->end;
 		if (newdata->start <= data->start)
 			node = &((*node)->rb_left);
 		else if (newdata->start > data->start)
 			node = &((*node)->rb_right);
 	}
 
+	newdata->subtree_max_end = newdata->end;
 	rb_link_node(&newdata->rb, parent, node);
-	rb_insert_color(&newdata->rb, root);
-	rb_augment_insert(&newdata->rb, memtype_rb_augment_cb, NULL);
+	rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb);
 }
 
 int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
@@ -209,16 +239,13 @@ int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type)
 
 struct memtype *rbt_memtype_erase(u64 start, u64 end)
 {
-	struct rb_node *deepest;
 	struct memtype *data;
 
 	data = memtype_rb_exact_match(&memtype_rbroot, start, end);
 	if (!data)
 		goto out;
 
-	deepest = rb_augment_erase_begin(&data->rb);
-	rb_erase(&data->rb, &memtype_rbroot);
-	rb_augment_erase_end(deepest, memtype_rb_augment_cb, NULL);
+	rb_erase_augmented(&data->rb, &memtype_rbroot, &memtype_rb_augment_cb);
 out:
 	return data;
 }
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index c902eb9d6506..4ace31b33380 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -80,14 +80,6 @@ rb_insert_augmented(struct rb_node *node, struct rb_root *root,
 }
 
 
-typedef void (*rb_augment_f)(struct rb_node *node, void *data);
-
-extern void rb_augment_insert(struct rb_node *node,
-			      rb_augment_f func, void *data);
-extern struct rb_node *rb_augment_erase_begin(struct rb_node *node);
-extern void rb_augment_erase_end(struct rb_node *node,
-				 rb_augment_f func, void *data);
-
 /* Find logical next and previous nodes in a tree */
 extern struct rb_node *rb_next(const struct rb_node *);
 extern struct rb_node *rb_prev(const struct rb_node *);
diff --git a/lib/rbtree.c b/lib/rbtree.c
index a37ee7954b8f..c0088ca345f9 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -538,77 +538,6 @@ void rb_erase_augmented(struct rb_node *node, struct rb_root *root,
 }
 EXPORT_SYMBOL(rb_erase_augmented);
 
-static void rb_augment_path(struct rb_node *node, rb_augment_f func, void *data)
-{
-	struct rb_node *parent;
-
-up:
-	func(node, data);
-	parent = rb_parent(node);
-	if (!parent)
-		return;
-
-	if (node == parent->rb_left && parent->rb_right)
-		func(parent->rb_right, data);
-	else if (parent->rb_left)
-		func(parent->rb_left, data);
-
-	node = parent;
-	goto up;
-}
-
-/*
- * after inserting @node into the tree, update the tree to account for
- * both the new entry and any damage done by rebalance
- */
-void rb_augment_insert(struct rb_node *node, rb_augment_f func, void *data)
-{
-	if (node->rb_left)
-		node = node->rb_left;
-	else if (node->rb_right)
-		node = node->rb_right;
-
-	rb_augment_path(node, func, data);
-}
-EXPORT_SYMBOL(rb_augment_insert);
-
-/*
- * before removing the node, find the deepest node on the rebalance path
- * that will still be there after @node gets removed
- */
-struct rb_node *rb_augment_erase_begin(struct rb_node *node)
-{
-	struct rb_node *deepest;
-
-	if (!node->rb_right && !node->rb_left)
-		deepest = rb_parent(node);
-	else if (!node->rb_right)
-		deepest = node->rb_left;
-	else if (!node->rb_left)
-		deepest = node->rb_right;
-	else {
-		deepest = rb_next(node);
-		if (deepest->rb_right)
-			deepest = deepest->rb_right;
-		else if (rb_parent(deepest) != node)
-			deepest = rb_parent(deepest);
-	}
-
-	return deepest;
-}
-EXPORT_SYMBOL(rb_augment_erase_begin);
-
-/*
- * after removal, update the tree to account for the removed entry
- * and any rebalance damage.
- */
-void rb_augment_erase_end(struct rb_node *node, rb_augment_f func, void *data)
-{
-	if (node)
-		rb_augment_path(node, func, data);
-}
-EXPORT_SYMBOL(rb_augment_erase_end);
-
 /*
  * This function returns the first node (in sort order) of the tree.
  */
-- 
cgit v1.2.3


From 3908836aa77e3621aaf2101f2920e01d7c8460d6 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Mon, 8 Oct 2012 16:31:21 -0700
Subject: rbtree: add RB_DECLARE_CALLBACKS() macro

As proposed by Peter Zijlstra, this makes it easier to define the augmented
rbtree callbacks.

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/pat_rbtree.c | 37 ++-----------------------------------
 include/linux/rbtree.h   | 30 ++++++++++++++++++++++++++++++
 lib/rbtree_test.c        | 34 ++--------------------------------
 3 files changed, 34 insertions(+), 67 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 7e1515bd4770..4d116959075d 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -69,41 +69,8 @@ static u64 compute_subtree_max_end(struct memtype *data)
 	return max_end;
 }
 
-/* Update 'subtree_max_end' for node and its parents */
-static void memtype_rb_propagate_cb(struct rb_node *node, struct rb_node *stop)
-{
-	while (node != stop) {
-		struct memtype *data = container_of(node, struct memtype, rb);
-		u64 subtree_max_end = compute_subtree_max_end(data);
-		if (data->subtree_max_end == subtree_max_end)
-			break;
-		data->subtree_max_end = subtree_max_end;
-		node = rb_parent(&data->rb);
-	}
-}
-
-static void memtype_rb_copy_cb(struct rb_node *old, struct rb_node *new)
-{
-	struct memtype *old_data = container_of(old, struct memtype, rb);
-	struct memtype *new_data = container_of(new, struct memtype, rb);
-
-	new_data->subtree_max_end = old_data->subtree_max_end;
-}
-
-/* Update 'subtree_max_end' after tree rotation. old and new are the
- * former and current subtree roots */
-static void memtype_rb_rotate_cb(struct rb_node *old, struct rb_node *new)
-{
-	struct memtype *old_data = container_of(old, struct memtype, rb);
-	struct memtype *new_data = container_of(new, struct memtype, rb);
-
-	new_data->subtree_max_end = old_data->subtree_max_end;
-	old_data->subtree_max_end = compute_subtree_max_end(old_data);
-}
-
-static const struct rb_augment_callbacks memtype_rb_augment_cb = {
-	memtype_rb_propagate_cb, memtype_rb_copy_cb, memtype_rb_rotate_cb
-};
+RB_DECLARE_CALLBACKS(static, memtype_rb_augment_cb, struct memtype, rb,
+		     u64, subtree_max_end, compute_subtree_max_end)
 
 /* Find the first (lowest start addr) overlapping range from rb tree */
 static struct memtype *memtype_rb_lowest_match(struct rb_root *root,
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 4ace31b33380..8d1e83b1c87b 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -79,6 +79,36 @@ rb_insert_augmented(struct rb_node *node, struct rb_root *root,
 	__rb_insert_augmented(node, root, augment->rotate);
 }
 
+#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,	      \
+			     rbtype, rbaugmented, rbcompute)		      \
+static void rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)    \
+{									      \
+	while (rb != stop) {						      \
+		rbstruct *node = rb_entry(rb, rbstruct, rbfield);	      \
+		rbtype augmented = rbcompute(node);			      \
+		if (node->rbaugmented == augmented)			      \
+			break;						      \
+		node->rbaugmented = augmented;				      \
+		rb = rb_parent(&node->rbfield);				      \
+	}								      \
+}									      \
+static void rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)   \
+{									      \
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		      \
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		      \
+	new->rbaugmented = old->rbaugmented;				      \
+}									      \
+static void rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
+{									      \
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		      \
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		      \
+	new->rbaugmented = old->rbaugmented;				      \
+	old->rbaugmented = rbcompute(old);				      \
+}									      \
+rbstatic const struct rb_augment_callbacks rbname = {			      \
+	rbname ## _propagate, rbname ## _copy, rbname ## _rotate	      \
+};
+
 
 /* Find logical next and previous nodes in a tree */
 extern struct rb_node *rb_next(const struct rb_node *);
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index e28345df09bf..b20e99969b0f 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -61,38 +61,8 @@ static inline u32 augment_recompute(struct test_node *node)
 	return max;
 }
 
-static void augment_propagate(struct rb_node *rb, struct rb_node *stop)
-{
-	while (rb != stop) {
-		struct test_node *node = rb_entry(rb, struct test_node, rb);
-		u32 augmented = augment_recompute(node);
-		if (node->augmented == augmented)
-			break;
-		node->augmented = augmented;
-		rb = rb_parent(&node->rb);
-	}
-}
-
-static void augment_copy(struct rb_node *rb_old, struct rb_node *rb_new)
-{
-	struct test_node *old = rb_entry(rb_old, struct test_node, rb);
-	struct test_node *new = rb_entry(rb_new, struct test_node, rb);
-	new->augmented = old->augmented;
-}
-
-static void augment_rotate(struct rb_node *rb_old, struct rb_node *rb_new)
-{
-	struct test_node *old = rb_entry(rb_old, struct test_node, rb);
-	struct test_node *new = rb_entry(rb_new, struct test_node, rb);
-
-	/* Rotation doesn't change subtree's augmented value */
-	new->augmented = old->augmented;
-	old->augmented = augment_recompute(old);
-}
-
-static const struct rb_augment_callbacks augment_callbacks = {
-	augment_propagate, augment_copy, augment_rotate
-};
+RB_DECLARE_CALLBACKS(static, augment_callbacks, struct test_node, rb,
+		     u32, augmented, augment_recompute)
 
 static void insert_augmented(struct test_node *node, struct rb_root *root)
 {
-- 
cgit v1.2.3


From 6b2dbba8b6ac4df26f72eda1e5ea7bab9f950e08 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Mon, 8 Oct 2012 16:31:25 -0700
Subject: mm: replace vma prio_tree with an interval tree

Implement an interval tree as a replacement for the VMA prio_tree.  The
algorithms are similar to lib/interval_tree.c; however that code can't be
directly reused as the interval endpoints are not explicitly stored in the
VMA.  So instead, the common algorithm is moved into a template and the
details (node type, how to get interval endpoints from the node, etc) are
filled in using the C preprocessor.

Once the interval tree functions are available, using them as a
replacement to the VMA prio tree is a relatively simple, mechanical job.

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/fault-armv.c           |   3 +-
 arch/arm/mm/flush.c                |   3 +-
 arch/parisc/kernel/cache.c         |   3 +-
 arch/x86/mm/hugetlbpage.c          |   3 +-
 fs/hugetlbfs/inode.c               |   9 +-
 fs/inode.c                         |   2 +-
 include/linux/fs.h                 |   6 +-
 include/linux/interval_tree_tmpl.h | 215 +++++++++++++++++++++++++++++++++++++
 include/linux/mm.h                 |  30 +++---
 include/linux/mm_types.h           |  14 +--
 kernel/events/uprobes.c            |   3 +-
 kernel/fork.c                      |   2 +-
 lib/interval_tree.c                | 166 ++--------------------------
 lib/prio_tree.c                    |  19 +---
 mm/Makefile                        |   4 +-
 mm/filemap_xip.c                   |   3 +-
 mm/fremap.c                        |   2 +-
 mm/hugetlb.c                       |   3 +-
 mm/interval_tree.c                 |  61 +++++++++++
 mm/memory-failure.c                |   3 +-
 mm/memory.c                        |   9 +-
 mm/mmap.c                          |  22 ++--
 mm/nommu.c                         |  12 +--
 mm/prio_tree.c                     | 208 -----------------------------------
 mm/rmap.c                          |  18 ++--
 25 files changed, 357 insertions(+), 466 deletions(-)
 create mode 100644 include/linux/interval_tree_tmpl.h
 create mode 100644 mm/interval_tree.c
 delete mode 100644 mm/prio_tree.c

(limited to 'arch/x86')

diff --git a/arch/arm/mm/fault-armv.c b/arch/arm/mm/fault-armv.c
index 7599e2625c7d..2a5907b5c8d2 100644
--- a/arch/arm/mm/fault-armv.c
+++ b/arch/arm/mm/fault-armv.c
@@ -134,7 +134,6 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	unsigned long offset;
 	pgoff_t pgoff;
 	int aliases = 0;
@@ -147,7 +146,7 @@ make_coherent(struct address_space *mapping, struct vm_area_struct *vma,
 	 * cache coherency.
 	 */
 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
 		/*
 		 * If this VMA is not in our MM, we can ignore it.
 		 * Note that we intentionally mask out the VMA
diff --git a/arch/arm/mm/flush.c b/arch/arm/mm/flush.c
index 40ca11ed6e5f..1c8f7f564175 100644
--- a/arch/arm/mm/flush.c
+++ b/arch/arm/mm/flush.c
@@ -196,7 +196,6 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
 {
 	struct mm_struct *mm = current->active_mm;
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	pgoff_t pgoff;
 
 	/*
@@ -208,7 +207,7 @@ static void __flush_dcache_aliases(struct address_space *mapping, struct page *p
 	pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 
 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long offset;
 
 		/*
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index 9d181890a7e3..48e16dc20102 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -276,7 +276,6 @@ void flush_dcache_page(struct page *page)
 {
 	struct address_space *mapping = page_mapping(page);
 	struct vm_area_struct *mpnt;
-	struct prio_tree_iter iter;
 	unsigned long offset;
 	unsigned long addr, old_addr = 0;
 	pgoff_t pgoff;
@@ -299,7 +298,7 @@ void flush_dcache_page(struct page *page)
 	 * to flush one address here for them all to become coherent */
 
 	flush_dcache_mmap_lock(mapping);
-	vma_prio_tree_foreach(mpnt, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(mpnt, &mapping->i_mmap, pgoff, pgoff) {
 		offset = (pgoff - mpnt->vm_pgoff) << PAGE_SHIFT;
 		addr = mpnt->vm_start + offset;
 
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c
index b91e48512425..937bff5cdaa7 100644
--- a/arch/x86/mm/hugetlbpage.c
+++ b/arch/x86/mm/hugetlbpage.c
@@ -71,7 +71,6 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 	struct address_space *mapping = vma->vm_file->f_mapping;
 	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
 			vma->vm_pgoff;
-	struct prio_tree_iter iter;
 	struct vm_area_struct *svma;
 	unsigned long saddr;
 	pte_t *spte = NULL;
@@ -81,7 +80,7 @@ huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
 		return (pte_t *)pmd_alloc(mm, pud, addr);
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(svma, &iter, &mapping->i_mmap, idx, idx) {
+	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
 		if (svma == vma)
 			continue;
 
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 0a0ab8e21b19..c5bc355d8243 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -397,17 +397,16 @@ static void hugetlbfs_evict_inode(struct inode *inode)
 }
 
 static inline void
-hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
+hugetlb_vmtruncate_list(struct rb_root *root, pgoff_t pgoff)
 {
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 
-	vma_prio_tree_foreach(vma, &iter, root, pgoff, ULONG_MAX) {
+	vma_interval_tree_foreach(vma, root, pgoff, ULONG_MAX) {
 		unsigned long v_offset;
 
 		/*
 		 * Can the expression below overflow on 32-bit arches?
-		 * No, because the prio_tree returns us only those vmas
+		 * No, because the interval tree returns us only those vmas
 		 * which overlap the truncated area starting at pgoff,
 		 * and no vma on a 32-bit arch can span beyond the 4GB.
 		 */
@@ -432,7 +431,7 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 
 	i_size_write(inode, offset);
 	mutex_lock(&mapping->i_mmap_mutex);
-	if (!prio_tree_empty(&mapping->i_mmap))
+	if (!RB_EMPTY_ROOT(&mapping->i_mmap))
 		hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
 	mutex_unlock(&mapping->i_mmap_mutex);
 	truncate_hugepages(inode, offset);
diff --git a/fs/inode.c b/fs/inode.c
index ac8d904b3f16..b03c71957246 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -348,7 +348,7 @@ void address_space_init_once(struct address_space *mapping)
 	mutex_init(&mapping->i_mmap_mutex);
 	INIT_LIST_HEAD(&mapping->private_list);
 	spin_lock_init(&mapping->private_lock);
-	INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
+	mapping->i_mmap = RB_ROOT;
 	INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
 }
 EXPORT_SYMBOL(address_space_init_once);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5a8a273d5b2f..c617ed024df8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -401,7 +401,7 @@ struct inodes_stat_t {
 #include <linux/cache.h>
 #include <linux/list.h>
 #include <linux/radix-tree.h>
-#include <linux/prio_tree.h>
+#include <linux/rbtree.h>
 #include <linux/init.h>
 #include <linux/pid.h>
 #include <linux/bug.h>
@@ -669,7 +669,7 @@ struct address_space {
 	struct radix_tree_root	page_tree;	/* radix tree of all pages */
 	spinlock_t		tree_lock;	/* and lock protecting it */
 	unsigned int		i_mmap_writable;/* count VM_SHARED mappings */
-	struct prio_tree_root	i_mmap;		/* tree of private and shared mappings */
+	struct rb_root		i_mmap;		/* tree of private and shared mappings */
 	struct list_head	i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
 	struct mutex		i_mmap_mutex;	/* protect tree, count, list */
 	/* Protected by tree_lock together with the radix tree */
@@ -741,7 +741,7 @@ int mapping_tagged(struct address_space *mapping, int tag);
  */
 static inline int mapping_mapped(struct address_space *mapping)
 {
-	return	!prio_tree_empty(&mapping->i_mmap) ||
+	return	!RB_EMPTY_ROOT(&mapping->i_mmap) ||
 		!list_empty(&mapping->i_mmap_nonlinear);
 }
 
diff --git a/include/linux/interval_tree_tmpl.h b/include/linux/interval_tree_tmpl.h
new file mode 100644
index 000000000000..c65deda31413
--- /dev/null
+++ b/include/linux/interval_tree_tmpl.h
@@ -0,0 +1,215 @@
+/*
+  Interval Trees
+  (C) 2012  Michel Lespinasse <walken@google.com>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  include/linux/interval_tree_tmpl.h
+*/
+
+/*
+ * Template for implementing interval trees
+ *
+ * ITSTRUCT:   struct type of the interval tree nodes
+ * ITRB:       name of struct rb_node field within ITSTRUCT
+ * ITTYPE:     type of the interval endpoints
+ * ITSUBTREE:  name of ITTYPE field within ITSTRUCT holding last-in-subtree
+ * ITSTART(n): start endpoint of ITSTRUCT node n
+ * ITLAST(n):  last endpoing of ITSTRUCT node n
+ * ITSTATIC:   'static' or empty
+ * ITPREFIX:   prefix to use for the inline tree definitions
+ */
+
+/* IT(name) -> ITPREFIX_name */
+#define _ITNAME(prefix, name) prefix ## _ ## name
+#define ITNAME(prefix, name) _ITNAME(prefix, name)
+#define IT(name) ITNAME(ITPREFIX, name)
+
+/* Callbacks for augmented rbtree insert and remove */
+
+static inline ITTYPE IT(compute_subtree_last)(ITSTRUCT *node)
+{
+	ITTYPE max = ITLAST(node), subtree_last;
+	if (node->ITRB.rb_left) {
+		subtree_last = rb_entry(node->ITRB.rb_left,
+					ITSTRUCT, ITRB)->ITSUBTREE;
+		if (max < subtree_last)
+			max = subtree_last;
+	}
+	if (node->ITRB.rb_right) {
+		subtree_last = rb_entry(node->ITRB.rb_right,
+					ITSTRUCT, ITRB)->ITSUBTREE;
+		if (max < subtree_last)
+			max = subtree_last;
+	}
+	return max;
+}
+
+static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
+{
+	while (rb != stop) {
+		ITSTRUCT *node = rb_entry(rb, ITSTRUCT, ITRB);
+		ITTYPE subtree_last = IT(compute_subtree_last)(node);
+		if (node->ITSUBTREE == subtree_last)
+			break;
+		node->ITSUBTREE = subtree_last;
+		rb = rb_parent(&node->ITRB);
+	}
+}
+
+static void IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
+	ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
+
+	new->ITSUBTREE = old->ITSUBTREE;
+}
+
+static void IT(augment_rotate)(struct rb_node *rb_old, struct rb_node *rb_new)
+{
+	ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
+	ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
+
+	new->ITSUBTREE = old->ITSUBTREE;
+	old->ITSUBTREE = IT(compute_subtree_last)(old);
+}
+
+static const struct rb_augment_callbacks IT(augment_callbacks) = {
+	IT(augment_propagate), IT(augment_copy), IT(augment_rotate)
+};
+
+/* Insert / remove interval nodes from the tree */
+
+ITSTATIC void IT(insert)(ITSTRUCT *node, struct rb_root *root)
+{
+	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
+	ITTYPE start = ITSTART(node), last = ITLAST(node);
+	ITSTRUCT *parent;
+
+	while (*link) {
+		rb_parent = *link;
+		parent = rb_entry(rb_parent, ITSTRUCT, ITRB);
+		if (parent->ITSUBTREE < last)
+			parent->ITSUBTREE = last;
+		if (start < ITSTART(parent))
+			link = &parent->ITRB.rb_left;
+		else
+			link = &parent->ITRB.rb_right;
+	}
+
+	node->ITSUBTREE = last;
+	rb_link_node(&node->ITRB, rb_parent, link);
+	rb_insert_augmented(&node->ITRB, root, &IT(augment_callbacks));
+}
+
+ITSTATIC void IT(remove)(ITSTRUCT *node, struct rb_root *root)
+{
+	rb_erase_augmented(&node->ITRB, root, &IT(augment_callbacks));
+}
+
+/*
+ * Iterate over intervals intersecting [start;last]
+ *
+ * Note that a node's interval intersects [start;last] iff:
+ *   Cond1: ITSTART(node) <= last
+ * and
+ *   Cond2: start <= ITLAST(node)
+ */
+
+static ITSTRUCT *IT(subtree_search)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
+{
+	while (true) {
+		/*
+		 * Loop invariant: start <= node->ITSUBTREE
+		 * (Cond2 is satisfied by one of the subtree nodes)
+		 */
+		if (node->ITRB.rb_left) {
+			ITSTRUCT *left = rb_entry(node->ITRB.rb_left,
+						  ITSTRUCT, ITRB);
+			if (start <= left->ITSUBTREE) {
+				/*
+				 * Some nodes in left subtree satisfy Cond2.
+				 * Iterate to find the leftmost such node N.
+				 * If it also satisfies Cond1, that's the match
+				 * we are looking for. Otherwise, there is no
+				 * matching interval as nodes to the right of N
+				 * can't satisfy Cond1 either.
+				 */
+				node = left;
+				continue;
+			}
+		}
+		if (ITSTART(node) <= last) {		/* Cond1 */
+			if (start <= ITLAST(node))	/* Cond2 */
+				return node;	/* node is leftmost match */
+			if (node->ITRB.rb_right) {
+				node = rb_entry(node->ITRB.rb_right,
+						ITSTRUCT, ITRB);
+				if (start <= node->ITSUBTREE)
+					continue;
+			}
+		}
+		return NULL;	/* No match */
+	}
+}
+
+ITSTATIC ITSTRUCT *IT(iter_first)(struct rb_root *root,
+				  ITTYPE start, ITTYPE last)
+{
+	ITSTRUCT *node;
+
+	if (!root->rb_node)
+		return NULL;
+	node = rb_entry(root->rb_node, ITSTRUCT, ITRB);
+	if (node->ITSUBTREE < start)
+		return NULL;
+	return IT(subtree_search)(node, start, last);
+}
+
+ITSTATIC ITSTRUCT *IT(iter_next)(ITSTRUCT *node, ITTYPE start, ITTYPE last)
+{
+	struct rb_node *rb = node->ITRB.rb_right, *prev;
+
+	while (true) {
+		/*
+		 * Loop invariants:
+		 *   Cond1: ITSTART(node) <= last
+		 *   rb == node->ITRB.rb_right
+		 *
+		 * First, search right subtree if suitable
+		 */
+		if (rb) {
+			ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB);
+			if (start <= right->ITSUBTREE)
+				return IT(subtree_search)(right, start, last);
+		}
+
+		/* Move up the tree until we come from a node's left child */
+		do {
+			rb = rb_parent(&node->ITRB);
+			if (!rb)
+				return NULL;
+			prev = &node->ITRB;
+			node = rb_entry(rb, ITSTRUCT, ITRB);
+			rb = node->ITRB.rb_right;
+		} while (prev == rb);
+
+		/* Check if the node intersects [start;last] */
+		if (last < ITSTART(node))		/* !Cond1 */
+			return NULL;
+		else if (start <= ITLAST(node))		/* Cond2 */
+			return node;
+	}
+}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5ddb11b2b4bb..0f671ef09eba 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -10,7 +10,6 @@
 #include <linux/list.h>
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
-#include <linux/prio_tree.h>
 #include <linux/atomic.h>
 #include <linux/debug_locks.h>
 #include <linux/mm_types.h>
@@ -1355,22 +1354,27 @@ extern void zone_pcp_reset(struct zone *zone);
 extern atomic_long_t mmap_pages_allocated;
 extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t);
 
-/* prio_tree.c */
-void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old);
-void vma_prio_tree_insert(struct vm_area_struct *, struct prio_tree_root *);
-void vma_prio_tree_remove(struct vm_area_struct *, struct prio_tree_root *);
-struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
-	struct prio_tree_iter *iter);
-
-#define vma_prio_tree_foreach(vma, iter, root, begin, end)	\
-	for (prio_tree_iter_init(iter, root, begin, end), vma = NULL;	\
-		(vma = vma_prio_tree_next(vma, iter)); )
+/* interval_tree.c */
+void vma_interval_tree_add(struct vm_area_struct *vma,
+			   struct vm_area_struct *old,
+			   struct address_space *mapping);
+void vma_interval_tree_insert(struct vm_area_struct *node,
+			      struct rb_root *root);
+void vma_interval_tree_remove(struct vm_area_struct *node,
+			      struct rb_root *root);
+struct vm_area_struct *vma_interval_tree_iter_first(struct rb_root *root,
+				unsigned long start, unsigned long last);
+struct vm_area_struct *vma_interval_tree_iter_next(struct vm_area_struct *node,
+				unsigned long start, unsigned long last);
+
+#define vma_interval_tree_foreach(vma, root, start, last)		\
+	for (vma = vma_interval_tree_iter_first(root, start, last);	\
+	     vma; vma = vma_interval_tree_iter_next(vma, start, last))
 
 static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
 					struct list_head *list)
 {
-	vma->shared.vm_set.parent = NULL;
-	list_add_tail(&vma->shared.vm_set.list, list);
+	list_add_tail(&vma->shared.nonlinear, list);
 }
 
 /* mmap.c */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index a57a43f5ca7c..31f8a3af7d94 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -6,7 +6,6 @@
 #include <linux/threads.h>
 #include <linux/list.h>
 #include <linux/spinlock.h>
-#include <linux/prio_tree.h>
 #include <linux/rbtree.h>
 #include <linux/rwsem.h>
 #include <linux/completion.h>
@@ -240,18 +239,15 @@ struct vm_area_struct {
 
 	/*
 	 * For areas with an address space and backing store,
-	 * linkage into the address_space->i_mmap prio tree, or
-	 * linkage to the list of like vmas hanging off its node, or
+	 * linkage into the address_space->i_mmap interval tree, or
 	 * linkage of vma in the address_space->i_mmap_nonlinear list.
 	 */
 	union {
 		struct {
-			struct list_head list;
-			void *parent;	/* aligns with prio_tree_node parent */
-			struct vm_area_struct *head;
-		} vm_set;
-
-		struct raw_prio_tree_node prio_tree_node;
+			struct rb_node rb;
+			unsigned long rb_subtree_last;
+		} linear;
+		struct list_head nonlinear;
 	} shared;
 
 	/*
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 912ef48d28ab..1d9c0a985960 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -735,7 +735,6 @@ static struct map_info *
 build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 {
 	unsigned long pgoff = offset >> PAGE_SHIFT;
-	struct prio_tree_iter iter;
 	struct vm_area_struct *vma;
 	struct map_info *curr = NULL;
 	struct map_info *prev = NULL;
@@ -744,7 +743,7 @@ build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
 
  again:
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		if (!valid_vma(vma, is_register))
 			continue;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 972762e01024..90dace52715e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -423,7 +423,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 				mapping->i_mmap_writable++;
 			flush_dcache_mmap_lock(mapping);
 			/* insert tmp into the share list, just after mpnt */
-			vma_prio_tree_add(tmp, mpnt);
+			vma_interval_tree_add(tmp, mpnt, mapping);
 			flush_dcache_mmap_unlock(mapping);
 			mutex_unlock(&mapping->i_mmap_mutex);
 		}
diff --git a/lib/interval_tree.c b/lib/interval_tree.c
index 6fd540b1e499..77a793e0644b 100644
--- a/lib/interval_tree.c
+++ b/lib/interval_tree.c
@@ -1,159 +1,13 @@
 #include <linux/init.h>
 #include <linux/interval_tree.h>
 
-/* Callbacks for augmented rbtree insert and remove */
-
-static inline unsigned long
-compute_subtree_last(struct interval_tree_node *node)
-{
-	unsigned long max = node->last, subtree_last;
-	if (node->rb.rb_left) {
-		subtree_last = rb_entry(node->rb.rb_left,
-			struct interval_tree_node, rb)->__subtree_last;
-		if (max < subtree_last)
-			max = subtree_last;
-	}
-	if (node->rb.rb_right) {
-		subtree_last = rb_entry(node->rb.rb_right,
-			struct interval_tree_node, rb)->__subtree_last;
-		if (max < subtree_last)
-			max = subtree_last;
-	}
-	return max;
-}
-
-RB_DECLARE_CALLBACKS(static, augment_callbacks, struct interval_tree_node, rb,
-		     unsigned long, __subtree_last, compute_subtree_last)
-
-/* Insert / remove interval nodes from the tree */
-
-void interval_tree_insert(struct interval_tree_node *node,
-			  struct rb_root *root)
-{
-	struct rb_node **link = &root->rb_node, *rb_parent = NULL;
-	unsigned long start = node->start, last = node->last;
-	struct interval_tree_node *parent;
-
-	while (*link) {
-		rb_parent = *link;
-		parent = rb_entry(rb_parent, struct interval_tree_node, rb);
-		if (parent->__subtree_last < last)
-			parent->__subtree_last = last;
-		if (start < parent->start)
-			link = &parent->rb.rb_left;
-		else
-			link = &parent->rb.rb_right;
-	}
-
-	node->__subtree_last = last;
-	rb_link_node(&node->rb, rb_parent, link);
-	rb_insert_augmented(&node->rb, root, &augment_callbacks);
-}
-
-void interval_tree_remove(struct interval_tree_node *node,
-			  struct rb_root *root)
-{
-	rb_erase_augmented(&node->rb, root, &augment_callbacks);
-}
-
-/*
- * Iterate over intervals intersecting [start;last]
- *
- * Note that a node's interval intersects [start;last] iff:
- *   Cond1: node->start <= last
- * and
- *   Cond2: start <= node->last
- */
-
-static struct interval_tree_node *
-subtree_search(struct interval_tree_node *node,
-	       unsigned long start, unsigned long last)
-{
-	while (true) {
-		/*
-		 * Loop invariant: start <= node->__subtree_last
-		 * (Cond2 is satisfied by one of the subtree nodes)
-		 */
-		if (node->rb.rb_left) {
-			struct interval_tree_node *left =
-				rb_entry(node->rb.rb_left,
-					 struct interval_tree_node, rb);
-			if (start <= left->__subtree_last) {
-				/*
-				 * Some nodes in left subtree satisfy Cond2.
-				 * Iterate to find the leftmost such node N.
-				 * If it also satisfies Cond1, that's the match
-				 * we are looking for. Otherwise, there is no
-				 * matching interval as nodes to the right of N
-				 * can't satisfy Cond1 either.
-				 */
-				node = left;
-				continue;
-			}
-		}
-		if (node->start <= last) {		/* Cond1 */
-			if (start <= node->last)	/* Cond2 */
-				return node;	/* node is leftmost match */
-			if (node->rb.rb_right) {
-				node = rb_entry(node->rb.rb_right,
-					struct interval_tree_node, rb);
-				if (start <= node->__subtree_last)
-					continue;
-			}
-		}
-		return NULL;	/* No match */
-	}
-}
-
-struct interval_tree_node *
-interval_tree_iter_first(struct rb_root *root,
-			 unsigned long start, unsigned long last)
-{
-	struct interval_tree_node *node;
-
-	if (!root->rb_node)
-		return NULL;
-	node = rb_entry(root->rb_node, struct interval_tree_node, rb);
-	if (node->__subtree_last < start)
-		return NULL;
-	return subtree_search(node, start, last);
-}
-
-struct interval_tree_node *
-interval_tree_iter_next(struct interval_tree_node *node,
-			unsigned long start, unsigned long last)
-{
-	struct rb_node *rb = node->rb.rb_right, *prev;
-
-	while (true) {
-		/*
-		 * Loop invariants:
-		 *   Cond1: node->start <= last
-		 *   rb == node->rb.rb_right
-		 *
-		 * First, search right subtree if suitable
-		 */
-		if (rb) {
-			struct interval_tree_node *right =
-				rb_entry(rb, struct interval_tree_node, rb);
-			if (start <= right->__subtree_last)
-				return subtree_search(right, start, last);
-		}
-
-		/* Move up the tree until we come from a node's left child */
-		do {
-			rb = rb_parent(&node->rb);
-			if (!rb)
-				return NULL;
-			prev = &node->rb;
-			node = rb_entry(rb, struct interval_tree_node, rb);
-			rb = node->rb.rb_right;
-		} while (prev == rb);
-
-		/* Check if the node intersects [start;last] */
-		if (last < node->start)		/* !Cond1 */
-			return NULL;
-		else if (start <= node->last)	/* Cond2 */
-			return node;
-	}
-}
+#define ITSTRUCT   struct interval_tree_node
+#define ITRB       rb
+#define ITTYPE     unsigned long
+#define ITSUBTREE  __subtree_last
+#define ITSTART(n) ((n)->start)
+#define ITLAST(n)  ((n)->last)
+#define ITSTATIC
+#define ITPREFIX   interval_tree
+
+#include <linux/interval_tree_tmpl.h>
diff --git a/lib/prio_tree.c b/lib/prio_tree.c
index 4e0d2edff2b4..bba37148c15e 100644
--- a/lib/prio_tree.c
+++ b/lib/prio_tree.c
@@ -44,27 +44,12 @@
  * The following macros are used for implementing prio_tree for i_mmap
  */
 
-#define RADIX_INDEX(vma)  ((vma)->vm_pgoff)
-#define VMA_SIZE(vma)	  (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
-/* avoid overflow */
-#define HEAP_INDEX(vma)	  ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
-
-
 static void get_index(const struct prio_tree_root *root,
     const struct prio_tree_node *node,
     unsigned long *radix, unsigned long *heap)
 {
-	if (root->raw) {
-		struct vm_area_struct *vma = prio_tree_entry(
-		    node, struct vm_area_struct, shared.prio_tree_node);
-
-		*radix = RADIX_INDEX(vma);
-		*heap = HEAP_INDEX(vma);
-	}
-	else {
-		*radix = node->start;
-		*heap = node->last;
-	}
+	*radix = node->start;
+	*heap = node->last;
 }
 
 static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
diff --git a/mm/Makefile b/mm/Makefile
index 92753e2d82da..6b025f80af34 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -14,9 +14,9 @@ endif
 obj-y			:= filemap.o mempool.o oom_kill.o fadvise.o \
 			   maccess.o page_alloc.o page-writeback.o \
 			   readahead.o swap.o truncate.o vmscan.o shmem.o \
-			   prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \
+			   util.o mmzone.o vmstat.o backing-dev.o \
 			   mm_init.o mmu_context.o percpu.o slab_common.o \
-			   compaction.o $(mmu-y)
+			   compaction.o interval_tree.o $(mmu-y)
 
 obj-y += init-mm.o
 
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 91750227a191..a52daee11d3f 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -167,7 +167,6 @@ __xip_unmap (struct address_space * mapping,
 {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm;
-	struct prio_tree_iter iter;
 	unsigned long address;
 	pte_t *pte;
 	pte_t pteval;
@@ -184,7 +183,7 @@ __xip_unmap (struct address_space * mapping,
 
 retry:
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		mm = vma->vm_mm;
 		address = vma->vm_start +
 			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
diff --git a/mm/fremap.c b/mm/fremap.c
index 3d731a498788..3899a86851ce 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -214,7 +214,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
 		mutex_lock(&mapping->i_mmap_mutex);
 		flush_dcache_mmap_lock(mapping);
 		vma->vm_flags |= VM_NONLINEAR;
-		vma_prio_tree_remove(vma, &mapping->i_mmap);
+		vma_interval_tree_remove(vma, &mapping->i_mmap);
 		vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
 		flush_dcache_mmap_unlock(mapping);
 		mutex_unlock(&mapping->i_mmap_mutex);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f1bb534254f6..c9b40e3a9936 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2474,7 +2474,6 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct hstate *h = hstate_vma(vma);
 	struct vm_area_struct *iter_vma;
 	struct address_space *mapping;
-	struct prio_tree_iter iter;
 	pgoff_t pgoff;
 
 	/*
@@ -2491,7 +2490,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * __unmap_hugepage_range() is called as the lock is already held
 	 */
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(iter_vma, &mapping->i_mmap, pgoff, pgoff) {
 		/* Do not unmap the current VMA */
 		if (iter_vma == vma)
 			continue;
diff --git a/mm/interval_tree.c b/mm/interval_tree.c
new file mode 100644
index 000000000000..7dc565660e56
--- /dev/null
+++ b/mm/interval_tree.c
@@ -0,0 +1,61 @@
+/*
+ * mm/interval_tree.c - interval tree for mapping->i_mmap
+ *
+ * Copyright (C) 2012, Michel Lespinasse <walken@google.com>
+ *
+ * This file is released under the GPL v2.
+ */
+
+#include <linux/mm.h>
+#include <linux/fs.h>
+
+#define ITSTRUCT   struct vm_area_struct
+#define ITRB       shared.linear.rb
+#define ITTYPE     unsigned long
+#define ITSUBTREE  shared.linear.rb_subtree_last
+#define ITSTART(n) ((n)->vm_pgoff)
+#define ITLAST(n)  ((n)->vm_pgoff + \
+		    (((n)->vm_end - (n)->vm_start) >> PAGE_SHIFT) - 1)
+#define ITSTATIC
+#define ITPREFIX   vma_interval_tree
+
+#include <linux/interval_tree_tmpl.h>
+
+/* Insert old immediately after vma in the interval tree */
+void vma_interval_tree_add(struct vm_area_struct *vma,
+			   struct vm_area_struct *old,
+			   struct address_space *mapping)
+{
+	struct rb_node **link;
+	struct vm_area_struct *parent;
+	unsigned long last;
+
+	if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
+		list_add(&vma->shared.nonlinear, &old->shared.nonlinear);
+		return;
+	}
+
+	last = ITLAST(vma);
+
+	if (!old->shared.linear.rb.rb_right) {
+		parent = old;
+		link = &old->shared.linear.rb.rb_right;
+	} else {
+		parent = rb_entry(old->shared.linear.rb.rb_right,
+				  struct vm_area_struct, shared.linear.rb);
+		if (parent->shared.linear.rb_subtree_last < last)
+			parent->shared.linear.rb_subtree_last = last;
+		while (parent->shared.linear.rb.rb_left) {
+			parent = rb_entry(parent->shared.linear.rb.rb_left,
+				struct vm_area_struct, shared.linear.rb);
+			if (parent->shared.linear.rb_subtree_last < last)
+				parent->shared.linear.rb_subtree_last = last;
+		}
+		link = &parent->shared.linear.rb.rb_left;
+	}
+
+	vma->shared.linear.rb_subtree_last = last;
+	rb_link_node(&vma->shared.linear.rb, &parent->shared.linear.rb, link);
+	rb_insert_augmented(&vma->shared.linear.rb, &mapping->i_mmap,
+			    &vma_interval_tree_augment_callbacks);
+}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index a6e2141a6610..c38a6257d082 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -431,7 +431,6 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 {
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
-	struct prio_tree_iter iter;
 	struct address_space *mapping = page->mapping;
 
 	mutex_lock(&mapping->i_mmap_mutex);
@@ -442,7 +441,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 		if (!task_early_kill(tsk))
 			continue;
 
-		vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff,
+		vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff,
 				      pgoff) {
 			/*
 			 * Send early kill signal to tasks where a vma covers
diff --git a/mm/memory.c b/mm/memory.c
index e09c04813186..d205e4381a34 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2801,14 +2801,13 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 	zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
 }
 
-static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
+static inline void unmap_mapping_range_tree(struct rb_root *root,
 					    struct zap_details *details)
 {
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	pgoff_t vba, vea, zba, zea;
 
-	vma_prio_tree_foreach(vma, &iter, root,
+	vma_interval_tree_foreach(vma, root,
 			details->first_index, details->last_index) {
 
 		vba = vma->vm_pgoff;
@@ -2839,7 +2838,7 @@ static inline void unmap_mapping_range_list(struct list_head *head,
 	 * across *all* the pages in each nonlinear VMA, not just the pages
 	 * whose virtual address lies outside the file truncation point.
 	 */
-	list_for_each_entry(vma, head, shared.vm_set.list) {
+	list_for_each_entry(vma, head, shared.nonlinear) {
 		details->nonlinear_vma = vma;
 		unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
 	}
@@ -2883,7 +2882,7 @@ void unmap_mapping_range(struct address_space *mapping,
 
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
+	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
diff --git a/mm/mmap.c b/mm/mmap.c
index e3c365ff1b6a..5ac533f88e99 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -199,14 +199,14 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 
 	flush_dcache_mmap_lock(mapping);
 	if (unlikely(vma->vm_flags & VM_NONLINEAR))
-		list_del_init(&vma->shared.vm_set.list);
+		list_del_init(&vma->shared.nonlinear);
 	else
-		vma_prio_tree_remove(vma, &mapping->i_mmap);
+		vma_interval_tree_remove(vma, &mapping->i_mmap);
 	flush_dcache_mmap_unlock(mapping);
 }
 
 /*
- * Unlink a file-based vm structure from its prio_tree, to hide
+ * Unlink a file-based vm structure from its interval tree, to hide
  * vma from rmap and vmtruncate before freeing its page tables.
  */
 void unlink_file_vma(struct vm_area_struct *vma)
@@ -411,7 +411,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
 		if (unlikely(vma->vm_flags & VM_NONLINEAR))
 			vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
 		else
-			vma_prio_tree_insert(vma, &mapping->i_mmap);
+			vma_interval_tree_insert(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
 	}
 }
@@ -449,7 +449,7 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 
 /*
  * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
- * mm's list and rbtree.  It has already been inserted into the prio_tree.
+ * mm's list and rbtree.  It has already been inserted into the interval tree.
  */
 static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
 {
@@ -491,7 +491,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct vm_area_struct *next = vma->vm_next;
 	struct vm_area_struct *importer = NULL;
 	struct address_space *mapping = NULL;
-	struct prio_tree_root *root = NULL;
+	struct rb_root *root = NULL;
 	struct anon_vma *anon_vma = NULL;
 	struct file *file = vma->vm_file;
 	long adjust_next = 0;
@@ -554,7 +554,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 		mutex_lock(&mapping->i_mmap_mutex);
 		if (insert) {
 			/*
-			 * Put into prio_tree now, so instantiated pages
+			 * Put into interval tree now, so instantiated pages
 			 * are visible to arm/parisc __flush_dcache_page
 			 * throughout; but we cannot insert into address
 			 * space until vma start or end is updated.
@@ -582,9 +582,9 @@ again:			remove_next = 1 + (end > next->vm_end);
 
 	if (root) {
 		flush_dcache_mmap_lock(mapping);
-		vma_prio_tree_remove(vma, root);
+		vma_interval_tree_remove(vma, root);
 		if (adjust_next)
-			vma_prio_tree_remove(next, root);
+			vma_interval_tree_remove(next, root);
 	}
 
 	vma->vm_start = start;
@@ -597,8 +597,8 @@ again:			remove_next = 1 + (end > next->vm_end);
 
 	if (root) {
 		if (adjust_next)
-			vma_prio_tree_insert(next, root);
-		vma_prio_tree_insert(vma, root);
+			vma_interval_tree_insert(next, root);
+		vma_interval_tree_insert(vma, root);
 		flush_dcache_mmap_unlock(mapping);
 	}
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 12e84e69dd06..45131b41bcdb 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -698,7 +698,7 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
 
 		mutex_lock(&mapping->i_mmap_mutex);
 		flush_dcache_mmap_lock(mapping);
-		vma_prio_tree_insert(vma, &mapping->i_mmap);
+		vma_interval_tree_insert(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
 		mutex_unlock(&mapping->i_mmap_mutex);
 	}
@@ -764,7 +764,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
 
 		mutex_lock(&mapping->i_mmap_mutex);
 		flush_dcache_mmap_lock(mapping);
-		vma_prio_tree_remove(vma, &mapping->i_mmap);
+		vma_interval_tree_remove(vma, &mapping->i_mmap);
 		flush_dcache_mmap_unlock(mapping);
 		mutex_unlock(&mapping->i_mmap_mutex);
 	}
@@ -2044,7 +2044,6 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 				size_t newsize)
 {
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	struct vm_region *region;
 	pgoff_t low, high;
 	size_t r_size, r_top;
@@ -2056,8 +2055,7 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 	mutex_lock(&inode->i_mapping->i_mmap_mutex);
 
 	/* search for VMAs that fall within the dead zone */
-	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-			      low, high) {
+	vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap, low, high) {
 		/* found one - only interested if it's shared out of the page
 		 * cache */
 		if (vma->vm_flags & VM_SHARED) {
@@ -2073,8 +2071,8 @@ int nommu_shrink_inode_mappings(struct inode *inode, size_t size,
 	 * we don't check for any regions that start beyond the EOF as there
 	 * shouldn't be any
 	 */
-	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
-			      0, ULONG_MAX) {
+	vma_interval_tree_foreach(vma, &inode->i_mapping->i_mmap,
+				  0, ULONG_MAX) {
 		if (!(vma->vm_flags & VM_SHARED))
 			continue;
 
diff --git a/mm/prio_tree.c b/mm/prio_tree.c
deleted file mode 100644
index 799dcfd7cd8c..000000000000
--- a/mm/prio_tree.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- * mm/prio_tree.c - priority search tree for mapping->i_mmap
- *
- * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
- *
- * This file is released under the GPL v2.
- *
- * Based on the radix priority search tree proposed by Edward M. McCreight
- * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
- *
- * 02Feb2004	Initial version
- */
-
-#include <linux/mm.h>
-#include <linux/prio_tree.h>
-#include <linux/prefetch.h>
-
-/*
- * See lib/prio_tree.c for details on the general radix priority search tree
- * code.
- */
-
-/*
- * The following #defines are mirrored from lib/prio_tree.c. They're only used
- * for debugging, and should be removed (along with the debugging code using
- * them) when switching also VMAs to the regular prio_tree code.
- */
-
-#define RADIX_INDEX(vma)  ((vma)->vm_pgoff)
-#define VMA_SIZE(vma)	  (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
-/* avoid overflow */
-#define HEAP_INDEX(vma)   ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
-
-/*
- * Radix priority search tree for address_space->i_mmap
- *
- * For each vma that map a unique set of file pages i.e., unique [radix_index,
- * heap_index] value, we have a corresponding priority search tree node. If
- * multiple vmas have identical [radix_index, heap_index] value, then one of
- * them is used as a tree node and others are stored in a vm_set list. The tree
- * node points to the first vma (head) of the list using vm_set.head.
- *
- * prio_tree_root
- *      |
- *      A       vm_set.head
- *     / \      /
- *    L   R -> H-I-J-K-M-N-O-P-Q-S
- *    ^   ^    <-- vm_set.list -->
- *  tree nodes
- *
- * We need some way to identify whether a vma is a tree node, head of a vm_set
- * list, or just a member of a vm_set list. We cannot use vm_flags to store
- * such information. The reason is, in the above figure, it is possible that
- * vm_flags' of R and H are covered by the different mmap_sems. When R is
- * removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold
- * H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now.
- * That's why some trick involving shared.vm_set.parent is used for identifying
- * tree nodes and list head nodes.
- *
- * vma radix priority search tree node rules:
- *
- * vma->shared.vm_set.parent != NULL    ==> a tree node
- *      vma->shared.vm_set.head != NULL ==> list of others mapping same range
- *      vma->shared.vm_set.head == NULL ==> no others map the same range
- *
- * vma->shared.vm_set.parent == NULL
- * 	vma->shared.vm_set.head != NULL ==> list head of vmas mapping same range
- * 	vma->shared.vm_set.head == NULL ==> a list node
- */
-
-/*
- * Add a new vma known to map the same set of pages as the old vma:
- * useful for fork's dup_mmap as well as vma_prio_tree_insert below.
- * Note that it just happens to work correctly on i_mmap_nonlinear too.
- */
-void vma_prio_tree_add(struct vm_area_struct *vma, struct vm_area_struct *old)
-{
-	/* Leave these BUG_ONs till prio_tree patch stabilizes */
-	BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old));
-	BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old));
-
-	vma->shared.vm_set.head = NULL;
-	vma->shared.vm_set.parent = NULL;
-
-	if (!old->shared.vm_set.parent)
-		list_add(&vma->shared.vm_set.list,
-				&old->shared.vm_set.list);
-	else if (old->shared.vm_set.head)
-		list_add_tail(&vma->shared.vm_set.list,
-				&old->shared.vm_set.head->shared.vm_set.list);
-	else {
-		INIT_LIST_HEAD(&vma->shared.vm_set.list);
-		vma->shared.vm_set.head = old;
-		old->shared.vm_set.head = vma;
-	}
-}
-
-void vma_prio_tree_insert(struct vm_area_struct *vma,
-			  struct prio_tree_root *root)
-{
-	struct prio_tree_node *ptr;
-	struct vm_area_struct *old;
-
-	vma->shared.vm_set.head = NULL;
-
-	ptr = raw_prio_tree_insert(root, &vma->shared.prio_tree_node);
-	if (ptr != (struct prio_tree_node *) &vma->shared.prio_tree_node) {
-		old = prio_tree_entry(ptr, struct vm_area_struct,
-					shared.prio_tree_node);
-		vma_prio_tree_add(vma, old);
-	}
-}
-
-void vma_prio_tree_remove(struct vm_area_struct *vma,
-			  struct prio_tree_root *root)
-{
-	struct vm_area_struct *node, *head, *new_head;
-
-	if (!vma->shared.vm_set.head) {
-		if (!vma->shared.vm_set.parent)
-			list_del_init(&vma->shared.vm_set.list);
-		else
-			raw_prio_tree_remove(root, &vma->shared.prio_tree_node);
-	} else {
-		/* Leave this BUG_ON till prio_tree patch stabilizes */
-		BUG_ON(vma->shared.vm_set.head->shared.vm_set.head != vma);
-		if (vma->shared.vm_set.parent) {
-			head = vma->shared.vm_set.head;
-			if (!list_empty(&head->shared.vm_set.list)) {
-				new_head = list_entry(
-					head->shared.vm_set.list.next,
-					struct vm_area_struct,
-					shared.vm_set.list);
-				list_del_init(&head->shared.vm_set.list);
-			} else
-				new_head = NULL;
-
-			raw_prio_tree_replace(root, &vma->shared.prio_tree_node,
-					&head->shared.prio_tree_node);
-			head->shared.vm_set.head = new_head;
-			if (new_head)
-				new_head->shared.vm_set.head = head;
-
-		} else {
-			node = vma->shared.vm_set.head;
-			if (!list_empty(&vma->shared.vm_set.list)) {
-				new_head = list_entry(
-					vma->shared.vm_set.list.next,
-					struct vm_area_struct,
-					shared.vm_set.list);
-				list_del_init(&vma->shared.vm_set.list);
-				node->shared.vm_set.head = new_head;
-				new_head->shared.vm_set.head = node;
-			} else
-				node->shared.vm_set.head = NULL;
-		}
-	}
-}
-
-/*
- * Helper function to enumerate vmas that map a given file page or a set of
- * contiguous file pages. The function returns vmas that at least map a single
- * page in the given range of contiguous file pages.
- */
-struct vm_area_struct *vma_prio_tree_next(struct vm_area_struct *vma,
-					struct prio_tree_iter *iter)
-{
-	struct prio_tree_node *ptr;
-	struct vm_area_struct *next;
-
-	if (!vma) {
-		/*
-		 * First call is with NULL vma
-		 */
-		ptr = prio_tree_next(iter);
-		if (ptr) {
-			next = prio_tree_entry(ptr, struct vm_area_struct,
-						shared.prio_tree_node);
-			prefetch(next->shared.vm_set.head);
-			return next;
-		} else
-			return NULL;
-	}
-
-	if (vma->shared.vm_set.parent) {
-		if (vma->shared.vm_set.head) {
-			next = vma->shared.vm_set.head;
-			prefetch(next->shared.vm_set.list.next);
-			return next;
-		}
-	} else {
-		next = list_entry(vma->shared.vm_set.list.next,
-				struct vm_area_struct, shared.vm_set.list);
-		if (!next->shared.vm_set.head) {
-			prefetch(next->shared.vm_set.list.next);
-			return next;
-		}
-	}
-
-	ptr = prio_tree_next(iter);
-	if (ptr) {
-		next = prio_tree_entry(ptr, struct vm_area_struct,
-					shared.prio_tree_node);
-		prefetch(next->shared.vm_set.head);
-		return next;
-	} else
-		return NULL;
-}
diff --git a/mm/rmap.c b/mm/rmap.c
index 0f3b7cda2a24..7b5b51d25fc5 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -820,7 +820,6 @@ static int page_referenced_file(struct page *page,
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int referenced = 0;
 
 	/*
@@ -846,7 +845,7 @@ static int page_referenced_file(struct page *page,
 	 */
 	mapcount = page_mapcount(page);
 
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -945,13 +944,12 @@ static int page_mkclean_file(struct address_space *mapping, struct page *page)
 {
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int ret = 0;
 
 	BUG_ON(PageAnon(page));
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		if (vma->vm_flags & VM_SHARED) {
 			unsigned long address = vma_address(page, vma);
 			if (address == -EFAULT)
@@ -1547,7 +1545,6 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int ret = SWAP_AGAIN;
 	unsigned long cursor;
 	unsigned long max_nl_cursor = 0;
@@ -1555,7 +1552,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	unsigned int mapcount;
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1576,7 +1573,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 		goto out;
 
 	list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-						shared.vm_set.list) {
+							shared.nonlinear) {
 		cursor = (unsigned long) vma->vm_private_data;
 		if (cursor > max_nl_cursor)
 			max_nl_cursor = cursor;
@@ -1608,7 +1605,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 
 	do {
 		list_for_each_entry(vma, &mapping->i_mmap_nonlinear,
-						shared.vm_set.list) {
+							shared.nonlinear) {
 			cursor = (unsigned long) vma->vm_private_data;
 			while ( cursor < max_nl_cursor &&
 				cursor < vma->vm_end - vma->vm_start) {
@@ -1631,7 +1628,7 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
 	 * in locked vmas).  Reset cursor on all unreserved nonlinear
 	 * vmas, now forgetting on which ones it had fallen behind.
 	 */
-	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
+	list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.nonlinear)
 		vma->vm_private_data = NULL;
 out:
 	mutex_unlock(&mapping->i_mmap_mutex);
@@ -1748,13 +1745,12 @@ static int rmap_walk_file(struct page *page, int (*rmap_one)(struct page *,
 	struct address_space *mapping = page->mapping;
 	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	int ret = SWAP_AGAIN;
 
 	if (!mapping)
 		return ret;
 	mutex_lock(&mapping->i_mmap_mutex);
-	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+	vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
-- 
cgit v1.2.3


From 9c079add0d0f45220f4bb37febf0621137ec2d38 Mon Sep 17 00:00:00 2001
From: Michel Lespinasse <walken@google.com>
Date: Mon, 8 Oct 2012 16:31:33 -0700
Subject: rbtree: move augmented rbtree functionality to rbtree_augmented.h

Provide rb_insert_augmented() and rb_erase_augmented() through a new
rbtree_augmented.h include file.  rb_erase_augmented() is defined there as
an __always_inline function, in order to allow inlining of augmented
rbtree callbacks into it.  Since this generates a relatively large
function, each augmented rbtree user should make sure to have a single
call site.

Signed-off-by: Michel Lespinasse <walken@google.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Hillf Danton <dhillf@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/rbtree.txt           |  13 +++
 arch/x86/mm/pat_rbtree.c           |   2 +-
 include/linux/interval_tree_tmpl.h |   8 +-
 include/linux/rbtree.h             |  48 --------
 include/linux/rbtree_augmented.h   | 223 +++++++++++++++++++++++++++++++++++++
 lib/rbtree.c                       | 162 ++-------------------------
 lib/rbtree_test.c                  |   2 +-
 7 files changed, 255 insertions(+), 203 deletions(-)
 create mode 100644 include/linux/rbtree_augmented.h

(limited to 'arch/x86')

diff --git a/Documentation/rbtree.txt b/Documentation/rbtree.txt
index 0a0b6dce3e08..61b6c48871a0 100644
--- a/Documentation/rbtree.txt
+++ b/Documentation/rbtree.txt
@@ -202,6 +202,14 @@ An rbtree user who wants this feature will have to call the augmentation
 functions with the user provided augmentation callback when inserting
 and erasing nodes.
 
+C files implementing augmented rbtree manipulation must include
+<linux/rbtree_augmented.h> instead of <linus/rbtree.h>. Note that
+linux/rbtree_augmented.h exposes some rbtree implementations details
+you are not expected to rely on; please stick to the documented APIs
+there and do not include <linux/rbtree_augmented.h> from header files
+either so as to minimize chances of your users accidentally relying on
+such implementation details.
+
 On insertion, the user must update the augmented information on the path
 leading to the inserted node, then call rb_link_node() as usual and
 rb_augment_inserted() instead of the usual rb_insert_color() call.
@@ -227,6 +235,11 @@ In both cases, the callbacks are provided through struct rb_augment_callbacks.
   subtree to a newly assigned subtree root AND recomputes the augmented
   information for the former subtree root.
 
+The compiled code for rb_erase_augmented() may inline the propagation and
+copy callbacks, which results in a large function, so each augmented rbtree
+user should have a single rb_erase_augmented() call site in order to limit
+compiled code size.
+
 
 Sample usage:
 
diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c
index 4d116959075d..415f6c4ced36 100644
--- a/arch/x86/mm/pat_rbtree.c
+++ b/arch/x86/mm/pat_rbtree.c
@@ -12,7 +12,7 @@
 #include <linux/debugfs.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
-#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
 #include <linux/sched.h>
 #include <linux/gfp.h>
 
diff --git a/include/linux/interval_tree_tmpl.h b/include/linux/interval_tree_tmpl.h
index c65deda31413..c1aeb922d65f 100644
--- a/include/linux/interval_tree_tmpl.h
+++ b/include/linux/interval_tree_tmpl.h
@@ -19,6 +19,8 @@
   include/linux/interval_tree_tmpl.h
 */
 
+#include <linux/rbtree_augmented.h>
+
 /*
  * Template for implementing interval trees
  *
@@ -57,7 +59,8 @@ static inline ITTYPE IT(compute_subtree_last)(ITSTRUCT *node)
 	return max;
 }
 
-static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
+static inline void
+IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
 {
 	while (rb != stop) {
 		ITSTRUCT *node = rb_entry(rb, ITSTRUCT, ITRB);
@@ -69,7 +72,8 @@ static void IT(augment_propagate)(struct rb_node *rb, struct rb_node *stop)
 	}
 }
 
-static void IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new)
+static inline void
+IT(augment_copy)(struct rb_node *rb_old, struct rb_node *rb_new)
 {
 	ITSTRUCT *old = rb_entry(rb_old, ITSTRUCT, ITRB);
 	ITSTRUCT *new = rb_entry(rb_new, ITSTRUCT, ITRB);
diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 8d1e83b1c87b..0022c1bb1e26 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -62,54 +62,6 @@ extern void rb_insert_color(struct rb_node *, struct rb_root *);
 extern void rb_erase(struct rb_node *, struct rb_root *);
 
 
-struct rb_augment_callbacks {
-	void (*propagate)(struct rb_node *node, struct rb_node *stop);
-	void (*copy)(struct rb_node *old, struct rb_node *new);
-	void (*rotate)(struct rb_node *old, struct rb_node *new);
-};
-
-extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
-	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
-extern void rb_erase_augmented(struct rb_node *node, struct rb_root *root,
-			       const struct rb_augment_callbacks *augment);
-static inline void
-rb_insert_augmented(struct rb_node *node, struct rb_root *root,
-		    const struct rb_augment_callbacks *augment)
-{
-	__rb_insert_augmented(node, root, augment->rotate);
-}
-
-#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,	      \
-			     rbtype, rbaugmented, rbcompute)		      \
-static void rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)    \
-{									      \
-	while (rb != stop) {						      \
-		rbstruct *node = rb_entry(rb, rbstruct, rbfield);	      \
-		rbtype augmented = rbcompute(node);			      \
-		if (node->rbaugmented == augmented)			      \
-			break;						      \
-		node->rbaugmented = augmented;				      \
-		rb = rb_parent(&node->rbfield);				      \
-	}								      \
-}									      \
-static void rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)   \
-{									      \
-	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		      \
-	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		      \
-	new->rbaugmented = old->rbaugmented;				      \
-}									      \
-static void rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \
-{									      \
-	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		      \
-	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		      \
-	new->rbaugmented = old->rbaugmented;				      \
-	old->rbaugmented = rbcompute(old);				      \
-}									      \
-rbstatic const struct rb_augment_callbacks rbname = {			      \
-	rbname ## _propagate, rbname ## _copy, rbname ## _rotate	      \
-};
-
-
 /* Find logical next and previous nodes in a tree */
 extern struct rb_node *rb_next(const struct rb_node *);
 extern struct rb_node *rb_prev(const struct rb_node *);
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
new file mode 100644
index 000000000000..214caa33433b
--- /dev/null
+++ b/include/linux/rbtree_augmented.h
@@ -0,0 +1,223 @@
+/*
+  Red Black Trees
+  (C) 1999  Andrea Arcangeli <andrea@suse.de>
+  (C) 2002  David Woodhouse <dwmw2@infradead.org>
+  (C) 2012  Michel Lespinasse <walken@google.com>
+
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; either version 2 of the License, or
+  (at your option) any later version.
+
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+  linux/include/linux/rbtree_augmented.h
+*/
+
+#ifndef _LINUX_RBTREE_AUGMENTED_H
+#define _LINUX_RBTREE_AUGMENTED_H
+
+#include <linux/rbtree.h>
+
+/*
+ * Please note - only struct rb_augment_callbacks and the prototypes for
+ * rb_insert_augmented() and rb_erase_augmented() are intended to be public.
+ * The rest are implementation details you are not expected to depend on.
+ *
+ * See Documentation/rbtree.txt for documentation and samples.
+ */
+
+struct rb_augment_callbacks {
+	void (*propagate)(struct rb_node *node, struct rb_node *stop);
+	void (*copy)(struct rb_node *old, struct rb_node *new);
+	void (*rotate)(struct rb_node *old, struct rb_node *new);
+};
+
+extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+static inline void
+rb_insert_augmented(struct rb_node *node, struct rb_root *root,
+		    const struct rb_augment_callbacks *augment)
+{
+	__rb_insert_augmented(node, root, augment->rotate);
+}
+
+#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield,	\
+			     rbtype, rbaugmented, rbcompute)		\
+static inline void							\
+rbname ## _propagate(struct rb_node *rb, struct rb_node *stop)		\
+{									\
+	while (rb != stop) {						\
+		rbstruct *node = rb_entry(rb, rbstruct, rbfield);	\
+		rbtype augmented = rbcompute(node);			\
+		if (node->rbaugmented == augmented)			\
+			break;						\
+		node->rbaugmented = augmented;				\
+		rb = rb_parent(&node->rbfield);				\
+	}								\
+}									\
+static inline void							\
+rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new)		\
+{									\
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
+	new->rbaugmented = old->rbaugmented;				\
+}									\
+static void								\
+rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new)	\
+{									\
+	rbstruct *old = rb_entry(rb_old, rbstruct, rbfield);		\
+	rbstruct *new = rb_entry(rb_new, rbstruct, rbfield);		\
+	new->rbaugmented = old->rbaugmented;				\
+	old->rbaugmented = rbcompute(old);				\
+}									\
+rbstatic const struct rb_augment_callbacks rbname = {			\
+	rbname ## _propagate, rbname ## _copy, rbname ## _rotate	\
+};
+
+
+#define	RB_RED		0
+#define	RB_BLACK	1
+
+#define __rb_parent(pc)    ((struct rb_node *)(pc & ~3))
+
+#define __rb_color(pc)     ((pc) & 1)
+#define __rb_is_black(pc)  __rb_color(pc)
+#define __rb_is_red(pc)    (!__rb_color(pc))
+#define rb_color(rb)       __rb_color((rb)->__rb_parent_color)
+#define rb_is_red(rb)      __rb_is_red((rb)->__rb_parent_color)
+#define rb_is_black(rb)    __rb_is_black((rb)->__rb_parent_color)
+
+static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
+{
+	rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
+}
+
+static inline void rb_set_parent_color(struct rb_node *rb,
+				       struct rb_node *p, int color)
+{
+	rb->__rb_parent_color = (unsigned long)p | color;
+}
+
+static inline void
+__rb_change_child(struct rb_node *old, struct rb_node *new,
+		  struct rb_node *parent, struct rb_root *root)
+{
+	if (parent) {
+		if (parent->rb_left == old)
+			parent->rb_left = new;
+		else
+			parent->rb_right = new;
+	} else
+		root->rb_node = new;
+}
+
+extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root,
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new));
+
+static __always_inline void
+rb_erase_augmented(struct rb_node *node, struct rb_root *root,
+		   const struct rb_augment_callbacks *augment)
+{
+	struct rb_node *child = node->rb_right, *tmp = node->rb_left;
+	struct rb_node *parent, *rebalance;
+	unsigned long pc;
+
+	if (!tmp) {
+		/*
+		 * Case 1: node to erase has no more than 1 child (easy!)
+		 *
+		 * Note that if there is one child it must be red due to 5)
+		 * and node must be black due to 4). We adjust colors locally
+		 * so as to bypass __rb_erase_color() later on.
+		 */
+		pc = node->__rb_parent_color;
+		parent = __rb_parent(pc);
+		__rb_change_child(node, child, parent, root);
+		if (child) {
+			child->__rb_parent_color = pc;
+			rebalance = NULL;
+		} else
+			rebalance = __rb_is_black(pc) ? parent : NULL;
+		tmp = parent;
+	} else if (!child) {
+		/* Still case 1, but this time the child is node->rb_left */
+		tmp->__rb_parent_color = pc = node->__rb_parent_color;
+		parent = __rb_parent(pc);
+		__rb_change_child(node, tmp, parent, root);
+		rebalance = NULL;
+		tmp = parent;
+	} else {
+		struct rb_node *successor = child, *child2;
+		tmp = child->rb_left;
+		if (!tmp) {
+			/*
+			 * Case 2: node's successor is its right child
+			 *
+			 *    (n)          (s)
+			 *    / \          / \
+			 *  (x) (s)  ->  (x) (c)
+			 *        \
+			 *        (c)
+			 */
+			parent = successor;
+			child2 = successor->rb_right;
+			augment->copy(node, successor);
+		} else {
+			/*
+			 * Case 3: node's successor is leftmost under
+			 * node's right child subtree
+			 *
+			 *    (n)          (s)
+			 *    / \          / \
+			 *  (x) (y)  ->  (x) (y)
+			 *      /            /
+			 *    (p)          (p)
+			 *    /            /
+			 *  (s)          (c)
+			 *    \
+			 *    (c)
+			 */
+			do {
+				parent = successor;
+				successor = tmp;
+				tmp = tmp->rb_left;
+			} while (tmp);
+			parent->rb_left = child2 = successor->rb_right;
+			successor->rb_right = child;
+			rb_set_parent(child, successor);
+			augment->copy(node, successor);
+			augment->propagate(parent, successor);
+		}
+
+		successor->rb_left = tmp = node->rb_left;
+		rb_set_parent(tmp, successor);
+
+		pc = node->__rb_parent_color;
+		tmp = __rb_parent(pc);
+		__rb_change_child(node, successor, tmp, root);
+		if (child2) {
+			successor->__rb_parent_color = pc;
+			rb_set_parent_color(child2, parent, RB_BLACK);
+			rebalance = NULL;
+		} else {
+			unsigned long pc2 = successor->__rb_parent_color;
+			successor->__rb_parent_color = pc;
+			rebalance = __rb_is_black(pc2) ? parent : NULL;
+		}
+		tmp = successor;
+	}
+
+	augment->propagate(tmp, NULL);
+	if (rebalance)
+		__rb_erase_color(rebalance, root, augment->rotate);
+}
+
+#endif	/* _LINUX_RBTREE_AUGMENTED_H */
diff --git a/lib/rbtree.c b/lib/rbtree.c
index c0088ca345f9..4f56a11d67fa 100644
--- a/lib/rbtree.c
+++ b/lib/rbtree.c
@@ -21,7 +21,7 @@
   linux/lib/rbtree.c
 */
 
-#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
 #include <linux/export.h>
 
 /*
@@ -44,52 +44,16 @@
  *  parentheses and have some accompanying text comment.
  */
 
-#define	RB_RED		0
-#define	RB_BLACK	1
-
-#define __rb_parent(pc)    ((struct rb_node *)(pc & ~3))
-
-#define __rb_color(pc)     ((pc) & 1)
-#define __rb_is_black(pc)  __rb_color(pc)
-#define __rb_is_red(pc)    (!__rb_color(pc))
-#define rb_color(rb)       __rb_color((rb)->__rb_parent_color)
-#define rb_is_red(rb)      __rb_is_red((rb)->__rb_parent_color)
-#define rb_is_black(rb)    __rb_is_black((rb)->__rb_parent_color)
-
 static inline void rb_set_black(struct rb_node *rb)
 {
 	rb->__rb_parent_color |= RB_BLACK;
 }
 
-static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p)
-{
-	rb->__rb_parent_color = rb_color(rb) | (unsigned long)p;
-}
-
-static inline void rb_set_parent_color(struct rb_node *rb,
-				       struct rb_node *p, int color)
-{
-	rb->__rb_parent_color = (unsigned long)p | color;
-}
-
 static inline struct rb_node *rb_red_parent(struct rb_node *red)
 {
 	return (struct rb_node *)red->__rb_parent_color;
 }
 
-static inline void
-__rb_change_child(struct rb_node *old, struct rb_node *new,
-		  struct rb_node *parent, struct rb_root *root)
-{
-	if (parent) {
-		if (parent->rb_left == old)
-			parent->rb_left = new;
-		else
-			parent->rb_right = new;
-	} else
-		root->rb_node = new;
-}
-
 /*
  * Helper function for rotations:
  * - old's parent and color get assigned to new
@@ -230,9 +194,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root,
 	}
 }
 
-static __always_inline void
+__always_inline void
 __rb_erase_color(struct rb_node *parent, struct rb_root *root,
-		 const struct rb_augment_callbacks *augment)
+	void (*augment_rotate)(struct rb_node *old, struct rb_node *new))
 {
 	struct rb_node *node = NULL, *sibling, *tmp1, *tmp2;
 
@@ -261,7 +225,7 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				rb_set_parent_color(tmp1, parent, RB_BLACK);
 				__rb_rotate_set_parents(parent, sibling, root,
 							RB_RED);
-				augment->rotate(parent, sibling);
+				augment_rotate(parent, sibling);
 				sibling = tmp1;
 			}
 			tmp1 = sibling->rb_right;
@@ -313,7 +277,7 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				if (tmp1)
 					rb_set_parent_color(tmp1, sibling,
 							    RB_BLACK);
-				augment->rotate(sibling, tmp2);
+				augment_rotate(sibling, tmp2);
 				tmp1 = sibling;
 				sibling = tmp2;
 			}
@@ -336,7 +300,7 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				rb_set_parent(tmp2, parent);
 			__rb_rotate_set_parents(parent, sibling, root,
 						RB_BLACK);
-			augment->rotate(parent, sibling);
+			augment_rotate(parent, sibling);
 			break;
 		} else {
 			sibling = parent->rb_left;
@@ -347,7 +311,7 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				rb_set_parent_color(tmp1, parent, RB_BLACK);
 				__rb_rotate_set_parents(parent, sibling, root,
 							RB_RED);
-				augment->rotate(parent, sibling);
+				augment_rotate(parent, sibling);
 				sibling = tmp1;
 			}
 			tmp1 = sibling->rb_left;
@@ -374,7 +338,7 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				if (tmp1)
 					rb_set_parent_color(tmp1, sibling,
 							    RB_BLACK);
-				augment->rotate(sibling, tmp2);
+				augment_rotate(sibling, tmp2);
 				tmp1 = sibling;
 				sibling = tmp2;
 			}
@@ -386,109 +350,12 @@ __rb_erase_color(struct rb_node *parent, struct rb_root *root,
 				rb_set_parent(tmp2, parent);
 			__rb_rotate_set_parents(parent, sibling, root,
 						RB_BLACK);
-			augment->rotate(parent, sibling);
+			augment_rotate(parent, sibling);
 			break;
 		}
 	}
 }
-
-static __always_inline void
-__rb_erase(struct rb_node *node, struct rb_root *root,
-	   const struct rb_augment_callbacks *augment)
-{
-	struct rb_node *child = node->rb_right, *tmp = node->rb_left;
-	struct rb_node *parent, *rebalance;
-	unsigned long pc;
-
-	if (!tmp) {
-		/*
-		 * Case 1: node to erase has no more than 1 child (easy!)
-		 *
-		 * Note that if there is one child it must be red due to 5)
-		 * and node must be black due to 4). We adjust colors locally
-		 * so as to bypass __rb_erase_color() later on.
-		 */
-		pc = node->__rb_parent_color;
-		parent = __rb_parent(pc);
-		__rb_change_child(node, child, parent, root);
-		if (child) {
-			child->__rb_parent_color = pc;
-			rebalance = NULL;
-		} else
-			rebalance = __rb_is_black(pc) ? parent : NULL;
-		tmp = parent;
-	} else if (!child) {
-		/* Still case 1, but this time the child is node->rb_left */
-		tmp->__rb_parent_color = pc = node->__rb_parent_color;
-		parent = __rb_parent(pc);
-		__rb_change_child(node, tmp, parent, root);
-		rebalance = NULL;
-		tmp = parent;
-	} else {
-		struct rb_node *successor = child, *child2;
-		tmp = child->rb_left;
-		if (!tmp) {
-			/*
-			 * Case 2: node's successor is its right child
-			 *
-			 *    (n)          (s)
-			 *    / \          / \
-			 *  (x) (s)  ->  (x) (c)
-			 *        \
-			 *        (c)
-			 */
-			parent = successor;
-			child2 = successor->rb_right;
-			augment->copy(node, successor);
-		} else {
-			/*
-			 * Case 3: node's successor is leftmost under
-			 * node's right child subtree
-			 *
-			 *    (n)          (s)
-			 *    / \          / \
-			 *  (x) (y)  ->  (x) (y)
-			 *      /            /
-			 *    (p)          (p)
-			 *    /            /
-			 *  (s)          (c)
-			 *    \
-			 *    (c)
-			 */
-			do {
-				parent = successor;
-				successor = tmp;
-				tmp = tmp->rb_left;
-			} while (tmp);
-			parent->rb_left = child2 = successor->rb_right;
-			successor->rb_right = child;
-			rb_set_parent(child, successor);
-			augment->copy(node, successor);
-			augment->propagate(parent, successor);
-		}
-
-		successor->rb_left = tmp = node->rb_left;
-		rb_set_parent(tmp, successor);
-
-		pc = node->__rb_parent_color;
-		tmp = __rb_parent(pc);
-		__rb_change_child(node, successor, tmp, root);
-		if (child2) {
-			successor->__rb_parent_color = pc;
-			rb_set_parent_color(child2, parent, RB_BLACK);
-			rebalance = NULL;
-		} else {
-			unsigned long pc2 = successor->__rb_parent_color;
-			successor->__rb_parent_color = pc;
-			rebalance = __rb_is_black(pc2) ? parent : NULL;
-		}
-		tmp = successor;
-	}
-
-	augment->propagate(tmp, NULL);
-	if (rebalance)
-		__rb_erase_color(rebalance, root, augment);
-}
+EXPORT_SYMBOL(__rb_erase_color);
 
 /*
  * Non-augmented rbtree manipulation functions.
@@ -513,7 +380,7 @@ EXPORT_SYMBOL(rb_insert_color);
 
 void rb_erase(struct rb_node *node, struct rb_root *root)
 {
-	__rb_erase(node, root, &dummy_callbacks);
+	rb_erase_augmented(node, root, &dummy_callbacks);
 }
 EXPORT_SYMBOL(rb_erase);
 
@@ -531,13 +398,6 @@ void __rb_insert_augmented(struct rb_node *node, struct rb_root *root,
 }
 EXPORT_SYMBOL(__rb_insert_augmented);
 
-void rb_erase_augmented(struct rb_node *node, struct rb_root *root,
-			const struct rb_augment_callbacks *augment)
-{
-	__rb_erase(node, root, augment);
-}
-EXPORT_SYMBOL(rb_erase_augmented);
-
 /*
  * This function returns the first node (in sort order) of the tree.
  */
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index b20e99969b0f..268b23951fec 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -1,5 +1,5 @@
 #include <linux/module.h>
-#include <linux/rbtree.h>
+#include <linux/rbtree_augmented.h>
 #include <linux/random.h>
 #include <asm/timex.h>
 
-- 
cgit v1.2.3


From e79bee24fd6134f90af4228cfebd010136d67631 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Mon, 8 Oct 2012 16:32:18 -0700
Subject: atomic: implement generic atomic_dec_if_positive()

The x86 implementation of atomic_dec_if_positive is quite generic, so make
it available to all architectures.

This is needed for "swap: add a simple detector for inappropriate swapin
readahead".

[akpm@linux-foundation.org: do the "#define foo foo" trick in the conventional manner]
Signed-off-by: Shaohua Li <shli@fusionio.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Rik van Riel <riel@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michal Simek <monstr@monstr.eu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/microblaze/include/asm/atomic.h |  1 +
 arch/powerpc/include/asm/atomic.h    |  1 +
 arch/x86/include/asm/atomic.h        | 24 ------------------------
 include/linux/atomic.h               | 25 +++++++++++++++++++++++++
 4 files changed, 27 insertions(+), 24 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/microblaze/include/asm/atomic.h b/arch/microblaze/include/asm/atomic.h
index 472d8bf726df..42ac382a09da 100644
--- a/arch/microblaze/include/asm/atomic.h
+++ b/arch/microblaze/include/asm/atomic.h
@@ -22,5 +22,6 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 
 	return res;
 }
+#define atomic_dec_if_positive atomic_dec_if_positive
 
 #endif /* _ASM_MICROBLAZE_ATOMIC_H */
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index da29032ae38f..e3b1d41c89be 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -268,6 +268,7 @@ static __inline__ int atomic_dec_if_positive(atomic_t *v)
 
 	return t;
 }
+#define atomic_dec_if_positive atomic_dec_if_positive
 
 #define smp_mb__before_atomic_dec()     smp_mb()
 #define smp_mb__after_atomic_dec()      smp_mb()
diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h
index 250b8774c158..b6c3b821acf6 100644
--- a/arch/x86/include/asm/atomic.h
+++ b/arch/x86/include/asm/atomic.h
@@ -240,30 +240,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
 	return c;
 }
 
-
-/*
- * atomic_dec_if_positive - decrement by 1 if old value positive
- * @v: pointer of type atomic_t
- *
- * The function returns the old value of *v minus 1, even if
- * the atomic variable, v, was not decremented.
- */
-static inline int atomic_dec_if_positive(atomic_t *v)
-{
-	int c, old, dec;
-	c = atomic_read(v);
-	for (;;) {
-		dec = c - 1;
-		if (unlikely(dec < 0))
-			break;
-		old = atomic_cmpxchg((v), c, dec);
-		if (likely(old == c))
-			break;
-		c = old;
-	}
-	return dec;
-}
-
 /**
  * atomic_inc_short - increment of a short integer
  * @v: pointer to type int
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 70cfcb2d63c4..5b08a8540ecf 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -86,6 +86,31 @@ static inline int atomic_dec_unless_positive(atomic_t *p)
 }
 #endif
 
+/*
+ * atomic_dec_if_positive - decrement by 1 if old value positive
+ * @v: pointer of type atomic_t
+ *
+ * The function returns the old value of *v minus 1, even if
+ * the atomic variable, v, was not decremented.
+ */
+#ifndef atomic_dec_if_positive
+static inline int atomic_dec_if_positive(atomic_t *v)
+{
+	int c, old, dec;
+	c = atomic_read(v);
+	for (;;) {
+		dec = c - 1;
+		if (unlikely(dec < 0))
+			break;
+		old = atomic_cmpxchg((v), c, dec);
+		if (likely(old == c))
+			break;
+		c = old;
+	}
+	return dec;
+}
+#endif
+
 #ifndef CONFIG_ARCH_HAS_ATOMIC_OR
 static inline void atomic_or(int i, atomic_t *v)
 {
-- 
cgit v1.2.3


From 45cac65b0fcd287ebb877b141d40ba9bbe8e5da7 Mon Sep 17 00:00:00 2001
From: Shaohua Li <shli@kernel.org>
Date: Mon, 8 Oct 2012 16:32:19 -0700
Subject: readahead: fault retry breaks mmap file read random detection

.fault now can retry.  The retry can break state machine of .fault.  In
filemap_fault, if page is miss, ra->mmap_miss is increased.  In the second
try, since the page is in page cache now, ra->mmap_miss is decreased.  And
these are done in one fault, so we can't detect random mmap file access.

Add a new flag to indicate .fault is tried once.  In the second try, skip
ra->mmap_miss decreasing.  The filemap_fault state machine is ok with it.

I only tested x86, didn't test other archs, but looks the change for other
archs is obvious, but who knows :)

Signed-off-by: Shaohua Li <shaohua.li@fusionio.com>
Cc: Rik van Riel <riel@redhat.com>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm/mm/fault.c        | 1 +
 arch/avr32/mm/fault.c      | 1 +
 arch/cris/mm/fault.c       | 1 +
 arch/hexagon/mm/vm_fault.c | 1 +
 arch/ia64/mm/fault.c       | 1 +
 arch/m68k/mm/fault.c       | 1 +
 arch/microblaze/mm/fault.c | 1 +
 arch/mips/mm/fault.c       | 1 +
 arch/openrisc/mm/fault.c   | 1 +
 arch/powerpc/mm/fault.c    | 1 +
 arch/s390/mm/fault.c       | 1 +
 arch/sh/mm/fault.c         | 1 +
 arch/sparc/mm/fault_32.c   | 1 +
 arch/sparc/mm/fault_64.c   | 1 +
 arch/tile/mm/fault.c       | 1 +
 arch/um/kernel/trap.c      | 1 +
 arch/x86/mm/fault.c        | 1 +
 arch/xtensa/mm/fault.c     | 1 +
 include/linux/mm.h         | 1 +
 mm/filemap.c               | 4 ++--
 20 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index c3bd83450227..5dbf13f954f6 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -336,6 +336,7 @@ retry:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			* of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			goto retry;
 		}
 	}
diff --git a/arch/avr32/mm/fault.c b/arch/avr32/mm/fault.c
index b92e60958617..b2f2d2d66849 100644
--- a/arch/avr32/mm/fault.c
+++ b/arch/avr32/mm/fault.c
@@ -152,6 +152,7 @@ good_area:
 			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would have
diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c
index 45fd542cf173..73312ab6c696 100644
--- a/arch/cris/mm/fault.c
+++ b/arch/cris/mm/fault.c
@@ -186,6 +186,7 @@ retry:
 			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c
index 06695cc4fe58..513b74cb397e 100644
--- a/arch/hexagon/mm/vm_fault.c
+++ b/arch/hexagon/mm/vm_fault.c
@@ -113,6 +113,7 @@ good_area:
 				current->min_flt++;
 			if (fault & VM_FAULT_RETRY) {
 				flags &= ~FAULT_FLAG_ALLOW_RETRY;
+				flags |= FAULT_FLAG_TRIED;
 				goto retry;
 			}
 		}
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 8443daf4f515..6cf0341f978e 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -184,6 +184,7 @@ retry:
 			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			 /* No need to up_read(&mm->mmap_sem) as we would
 			 * have already released it in __lock_page_or_retry
diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c
index aeebbb7b30f0..a563727806bf 100644
--- a/arch/m68k/mm/fault.c
+++ b/arch/m68k/mm/fault.c
@@ -170,6 +170,7 @@ good_area:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c
index eb365d6795fa..714b35a9c4f7 100644
--- a/arch/microblaze/mm/fault.c
+++ b/arch/microblaze/mm/fault.c
@@ -233,6 +233,7 @@ good_area:
 			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c
index c14f6dfed995..9f513486af10 100644
--- a/arch/mips/mm/fault.c
+++ b/arch/mips/mm/fault.c
@@ -171,6 +171,7 @@ good_area:
 		}
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index 40f850e9766c..e2bfafce66c5 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -183,6 +183,7 @@ good_area:
 			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			 /* No need to up_read(&mm->mmap_sem) as we would
 			 * have already released it in __lock_page_or_retry
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 5495ebe983a2..0a6b28336eb0 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -451,6 +451,7 @@ good_area:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			goto retry;
 		}
 	}
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index ac9122ca1152..04ad4001a289 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -367,6 +367,7 @@ retry:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			down_read(&mm->mmap_sem);
 			goto retry;
 		}
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 3bdc1ad9a341..cbbdcad8fcb3 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -504,6 +504,7 @@ good_area:
 		}
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/*
 			 * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c
index 77ac917be152..e98bfda205a2 100644
--- a/arch/sparc/mm/fault_32.c
+++ b/arch/sparc/mm/fault_32.c
@@ -265,6 +265,7 @@ good_area:
 		}
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/* No need to up_read(&mm->mmap_sem) as we would
 			 * have already released it in __lock_page_or_retry
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 1fe0429b6314..413d29263304 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -452,6 +452,7 @@ good_area:
 		}
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			/* No need to up_read(&mm->mmap_sem) as we would
 			 * have already released it in __lock_page_or_retry
diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c
index 84ce7abbf5af..fe811fa5f1b9 100644
--- a/arch/tile/mm/fault.c
+++ b/arch/tile/mm/fault.c
@@ -454,6 +454,7 @@ good_area:
 			tsk->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			 /*
 			  * No need to up_read(&mm->mmap_sem) as we would
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 0353b98ae35a..0f00e9c82080 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -89,6 +89,7 @@ good_area:
 				current->min_flt++;
 			if (fault & VM_FAULT_RETRY) {
 				flags &= ~FAULT_FLAG_ALLOW_RETRY;
+				flags |= FAULT_FLAG_TRIED;
 
 				goto retry;
 			}
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index a530b230e7d7..8e13ecb41bee 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1220,6 +1220,7 @@ good_area:
 			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
 			 * of starvation. */
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 			goto retry;
 		}
 	}
diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c
index 5a74c53bc69c..2c2f710ed1dc 100644
--- a/arch/xtensa/mm/fault.c
+++ b/arch/xtensa/mm/fault.c
@@ -126,6 +126,7 @@ good_area:
 			current->min_flt++;
 		if (fault & VM_FAULT_RETRY) {
 			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			flags |= FAULT_FLAG_TRIED;
 
 			 /* No need to up_read(&mm->mmap_sem) as we would
 			 * have already released it in __lock_page_or_retry
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b01e585ab4b5..bcaab4e6fe91 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -161,6 +161,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_ALLOW_RETRY	0x08	/* Retry fault if blocking */
 #define FAULT_FLAG_RETRY_NOWAIT	0x10	/* Don't drop mmap_sem and wait when retrying */
 #define FAULT_FLAG_KILLABLE	0x20	/* The fault task is in SIGKILL killable region */
+#define FAULT_FLAG_TRIED	0x40	/* second try */
 
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
diff --git a/mm/filemap.c b/mm/filemap.c
index a9827b42556e..83efee76a5c0 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1607,13 +1607,13 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * Do we have something in the page cache already?
 	 */
 	page = find_get_page(mapping, offset);
-	if (likely(page)) {
+	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
 		/*
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
 		do_async_mmap_readahead(vma, ra, file, page, offset);
-	} else {
+	} else if (!page) {
 		/* No page in the page cache at all */
 		do_sync_mmap_readahead(vma, ra, file, offset);
 		count_vm_event(PGMAJFAULT);
-- 
cgit v1.2.3


From 027ef6c87853b0a9df53175063028edb4950d476 Mon Sep 17 00:00:00 2001
From: Andrea Arcangeli <aarcange@redhat.com>
Date: Mon, 8 Oct 2012 16:33:27 -0700
Subject: mm: thp: fix pmd_present for split_huge_page and PROT_NONE with THP

In many places !pmd_present has been converted to pmd_none.  For pmds
that's equivalent and pmd_none is quicker so using pmd_none is better.

However (unless we delete pmd_present) we should provide an accurate
pmd_present too.  This will avoid the risk of code thinking the pmd is non
present because it's under __split_huge_page_map, see the pmd_mknotpresent
there and the comment above it.

If the page has been mprotected as PROT_NONE, it would also lead to a
pmd_present false negative in the same way as the race with
split_huge_page.

Because the PSE bit stays on at all times (both during split_huge_page and
when the _PAGE_PROTNONE bit get set), we could only check for the PSE bit,
but checking the PROTNONE bit too is still good to remember pmd_present
must always keep PROT_NONE into account.

This explains a not reproducible BUG_ON that was seldom reported on the
lists.

The same issue is in pmd_large, it would go wrong with both PROT_NONE and
if it races with split_huge_page.

Signed-off-by: Andrea Arcangeli <aarcange@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/pgtable.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index fc9948465293..a1f780d45f76 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -146,8 +146,7 @@ static inline unsigned long pmd_pfn(pmd_t pmd)
 
 static inline int pmd_large(pmd_t pte)
 {
-	return (pmd_flags(pte) & (_PAGE_PSE | _PAGE_PRESENT)) ==
-		(_PAGE_PSE | _PAGE_PRESENT);
+	return pmd_flags(pte) & _PAGE_PSE;
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -415,7 +414,13 @@ static inline int pte_hidden(pte_t pte)
 
 static inline int pmd_present(pmd_t pmd)
 {
-	return pmd_flags(pmd) & _PAGE_PRESENT;
+	/*
+	 * Checking for _PAGE_PSE is needed too because
+	 * split_huge_page will temporarily clear the present bit (but
+	 * the _PAGE_PSE flag will remain set at all times while the
+	 * _PAGE_PRESENT bit is clear).
+	 */
+	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
 }
 
 static inline int pmd_none(pmd_t pmd)
-- 
cgit v1.2.3


From b113da65785d5f3f9ff1451ec0fe43d6d76da25b Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Mon, 8 Oct 2012 16:34:25 -0700
Subject: mm: Add and use update_mmu_cache_pmd() in transparent huge page code.

The transparent huge page code passes a PMD pointer in as the third
argument of update_mmu_cache(), which expects a PTE pointer.

This never got noticed because X86 implements update_mmu_cache() as a
macro and thus we don't get any type checking, and X86 is the only
architecture which supports transparent huge pages currently.

Before other architectures can support transparent huge pages properly we
need to add a new interface which will take a PMD pointer as the third
argument rather than a PTE pointer.

[akpm@linux-foundation.org: implement update_mm_cache_pmd() for s390]
Signed-off-by: David S. Miller <davem@davemloft.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Gerald Schaefer <gerald.schaefer@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/s390/include/asm/pgtable.h   | 1 +
 arch/x86/include/asm/pgtable_32.h | 1 +
 arch/x86/include/asm/pgtable_64.h | 1 +
 mm/huge_memory.c                  | 6 +++---
 4 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index ed14fc2db6e0..979fe3dc0788 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -42,6 +42,7 @@ extern void fault_init(void);
  * tables contain all the necessary information.
  */
 #define update_mmu_cache(vma, address, ptep)     do { } while (0)
+#define update_mmu_cache_pmd(vma, address, ptep) do { } while (0)
 
 /*
  * ZERO_PAGE is a global shared page that is always zero; used
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 0c92113c4cb6..8faa215a503e 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -71,6 +71,7 @@ do {						\
  * tables contain all the necessary information.
  */
 #define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index 8251be02301e..47356f9df82e 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -143,6 +143,7 @@ static inline int pgd_large(pgd_t pgd) { return 0; }
 #define pte_unmap(pte) ((void)(pte))/* NOP */
 
 #define update_mmu_cache(vma, address, ptep) do { } while (0)
+#define update_mmu_cache_pmd(vma, address, pmd) do { } while (0)
 
 /* Encode and de-code a swap entry */
 #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 3a8d6b7d95db..68a3c93036f6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -900,7 +900,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		entry = pmd_mkyoung(orig_pmd);
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 		if (pmdp_set_access_flags(vma, haddr, pmd, entry,  1))
-			update_mmu_cache(vma, address, pmd);
+			update_mmu_cache_pmd(vma, address, pmd);
 		ret |= VM_FAULT_WRITE;
 		goto out_unlock;
 	}
@@ -956,7 +956,7 @@ int do_huge_pmd_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		pmdp_clear_flush(vma, haddr, pmd);
 		page_add_new_anon_rmap(new_page, vma, haddr);
 		set_pmd_at(mm, haddr, pmd, entry);
-		update_mmu_cache(vma, address, pmd);
+		update_mmu_cache_pmd(vma, address, pmd);
 		page_remove_rmap(page);
 		put_page(page);
 		ret |= VM_FAULT_WRITE;
@@ -2041,7 +2041,7 @@ static void collapse_huge_page(struct mm_struct *mm,
 	BUG_ON(!pmd_none(*pmd));
 	page_add_new_anon_rmap(new_page, vma, address);
 	set_pmd_at(mm, address, pmd, _pmd);
-	update_mmu_cache(vma, address, pmd);
+	update_mmu_cache_pmd(vma, address, pmd);
 	pgtable_trans_huge_deposit(mm, pgtable);
 	spin_unlock(&mm->page_table_lock);
 
-- 
cgit v1.2.3


From 4301785c7c7ca712797f2f25bcd531b1d226ccb3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 8 Oct 2012 03:25:00 +0100
Subject: um/x86: merge 32 and 64bit variants of checksum.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/x86/um/asm/checksum.h    | 144 ++++++++++++++++++++++++++++++++++++++++++
 arch/x86/um/asm/checksum_32.h | 140 ----------------------------------------
 arch/x86/um/asm/checksum_64.h | 125 ------------------------------------
 3 files changed, 144 insertions(+), 265 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
index b6efe2381b5d..4b181b74454f 100644
--- a/arch/x86/um/asm/checksum.h
+++ b/arch/x86/um/asm/checksum.h
@@ -1,6 +1,150 @@
 #ifndef __UM_CHECKSUM_H
 #define __UM_CHECKSUM_H
 
+#include <linux/string.h>
+#include <linux/in6.h>
+
+/*
+ * computes the checksum of a memory block at buff, length len,
+ * and adds in "sum" (32-bit)
+ *
+ * returns a 32-bit number suitable for feeding into itself
+ * or csum_tcpudp_magic
+ *
+ * this function must be called with even lengths, except
+ * for the last fragment, which may be odd
+ *
+ * it's best to have buff aligned on a 32-bit boundary
+ */
+extern __wsum csum_partial(const void *buff, int len, __wsum sum);
+
+/*
+ *	Note: when you get a NULL pointer exception here this means someone
+ *	passed in an incorrect kernel address to one of these functions.
+ *
+ *	If you use these functions directly please don't forget the
+ *	access_ok().
+ */
+
+static __inline__
+__wsum csum_partial_copy_nocheck(const void *src, void *dst,
+				       int len, __wsum sum)
+{
+	memcpy(dst, src, len);
+	return csum_partial(dst, len, sum);
+}
+
+/*
+ * the same as csum_partial, but copies from src while it
+ * checksums, and handles user-space pointer exceptions correctly, when needed.
+ *
+ * here even more important to align src and dst on a 32-bit (or even
+ * better 64-bit) boundary
+ */
+
+static __inline__
+__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
+					 int len, __wsum sum, int *err_ptr)
+{
+	if (copy_from_user(dst, src, len)) {
+		*err_ptr = -EFAULT;
+		return (__force __wsum)-1;
+	}
+
+	return csum_partial(dst, len, sum);
+}
+
+/**
+ * csum_fold - Fold and invert a 32bit checksum.
+ * sum: 32bit unfolded sum
+ *
+ * Fold a 32bit running checksum to 16bit and invert it. This is usually
+ * the last step before putting a checksum into a packet.
+ * Make sure not to mix with 64bit checksums.
+ */
+static inline __sum16 csum_fold(__wsum sum)
+{
+	__asm__(
+		"  addl %1,%0\n"
+		"  adcl $0xffff,%0"
+		: "=r" (sum)
+		: "r" ((__force u32)sum << 16),
+		  "0" ((__force u32)sum & 0xffff0000)
+	);
+	return (__force __sum16)(~(__force u32)sum >> 16);
+}
+
+/**
+ * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum.
+ * @saddr: source address
+ * @daddr: destination address
+ * @len: length of packet
+ * @proto: ip protocol of packet
+ * @sum: initial sum to be added in (32bit unfolded)
+ *
+ * Returns the pseudo header checksum the input data. Result is
+ * 32bit unfolded.
+ */
+static inline __wsum
+csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
+		   unsigned short proto, __wsum sum)
+{
+	asm("  addl %1, %0\n"
+	    "  adcl %2, %0\n"
+	    "  adcl %3, %0\n"
+	    "  adcl $0, %0\n"
+		: "=r" (sum)
+	    : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum));
+	return sum;
+}
+
+/*
+ * computes the checksum of the TCP/UDP pseudo-header
+ * returns a 16-bit checksum, already complemented
+ */
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+					   unsigned short len,
+					   unsigned short proto,
+					   __wsum sum)
+{
+	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
+}
+
+/**
+ * ip_fast_csum - Compute the IPv4 header checksum efficiently.
+ * iph: ipv4 header
+ * ihl: length of header / 4
+ */
+static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
+{
+	unsigned int sum;
+
+	asm(	"  movl (%1), %0\n"
+		"  subl $4, %2\n"
+		"  jbe 2f\n"
+		"  addl 4(%1), %0\n"
+		"  adcl 8(%1), %0\n"
+		"  adcl 12(%1), %0\n"
+		"1: adcl 16(%1), %0\n"
+		"  lea 4(%1), %1\n"
+		"  decl %2\n"
+		"  jne	1b\n"
+		"  adcl $0, %0\n"
+		"  movl %0, %2\n"
+		"  shrl $16, %0\n"
+		"  addw %w2, %w0\n"
+		"  adcl $0, %0\n"
+		"  notl %0\n"
+		"2:"
+	/* Since the input registers which are loaded with iph and ipl
+	   are modified, we must also specify them as outputs, or gcc
+	   will assume they contain their original values. */
+	: "=r" (sum), "=r" (iph), "=r" (ihl)
+	: "1" (iph), "2" (ihl)
+	: "memory");
+	return (__force __sum16)sum;
+}
+
 #ifdef CONFIG_X86_32
 # include "checksum_32.h"
 #else
diff --git a/arch/x86/um/asm/checksum_32.h b/arch/x86/um/asm/checksum_32.h
index caab74252e27..ab77b6f9a4bf 100644
--- a/arch/x86/um/asm/checksum_32.h
+++ b/arch/x86/um/asm/checksum_32.h
@@ -5,145 +5,6 @@
 #ifndef __UM_SYSDEP_CHECKSUM_H
 #define __UM_SYSDEP_CHECKSUM_H
 
-#include "linux/in6.h"
-#include "linux/string.h"
-
-/*
- * computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit)
- *
- * returns a 32-bit number suitable for feeding into itself
- * or csum_tcpudp_magic
- *
- * this function must be called with even lengths, except
- * for the last fragment, which may be odd
- *
- * it's best to have buff aligned on a 32-bit boundary
- */
-__wsum csum_partial(const void *buff, int len, __wsum sum);
-
-/*
- *	Note: when you get a NULL pointer exception here this means someone
- *	passed in an incorrect kernel address to one of these functions.
- *
- *	If you use these functions directly please don't forget the
- *	access_ok().
- */
-
-static __inline__
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-				       int len, __wsum sum)
-{
-	memcpy(dst, src, len);
-	return csum_partial(dst, len, sum);
-}
-
-/*
- * the same as csum_partial, but copies from src while it
- * checksums, and handles user-space pointer exceptions correctly, when needed.
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-
-static __inline__
-__wsum csum_partial_copy_from_user(const void __user *src, void *dst,
-					 int len, __wsum sum, int *err_ptr)
-{
-	if (copy_from_user(dst, src, len)) {
-		*err_ptr = -EFAULT;
-		return (__force __wsum)-1;
-	}
-
-	return csum_partial(dst, len, sum);
-}
-
-/*
- *	This is a version of ip_compute_csum() optimized for IP headers,
- *	which always checksum on 4 octet boundaries.
- *
- *	By Jorge Cwik <jorge@laser.satlink.net>, adapted for linux by
- *	Arnt Gulbrandsen.
- */
-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-	unsigned int sum;
-
-	__asm__ __volatile__(
-	    "movl (%1), %0	;\n"
-	    "subl $4, %2	;\n"
-	    "jbe 2f		;\n"
-	    "addl 4(%1), %0	;\n"
-	    "adcl 8(%1), %0	;\n"
-	    "adcl 12(%1), %0	;\n"
-"1:	    adcl 16(%1), %0	;\n"
-	    "lea 4(%1), %1	;\n"
-	    "decl %2		;\n"
-	    "jne 1b		;\n"
-	    "adcl $0, %0	;\n"
-	    "movl %0, %2	;\n"
-	    "shrl $16, %0	;\n"
-	    "addw %w2, %w0	;\n"
-	    "adcl $0, %0	;\n"
-	    "notl %0		;\n"
-"2:				;\n"
-	/* Since the input registers which are loaded with iph and ipl
-	   are modified, we must also specify them as outputs, or gcc
-	   will assume they contain their original values. */
-	: "=r" (sum), "=r" (iph), "=r" (ihl)
-	: "1" (iph), "2" (ihl)
-	: "memory");
-	return (__force __sum16)sum;
-}
-
-/*
- *	Fold a partial checksum
- */
-
-static inline __sum16 csum_fold(__wsum sum)
-{
-	__asm__(
-		"addl %1, %0		;\n"
-		"adcl $0xffff, %0	;\n"
-		: "=r" (sum)
-		: "r" ((__force u32)sum << 16),
-		  "0" ((__force u32)sum & 0xffff0000)
-	);
-	return (__force __sum16)(~(__force u32)sum >> 16);
-}
-
-static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
-						   unsigned short len,
-						   unsigned short proto,
-						   __wsum sum)
-{
-    __asm__(
-	"addl %1, %0	;\n"
-	"adcl %2, %0	;\n"
-	"adcl %3, %0	;\n"
-	"adcl $0, %0	;\n"
-	: "=r" (sum)
-	: "g" (daddr), "g"(saddr), "g"((len + proto) << 8), "0"(sum));
-    return sum;
-}
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-						   unsigned short len,
-						   unsigned short proto,
-						   __wsum sum)
-{
-	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
-}
-
-/*
- * this routine is used for miscellaneous IP-like checksums, mainly
- * in icmp.c
- */
-
 static inline __sum16 ip_compute_csum(const void *buff, int len)
 {
     return csum_fold (csum_partial(buff, len, 0));
@@ -198,4 +59,3 @@ static __inline__ __wsum csum_and_copy_to_user(const void *src,
 }
 
 #endif
-
diff --git a/arch/x86/um/asm/checksum_64.h b/arch/x86/um/asm/checksum_64.h
index a5be9031ea85..7b6cd1921573 100644
--- a/arch/x86/um/asm/checksum_64.h
+++ b/arch/x86/um/asm/checksum_64.h
@@ -5,131 +5,6 @@
 #ifndef __UM_SYSDEP_CHECKSUM_H
 #define __UM_SYSDEP_CHECKSUM_H
 
-#include "linux/string.h"
-#include "linux/in6.h"
-#include "asm/uaccess.h"
-
-extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-
-/*
- *	Note: when you get a NULL pointer exception here this means someone
- *	passed in an incorrect kernel address to one of these functions.
- *
- *	If you use these functions directly please don't forget the
- *	access_ok().
- */
-
-static __inline__
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-				       int len, __wsum sum)
-{
-	memcpy(dst, src, len);
-	return(csum_partial(dst, len, sum));
-}
-
-static __inline__
-__wsum csum_partial_copy_from_user(const void __user *src,
-                                         void *dst, int len, __wsum sum,
-                                         int *err_ptr)
-{
-        if (copy_from_user(dst, src, len)) {
-                *err_ptr = -EFAULT;
-                return (__force __wsum)-1;
-        }
-        return csum_partial(dst, len, sum);
-}
-
-/**
- * csum_fold - Fold and invert a 32bit checksum.
- * sum: 32bit unfolded sum
- *
- * Fold a 32bit running checksum to 16bit and invert it. This is usually
- * the last step before putting a checksum into a packet.
- * Make sure not to mix with 64bit checksums.
- */
-static inline __sum16 csum_fold(__wsum sum)
-{
-	__asm__(
-		"  addl %1,%0\n"
-		"  adcl $0xffff,%0"
-		: "=r" (sum)
-		: "r" ((__force u32)sum << 16),
-		  "0" ((__force u32)sum & 0xffff0000)
-	);
-	return (__force __sum16)(~(__force u32)sum >> 16);
-}
-
-/**
- * csum_tcpup_nofold - Compute an IPv4 pseudo header checksum.
- * @saddr: source address
- * @daddr: destination address
- * @len: length of packet
- * @proto: ip protocol of packet
- * @sum: initial sum to be added in (32bit unfolded)
- *
- * Returns the pseudo header checksum the input data. Result is
- * 32bit unfolded.
- */
-static inline __wsum
-csum_tcpudp_nofold(__be32 saddr, __be32 daddr, unsigned short len,
-		   unsigned short proto, __wsum sum)
-{
-	asm("  addl %1, %0\n"
-	    "  adcl %2, %0\n"
-	    "  adcl %3, %0\n"
-	    "  adcl $0, %0\n"
-		: "=r" (sum)
-	    : "g" (daddr), "g" (saddr), "g" ((len + proto) << 8), "0" (sum));
-	return sum;
-}
-
-/*
- * computes the checksum of the TCP/UDP pseudo-header
- * returns a 16-bit checksum, already complemented
- */
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
-					   unsigned short len,
-					   unsigned short proto,
-					   __wsum sum)
-{
-	return csum_fold(csum_tcpudp_nofold(saddr,daddr,len,proto,sum));
-}
-
-/**
- * ip_fast_csum - Compute the IPv4 header checksum efficiently.
- * iph: ipv4 header
- * ihl: length of header / 4
- */
-static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
-{
-	unsigned int sum;
-
-	asm(	"  movl (%1), %0\n"
-		"  subl $4, %2\n"
-		"  jbe 2f\n"
-		"  addl 4(%1), %0\n"
-		"  adcl 8(%1), %0\n"
-		"  adcl 12(%1), %0\n"
-		"1: adcl 16(%1), %0\n"
-		"  lea 4(%1), %1\n"
-		"  decl %2\n"
-		"  jne	1b\n"
-		"  adcl $0, %0\n"
-		"  movl %0, %2\n"
-		"  shrl $16, %0\n"
-		"  addw %w2, %w0\n"
-		"  adcl $0, %0\n"
-		"  notl %0\n"
-		"2:"
-	/* Since the input registers which are loaded with iph and ipl
-	   are modified, we must also specify them as outputs, or gcc
-	   will assume they contain their original values. */
-	: "=r" (sum), "=r" (iph), "=r" (ihl)
-	: "1" (iph), "2" (ihl)
-	: "memory");
-	return (__force __sum16)sum;
-}
-
 static inline unsigned add32_with_carry(unsigned a, unsigned b)
 {
         asm("addl %2,%0\n\t"
-- 
cgit v1.2.3


From 8813f67439524ab54978b011c51665e1854a64b5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 8 Oct 2012 03:26:17 +0100
Subject: um/x86: merge 32 and 64 bit variants of ptrace.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/x86/um/asm/ptrace.h    | 58 +++++++++++++++++++++++++++++++++++++++++----
 arch/x86/um/asm/ptrace_32.h | 28 ----------------------
 arch/x86/um/asm/ptrace_64.h | 46 -----------------------------------
 3 files changed, 54 insertions(+), 78 deletions(-)
 delete mode 100644 arch/x86/um/asm/ptrace_32.h
 delete mode 100644 arch/x86/um/asm/ptrace_64.h

(limited to 'arch/x86')

diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index e72cd0df5ba3..755133258c45 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -1,11 +1,13 @@
 #ifndef __UM_X86_PTRACE_H
 #define __UM_X86_PTRACE_H
 
-#ifdef CONFIG_X86_32
-# include "ptrace_32.h"
-#else
-# include "ptrace_64.h"
+#include <linux/compiler.h>
+#ifndef CONFIG_X86_32
+#define __FRAME_OFFSETS /* Needed to get the R* macros */
 #endif
+#include <asm/ptrace-generic.h>
+
+#define user_mode(r) UPT_IS_USER(&(r)->regs)
 
 #define PT_REGS_AX(r) UPT_AX(&(r)->regs)
 #define PT_REGS_BX(r) UPT_BX(&(r)->regs)
@@ -36,4 +38,52 @@ static inline long regs_return_value(struct pt_regs *regs)
 {
 	return PT_REGS_AX(regs);
 }
+
+/*
+ * Forward declaration to avoid including sysdep/tls.h, which causes a
+ * circular include, and compilation failures.
+ */
+struct user_desc;
+
+#ifdef CONFIG_X86_32
+
+#define HOST_AUDIT_ARCH AUDIT_ARCH_I386
+
+extern int ptrace_get_thread_area(struct task_struct *child, int idx,
+                                  struct user_desc __user *user_desc);
+
+extern int ptrace_set_thread_area(struct task_struct *child, int idx,
+                                  struct user_desc __user *user_desc);
+
+#else
+
+#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
+
+#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
+#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
+#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
+#define PT_REGS_R11(r) UPT_R11(&(r)->regs)
+#define PT_REGS_R12(r) UPT_R12(&(r)->regs)
+#define PT_REGS_R13(r) UPT_R13(&(r)->regs)
+#define PT_REGS_R14(r) UPT_R14(&(r)->regs)
+#define PT_REGS_R15(r) UPT_R15(&(r)->regs)
+
+#include <asm/errno.h>
+
+static inline int ptrace_get_thread_area(struct task_struct *child, int idx,
+                                         struct user_desc __user *user_desc)
+{
+        return -ENOSYS;
+}
+
+static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
+                                         struct user_desc __user *user_desc)
+{
+        return -ENOSYS;
+}
+
+extern long arch_prctl(struct task_struct *task, int code,
+		       unsigned long __user *addr);
+
+#endif
 #endif /* __UM_X86_PTRACE_H */
diff --git a/arch/x86/um/asm/ptrace_32.h b/arch/x86/um/asm/ptrace_32.h
deleted file mode 100644
index 2cf225351b65..000000000000
--- a/arch/x86/um/asm/ptrace_32.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* 
- * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Licensed under the GPL
- */
-
-#ifndef __UM_PTRACE_I386_H
-#define __UM_PTRACE_I386_H
-
-#define HOST_AUDIT_ARCH AUDIT_ARCH_I386
-
-#include "linux/compiler.h"
-#include "asm/ptrace-generic.h"
-
-#define user_mode(r) UPT_IS_USER(&(r)->regs)
-
-/*
- * Forward declaration to avoid including sysdep/tls.h, which causes a
- * circular include, and compilation failures.
- */
-struct user_desc;
-
-extern int ptrace_get_thread_area(struct task_struct *child, int idx,
-                                  struct user_desc __user *user_desc);
-
-extern int ptrace_set_thread_area(struct task_struct *child, int idx,
-                                  struct user_desc __user *user_desc);
-
-#endif
diff --git a/arch/x86/um/asm/ptrace_64.h b/arch/x86/um/asm/ptrace_64.h
deleted file mode 100644
index ea7bff394320..000000000000
--- a/arch/x86/um/asm/ptrace_64.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright 2003 PathScale, Inc.
- *
- * Licensed under the GPL
- */
-
-#ifndef __UM_PTRACE_X86_64_H
-#define __UM_PTRACE_X86_64_H
-
-#include "linux/compiler.h"
-#include "asm/errno.h"
-
-#define __FRAME_OFFSETS /* Needed to get the R* macros */
-#include "asm/ptrace-generic.h"
-
-#define HOST_AUDIT_ARCH AUDIT_ARCH_X86_64
-
-#define PT_REGS_R8(r) UPT_R8(&(r)->regs)
-#define PT_REGS_R9(r) UPT_R9(&(r)->regs)
-#define PT_REGS_R10(r) UPT_R10(&(r)->regs)
-#define PT_REGS_R11(r) UPT_R11(&(r)->regs)
-#define PT_REGS_R12(r) UPT_R12(&(r)->regs)
-#define PT_REGS_R13(r) UPT_R13(&(r)->regs)
-#define PT_REGS_R14(r) UPT_R14(&(r)->regs)
-#define PT_REGS_R15(r) UPT_R15(&(r)->regs)
-
-/* XXX */
-#define user_mode(r) UPT_IS_USER(&(r)->regs)
-
-struct user_desc;
-
-static inline int ptrace_get_thread_area(struct task_struct *child, int idx,
-                                         struct user_desc __user *user_desc)
-{
-        return -ENOSYS;
-}
-
-static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
-                                         struct user_desc __user *user_desc)
-{
-        return -ENOSYS;
-}
-
-extern long arch_prctl(struct task_struct *task, int code,
-		       unsigned long __user *addr);
-#endif
-- 
cgit v1.2.3


From 382d95fdfa7ff5c54f6495c597c7cf6d124e404b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 8 Oct 2012 03:26:54 +0100
Subject: um: move sysrq.h out of include/shared

never used by userland-side objects

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/include/asm/sysrq.h    |  7 +++++++
 arch/um/include/shared/sysrq.h |  7 -------
 arch/um/kernel/sysrq.c         |  2 +-
 arch/x86/um/sysrq_32.c         | 12 ++++++------
 arch/x86/um/sysrq_64.c         |  2 +-
 5 files changed, 15 insertions(+), 15 deletions(-)
 create mode 100644 arch/um/include/asm/sysrq.h
 delete mode 100644 arch/um/include/shared/sysrq.h

(limited to 'arch/x86')

diff --git a/arch/um/include/asm/sysrq.h b/arch/um/include/asm/sysrq.h
new file mode 100644
index 000000000000..c8d332b56b98
--- /dev/null
+++ b/arch/um/include/asm/sysrq.h
@@ -0,0 +1,7 @@
+#ifndef __UM_SYSRQ_H
+#define __UM_SYSRQ_H
+
+struct task_struct;
+extern void show_trace(struct task_struct* task, unsigned long *stack);
+
+#endif
diff --git a/arch/um/include/shared/sysrq.h b/arch/um/include/shared/sysrq.h
deleted file mode 100644
index c8d332b56b98..000000000000
--- a/arch/um/include/shared/sysrq.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef __UM_SYSRQ_H
-#define __UM_SYSRQ_H
-
-struct task_struct;
-extern void show_trace(struct task_struct* task, unsigned long *stack);
-
-#endif
diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
index 0960de54495a..e562ff80409a 100644
--- a/arch/um/kernel/sysrq.c
+++ b/arch/um/kernel/sysrq.c
@@ -7,7 +7,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sched.h>
-#include "sysrq.h"
+#include <asm/sysrq.h>
 
 /* Catch non-i386 SUBARCH's. */
 #if !defined(CONFIG_UML_X86) || defined(CONFIG_64BIT)
diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c
index 2d5cc51e9bef..c9bee5b8c0d3 100644
--- a/arch/x86/um/sysrq_32.c
+++ b/arch/x86/um/sysrq_32.c
@@ -3,12 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/smp.h"
-#include "linux/sched.h"
-#include "linux/kallsyms.h"
-#include "asm/ptrace.h"
-#include "sysrq.h"
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/kallsyms.h>
+#include <asm/ptrace.h>
+#include <asm/sysrq.h>
 
 /* This is declared by <linux/sched.h> */
 void show_regs(struct pt_regs *regs)
diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c
index 08258f179969..a0e7fb1134a0 100644
--- a/arch/x86/um/sysrq_64.c
+++ b/arch/x86/um/sysrq_64.c
@@ -10,7 +10,7 @@
 #include <linux/utsname.h>
 #include <asm/current.h>
 #include <asm/ptrace.h>
-#include "sysrq.h"
+#include <asm/sysrq.h>
 
 void __show_regs(struct pt_regs *regs)
 {
-- 
cgit v1.2.3


From 37185b33240870719b6b5913a46e6a441f1ae96f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ZenIV.linux.org.uk>
Date: Mon, 8 Oct 2012 03:27:32 +0100
Subject: um: get rid of pointless include "..." where include <...> will do

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 arch/um/drivers/chan_kern.c              |  4 ++--
 arch/um/drivers/chan_user.c              |  4 ++--
 arch/um/drivers/chan_user.h              |  2 +-
 arch/um/drivers/cow_sys.h                |  6 +++---
 arch/um/drivers/daemon.h                 |  2 +-
 arch/um/drivers/daemon_kern.c            |  4 ++--
 arch/um/drivers/daemon_user.c            |  6 +++---
 arch/um/drivers/fd.c                     |  4 ++--
 arch/um/drivers/harddog_user.c           |  2 +-
 arch/um/drivers/hostaudio_kern.c         | 18 ++++++++--------
 arch/um/drivers/line.c                   | 16 +++++++-------
 arch/um/drivers/line.h                   | 12 +++++------
 arch/um/drivers/mconsole.h               |  2 +-
 arch/um/drivers/mconsole_kern.c          | 10 ++++-----
 arch/um/drivers/mconsole_kern.h          |  2 +-
 arch/um/drivers/mmapper_kern.c           |  2 +-
 arch/um/drivers/net_kern.c               | 10 ++++-----
 arch/um/drivers/net_user.c               |  6 +++---
 arch/um/drivers/null.c                   |  2 +-
 arch/um/drivers/pcap_kern.c              |  4 ++--
 arch/um/drivers/pcap_user.c              |  4 ++--
 arch/um/drivers/pcap_user.h              |  2 +-
 arch/um/drivers/port_kern.c              | 20 +++++++++---------
 arch/um/drivers/port_user.c              |  4 ++--
 arch/um/drivers/pty.c                    |  4 ++--
 arch/um/drivers/random.c                 |  4 ++--
 arch/um/drivers/slip_common.c            |  2 +-
 arch/um/drivers/slip_kern.c              |  2 +-
 arch/um/drivers/slip_user.c              |  6 +++---
 arch/um/drivers/slirp_kern.c             |  6 +++---
 arch/um/drivers/slirp_user.c             |  4 ++--
 arch/um/drivers/ssl.c                    | 22 +++++++++----------
 arch/um/drivers/stdio_console.c          | 36 ++++++++++++++++----------------
 arch/um/drivers/tty.c                    |  4 ++--
 arch/um/drivers/ubd_kern.c               |  8 +++----
 arch/um/drivers/ubd_user.c               |  2 +-
 arch/um/drivers/umcast.h                 |  2 +-
 arch/um/drivers/umcast_kern.c            |  4 ++--
 arch/um/drivers/umcast_user.c            |  4 ++--
 arch/um/drivers/vde_kern.c               |  6 +++---
 arch/um/drivers/vde_user.c               |  4 ++--
 arch/um/drivers/xterm.c                  |  4 ++--
 arch/um/drivers/xterm_kern.c             |  4 ++--
 arch/um/include/asm/dma.h                |  2 +-
 arch/um/include/asm/mmu.h                |  2 +-
 arch/um/include/asm/page.h               |  2 +-
 arch/um/include/asm/pgtable.h            |  4 ++--
 arch/um/include/asm/processor-generic.h  |  6 +++---
 arch/um/include/asm/ptrace-generic.h     |  2 +-
 arch/um/include/asm/smp.h                |  6 +++---
 arch/um/include/shared/arch.h            |  2 +-
 arch/um/include/shared/as-layout.h       |  2 +-
 arch/um/include/shared/irq_kern.h        |  4 ++--
 arch/um/include/shared/irq_user.h        |  2 +-
 arch/um/include/shared/kern_util.h       |  4 ++--
 arch/um/include/shared/longjmp.h         |  4 ++--
 arch/um/include/shared/os.h              |  6 +++---
 arch/um/include/shared/registers.h       |  4 ++--
 arch/um/include/shared/skas/skas.h       |  2 +-
 arch/um/include/shared/skas_ptrace.h     |  2 +-
 arch/um/kernel/asm-offsets.c             |  2 +-
 arch/um/kernel/config.c.in               |  2 +-
 arch/um/kernel/dyn.lds.S                 |  2 +-
 arch/um/kernel/early_printk.c            |  2 +-
 arch/um/kernel/exec.c                    |  8 +++----
 arch/um/kernel/gmon_syms.c               |  2 +-
 arch/um/kernel/gprof_syms.c              |  2 +-
 arch/um/kernel/initrd.c                  | 12 +++++------
 arch/um/kernel/irq.c                     | 22 +++++++++----------
 arch/um/kernel/ksyms.c                   |  2 +-
 arch/um/kernel/mem.c                     | 12 +++++------
 arch/um/kernel/process.c                 |  8 +++----
 arch/um/kernel/reboot.c                  | 14 ++++++-------
 arch/um/kernel/sigio.c                   |  6 +++---
 arch/um/kernel/signal.c                  |  4 ++--
 arch/um/kernel/skas/clone.c              |  8 +++----
 arch/um/kernel/skas/mmu.c                | 16 +++++++-------
 arch/um/kernel/skas/process.c            | 12 +++++------
 arch/um/kernel/skas/syscall.c            | 10 ++++-----
 arch/um/kernel/skas/uaccess.c            |  4 ++--
 arch/um/kernel/smp.c                     | 30 +++++++++++++-------------
 arch/um/kernel/syscall.c                 | 20 +++++++++---------
 arch/um/kernel/time.c                    |  4 ++--
 arch/um/kernel/tlb.c                     |  8 +++----
 arch/um/kernel/trap.c                    | 10 ++++-----
 arch/um/kernel/um_arch.c                 | 14 ++++++-------
 arch/um/kernel/umid.c                    |  6 +++---
 arch/um/kernel/uml.lds.S                 |  2 +-
 arch/um/os-Linux/aio.c                   |  8 +++----
 arch/um/os-Linux/drivers/etap.h          |  2 +-
 arch/um/os-Linux/drivers/ethertap_kern.c |  2 +-
 arch/um/os-Linux/drivers/ethertap_user.c |  6 +++---
 arch/um/os-Linux/drivers/tuntap.h        |  2 +-
 arch/um/os-Linux/drivers/tuntap_kern.c   |  2 +-
 arch/um/os-Linux/drivers/tuntap_user.c   |  4 ++--
 arch/um/os-Linux/elf_aux.c               |  6 +++---
 arch/um/os-Linux/execvp.c                |  4 ++--
 arch/um/os-Linux/file.c                  |  2 +-
 arch/um/os-Linux/helper.c                |  6 +++---
 arch/um/os-Linux/irq.c                   |  6 +++---
 arch/um/os-Linux/main.c                  | 10 ++++-----
 arch/um/os-Linux/mem.c                   |  4 ++--
 arch/um/os-Linux/process.c               |  8 +++----
 arch/um/os-Linux/registers.c             |  6 +++---
 arch/um/os-Linux/sigio.c                 | 10 ++++-----
 arch/um/os-Linux/signal.c                |  8 +++----
 arch/um/os-Linux/skas/mem.c              | 20 +++++++++---------
 arch/um/os-Linux/skas/process.c          | 22 +++++++++----------
 arch/um/os-Linux/start_up.c              | 14 ++++++-------
 arch/um/os-Linux/time.c                  |  4 ++--
 arch/um/os-Linux/tty.c                   |  4 ++--
 arch/um/os-Linux/umid.c                  |  4 ++--
 arch/um/os-Linux/user_syms.c             |  4 ++--
 arch/um/os-Linux/util.c                  |  2 +-
 arch/um/sys-ppc/miscthings.c             |  6 +++---
 arch/um/sys-ppc/ptrace.c                 |  2 +-
 arch/um/sys-ppc/ptrace_user.c            |  2 +-
 arch/um/sys-ppc/shared/sysdep/ptrace.h   |  2 +-
 arch/um/sys-ppc/sigcontext.c             |  2 +-
 arch/um/sys-ppc/sysrq.c                  |  4 ++--
 arch/x86/um/asm/elf.h                    |  2 +-
 arch/x86/um/bugs_32.c                    |  6 +++---
 arch/x86/um/bugs_64.c                    |  2 +-
 arch/x86/um/fault.c                      |  2 +-
 arch/x86/um/ldt.c                        | 10 ++++-----
 arch/x86/um/mem_64.c                     |  6 +++---
 arch/x86/um/os-Linux/registers.c         |  4 ++--
 arch/x86/um/os-Linux/task_size.c         |  2 +-
 arch/x86/um/os-Linux/tls.c               |  2 +-
 arch/x86/um/ptrace_32.c                  |  8 +++----
 arch/x86/um/ptrace_user.c                |  2 +-
 arch/x86/um/shared/sysdep/ptrace.h       |  2 +-
 arch/x86/um/shared/sysdep/stub.h         |  4 ++--
 arch/x86/um/shared/sysdep/syscalls_32.h  |  4 ++--
 arch/x86/um/signal.c                     |  4 ++--
 arch/x86/um/stub_32.S                    |  2 +-
 arch/x86/um/stub_64.S                    |  2 +-
 arch/x86/um/stub_segv.c                  |  6 +++---
 arch/x86/um/tls_32.c                     | 12 +++++------
 arch/x86/um/tls_64.c                     |  2 +-
 fs/hostfs/hostfs.h                       |  2 +-
 fs/hostfs/hostfs_kern.c                  |  4 ++--
 fs/hostfs/hostfs_user.c                  |  1 -
 fs/hppfs/hppfs.c                         |  2 +-
 144 files changed, 428 insertions(+), 429 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
index 87eebfe03c61..c3bba73e4be6 100644
--- a/arch/um/drivers/chan_kern.c
+++ b/arch/um/drivers/chan_kern.c
@@ -7,8 +7,8 @@
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
 #include "chan.h"
-#include "os.h"
-#include "irq_kern.h"
+#include <os.h>
+#include <irq_kern.h>
 
 #ifdef CONFIG_NOCONFIG_CHAN
 static void *not_configged_init(char *str, int device,
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index f180813ce2c7..9be670ad23b5 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -11,8 +11,8 @@
 #include <termios.h>
 #include <sys/ioctl.h>
 #include "chan_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <um_malloc.h>
 
 void generic_close(int fd, void *unused)
 {
diff --git a/arch/um/drivers/chan_user.h b/arch/um/drivers/chan_user.h
index 6257b7a6e1af..dc693298eb8f 100644
--- a/arch/um/drivers/chan_user.h
+++ b/arch/um/drivers/chan_user.h
@@ -6,7 +6,7 @@
 #ifndef __CHAN_USER_H__
 #define __CHAN_USER_H__
 
-#include "init.h"
+#include <init.h>
 
 struct chan_opts {
 	void (*const announce)(char *dev_name, int dev);
diff --git a/arch/um/drivers/cow_sys.h b/arch/um/drivers/cow_sys.h
index 7f2ed0b8824a..67cbee63e702 100644
--- a/arch/um/drivers/cow_sys.h
+++ b/arch/um/drivers/cow_sys.h
@@ -1,9 +1,9 @@
 #ifndef __COW_SYS_H__
 #define __COW_SYS_H__
 
-#include "kern_util.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <kern_util.h>
+#include <os.h>
+#include <um_malloc.h>
 
 static inline void *cow_malloc(int size)
 {
diff --git a/arch/um/drivers/daemon.h b/arch/um/drivers/daemon.h
index 6e0e891f8a00..c2dd1951559f 100644
--- a/arch/um/drivers/daemon.h
+++ b/arch/um/drivers/daemon.h
@@ -6,7 +6,7 @@
 #ifndef __DAEMON_H__
 #define __DAEMON_H__
 
-#include "net_user.h"
+#include <net_user.h>
 
 #define SWITCH_VERSION 3
 
diff --git a/arch/um/drivers/daemon_kern.c b/arch/um/drivers/daemon_kern.c
index b4a1522f2157..7568cc2f3cd6 100644
--- a/arch/um/drivers/daemon_kern.c
+++ b/arch/um/drivers/daemon_kern.c
@@ -6,9 +6,9 @@
  * Licensed under the GPL.
  */
 
-#include "linux/init.h"
+#include <linux/init.h>
 #include <linux/netdevice.h>
-#include "net_kern.h"
+#include <net_kern.h>
 #include "daemon.h"
 
 struct daemon_init {
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
index a4fd7bc14af7..8813c10d0177 100644
--- a/arch/um/drivers/daemon_user.c
+++ b/arch/um/drivers/daemon_user.c
@@ -14,9 +14,9 @@
 #include <sys/time.h>
 #include <sys/un.h>
 #include "daemon.h"
-#include "net_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <net_user.h>
+#include <os.h>
+#include <um_malloc.h>
 
 enum request_type { REQ_NEW_CONTROL };
 
diff --git a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c
index 5b81d2574415..a13a427b996b 100644
--- a/arch/um/drivers/fd.c
+++ b/arch/um/drivers/fd.c
@@ -9,8 +9,8 @@
 #include <errno.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <um_malloc.h>
 
 struct fd_chan {
 	int fd;
diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c
index 0345d6206d40..f99b32a4dbff 100644
--- a/arch/um/drivers/harddog_user.c
+++ b/arch/um/drivers/harddog_user.c
@@ -6,7 +6,7 @@
 #include <stdio.h>
 #include <unistd.h>
 #include <errno.h>
-#include "os.h"
+#include <os.h>
 
 struct dog_data {
 	int stdin;
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index f9f6a4e20590..9b90fdc4b151 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -3,15 +3,15 @@
  * Licensed under the GPL
  */
 
-#include "linux/fs.h"
-#include "linux/module.h"
-#include "linux/slab.h"
-#include "linux/sound.h"
-#include "linux/soundcard.h"
-#include "linux/mutex.h"
-#include "asm/uaccess.h"
-#include "init.h"
-#include "os.h"
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/sound.h>
+#include <linux/soundcard.h>
+#include <linux/mutex.h>
+#include <asm/uaccess.h>
+#include <init.h>
+#include <os.h>
 
 struct hostaudio_state {
 	int fd;
diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
index bbaf2c59830a..b8e841c128aa 100644
--- a/arch/um/drivers/line.c
+++ b/arch/um/drivers/line.c
@@ -3,15 +3,15 @@
  * Licensed under the GPL
  */
 
-#include "linux/irqreturn.h"
-#include "linux/kd.h"
-#include "linux/sched.h"
-#include "linux/slab.h"
+#include <linux/irqreturn.h>
+#include <linux/kd.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
 #include "chan.h"
-#include "irq_kern.h"
-#include "irq_user.h"
-#include "kern_util.h"
-#include "os.h"
+#include <irq_kern.h>
+#include <irq_user.h>
+#include <kern_util.h>
+#include <os.h>
 
 #define LINE_BUFSIZE 4096
 
diff --git a/arch/um/drivers/line.h b/arch/um/drivers/line.h
index bae95611e7ab..138a14526d9c 100644
--- a/arch/um/drivers/line.h
+++ b/arch/um/drivers/line.h
@@ -6,12 +6,12 @@
 #ifndef __LINE_H__
 #define __LINE_H__
 
-#include "linux/list.h"
-#include "linux/workqueue.h"
-#include "linux/tty.h"
-#include "linux/interrupt.h"
-#include "linux/spinlock.h"
-#include "linux/mutex.h"
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/tty.h>
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include "chan_user.h"
 #include "mconsole_kern.h"
 
diff --git a/arch/um/drivers/mconsole.h b/arch/um/drivers/mconsole.h
index c139ae1d6826..8b22535c62ce 100644
--- a/arch/um/drivers/mconsole.h
+++ b/arch/um/drivers/mconsole.h
@@ -12,7 +12,7 @@
 #define u32 uint32_t
 #endif
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 #define MCONSOLE_MAGIC (0xcafebabe)
 #define MCONSOLE_MAX_DATA (512)
diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
index 664a60e8dfb4..25e63fdd8028 100644
--- a/arch/um/drivers/mconsole_kern.c
+++ b/arch/um/drivers/mconsole_kern.c
@@ -24,13 +24,13 @@
 #include <asm/uaccess.h>
 #include <asm/switch_to.h>
 
-#include "init.h"
-#include "irq_kern.h"
-#include "irq_user.h"
-#include "kern_util.h"
+#include <init.h>
+#include <irq_kern.h>
+#include <irq_user.h>
+#include <kern_util.h>
 #include "mconsole.h"
 #include "mconsole_kern.h"
-#include "os.h"
+#include <os.h>
 
 static int do_unlink_socket(struct notifier_block *notifier,
 			    unsigned long what, void *data)
diff --git a/arch/um/drivers/mconsole_kern.h b/arch/um/drivers/mconsole_kern.h
index d2fe07e78958..7a0c6a1ad1d4 100644
--- a/arch/um/drivers/mconsole_kern.h
+++ b/arch/um/drivers/mconsole_kern.h
@@ -6,7 +6,7 @@
 #ifndef __MCONSOLE_KERN_H__
 #define __MCONSOLE_KERN_H__
 
-#include "linux/list.h"
+#include <linux/list.h>
 #include "mconsole.h"
 
 struct mconsole_entry {
diff --git a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c
index c0ef803c7c70..62145c276167 100644
--- a/arch/um/drivers/mmapper_kern.c
+++ b/arch/um/drivers/mmapper_kern.c
@@ -18,7 +18,7 @@
 #include <linux/mm.h>
 
 #include <asm/uaccess.h>
-#include "mem_user.h"
+#include <mem_user.h>
 
 /* These are set in mmapper_init, which is called at boot time */
 static unsigned long mmapper_size;
diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 458d324f062d..b1314ebf1f72 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -18,12 +18,12 @@
 #include <linux/skbuff.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
-#include "init.h"
-#include "irq_kern.h"
-#include "irq_user.h"
+#include <init.h>
+#include <irq_kern.h>
+#include <irq_user.h>
 #include "mconsole_kern.h"
-#include "net_kern.h"
-#include "net_user.h"
+#include <net_kern.h>
+#include <net_user.h>
 
 #define DRIVER_NAME "uml-netdev"
 
diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
index 05090c37fa84..cd14157b556d 100644
--- a/arch/um/drivers/net_user.c
+++ b/arch/um/drivers/net_user.c
@@ -11,9 +11,9 @@
 #include <string.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
-#include "net_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <net_user.h>
+#include <os.h>
+#include <um_malloc.h>
 
 int tap_open_common(void *dev, char *gate_addr)
 {
diff --git a/arch/um/drivers/null.c b/arch/um/drivers/null.c
index 2b45a1446c86..10495747ce8e 100644
--- a/arch/um/drivers/null.c
+++ b/arch/um/drivers/null.c
@@ -7,7 +7,7 @@
 #include <errno.h>
 #include <fcntl.h>
 #include "chan_user.h"
-#include "os.h"
+#include <os.h>
 
 /* This address is used only as a unique identifier */
 static int null_chan;
diff --git a/arch/um/drivers/pcap_kern.c b/arch/um/drivers/pcap_kern.c
index 2860525f8ff6..be0fb57bd1d7 100644
--- a/arch/um/drivers/pcap_kern.c
+++ b/arch/um/drivers/pcap_kern.c
@@ -3,9 +3,9 @@
  * Licensed under the GPL.
  */
 
-#include "linux/init.h"
+#include <linux/init.h>
 #include <linux/netdevice.h>
-#include "net_kern.h"
+#include <net_kern.h>
 #include "pcap_user.h"
 
 struct pcap_init {
diff --git a/arch/um/drivers/pcap_user.c b/arch/um/drivers/pcap_user.c
index 702a75b190ee..c07b9c752c86 100644
--- a/arch/um/drivers/pcap_user.c
+++ b/arch/um/drivers/pcap_user.c
@@ -7,9 +7,9 @@
 #include <pcap.h>
 #include <string.h>
 #include <asm/types.h>
-#include "net_user.h"
+#include <net_user.h>
 #include "pcap_user.h"
-#include "um_malloc.h"
+#include <um_malloc.h>
 
 #define PCAP_FD(p) (*(int *)(p))
 
diff --git a/arch/um/drivers/pcap_user.h b/arch/um/drivers/pcap_user.h
index d8ba6153f912..1ca7c764cc63 100644
--- a/arch/um/drivers/pcap_user.h
+++ b/arch/um/drivers/pcap_user.h
@@ -3,7 +3,7 @@
  * Licensed under the GPL
  */
 
-#include "net_user.h"
+#include <net_user.h>
 
 struct pcap_data {
 	char *host_if;
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index 1d83d50236e1..40ca5cc275e9 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -3,16 +3,16 @@
  * Licensed under the GPL
  */
 
-#include "linux/completion.h"
-#include "linux/interrupt.h"
-#include "linux/list.h"
-#include "linux/mutex.h"
-#include "linux/slab.h"
-#include "linux/workqueue.h"
-#include "asm/atomic.h"
-#include "init.h"
-#include "irq_kern.h"
-#include "os.h"
+#include <linux/completion.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/workqueue.h>
+#include <asm/atomic.h>
+#include <init.h>
+#include <irq_kern.h>
+#include <os.h>
 #include "port.h"
 
 struct port_list {
diff --git a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c
index 7b010b76ddf0..9a8e1b64c22e 100644
--- a/arch/um/drivers/port_user.c
+++ b/arch/um/drivers/port_user.c
@@ -10,9 +10,9 @@
 #include <unistd.h>
 #include <netinet/in.h>
 #include "chan_user.h"
-#include "os.h"
+#include <os.h>
 #include "port.h"
-#include "um_malloc.h"
+#include <um_malloc.h>
 
 struct port_chan {
 	int raw;
diff --git a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c
index cff2b75d31fd..f1fcc2cedb5e 100644
--- a/arch/um/drivers/pty.c
+++ b/arch/um/drivers/pty.c
@@ -12,8 +12,8 @@
 #include <termios.h>
 #include <sys/stat.h>
 #include "chan_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <um_malloc.h>
 
 struct pty_chan {
 	void (*announce)(char *dev_name, int dev);
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index e32c6aa6396f..9e3a72205827 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -13,8 +13,8 @@
 #include <linux/miscdevice.h>
 #include <linux/delay.h>
 #include <asm/uaccess.h>
-#include "irq_kern.h"
-#include "os.h"
+#include <irq_kern.h>
+#include <os.h>
 
 /*
  * core module and version information
diff --git a/arch/um/drivers/slip_common.c b/arch/um/drivers/slip_common.c
index e89cfc68fc3e..f597fa7c91d3 100644
--- a/arch/um/drivers/slip_common.c
+++ b/arch/um/drivers/slip_common.c
@@ -1,6 +1,6 @@
 #include <string.h>
 #include "slip_common.h"
-#include "net_user.h"
+#include <net_user.h>
 
 int slip_proto_read(int fd, void *buf, int len, struct slip_proto *slip)
 {
diff --git a/arch/um/drivers/slip_kern.c b/arch/um/drivers/slip_kern.c
index dd2aadc14af0..ed5249fc0574 100644
--- a/arch/um/drivers/slip_kern.c
+++ b/arch/um/drivers/slip_kern.c
@@ -6,7 +6,7 @@
 #include <linux/if_arp.h>
 #include <linux/init.h>
 #include <linux/netdevice.h>
-#include "net_kern.h"
+#include <net_kern.h>
 #include "slip.h"
 
 struct slip_init {
diff --git a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
index 932b4d69bec2..55c290d925f3 100644
--- a/arch/um/drivers/slip_user.c
+++ b/arch/um/drivers/slip_user.c
@@ -11,10 +11,10 @@
 #include <string.h>
 #include <sys/termios.h>
 #include <sys/wait.h>
-#include "net_user.h"
-#include "os.h"
+#include <net_user.h>
+#include <os.h>
 #include "slip.h"
-#include "um_malloc.h"
+#include <um_malloc.h>
 
 static int slip_user_init(void *data, void *dev)
 {
diff --git a/arch/um/drivers/slirp_kern.c b/arch/um/drivers/slirp_kern.c
index e376284f0fb7..4ef11ca7cacf 100644
--- a/arch/um/drivers/slirp_kern.c
+++ b/arch/um/drivers/slirp_kern.c
@@ -4,11 +4,11 @@
  */
 
 #include <linux/if_arp.h>
-#include "linux/init.h"
+#include <linux/init.h>
 #include <linux/netdevice.h>
 #include <linux/string.h>
-#include "net_kern.h"
-#include "net_user.h"
+#include <net_kern.h>
+#include <net_user.h>
 #include "slirp.h"
 
 struct slirp_init {
diff --git a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c
index db4adb639ff8..c999d187abb9 100644
--- a/arch/um/drivers/slirp_user.c
+++ b/arch/um/drivers/slirp_user.c
@@ -7,8 +7,8 @@
 #include <errno.h>
 #include <string.h>
 #include <sys/wait.h>
-#include "net_user.h"
-#include "os.h"
+#include <net_user.h>
+#include <os.h>
 #include "slirp.h"
 
 static int slirp_user_init(void *data, void *dev)
diff --git a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
index 7e86f0070123..16fdd0a0f9d6 100644
--- a/arch/um/drivers/ssl.c
+++ b/arch/um/drivers/ssl.c
@@ -3,19 +3,19 @@
  * Licensed under the GPL
  */
 
-#include "linux/fs.h"
-#include "linux/tty.h"
-#include "linux/tty_driver.h"
-#include "linux/major.h"
-#include "linux/mm.h"
-#include "linux/init.h"
-#include "linux/console.h"
-#include "asm/termbits.h"
-#include "asm/irq.h"
+#include <linux/fs.h>
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/major.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/console.h>
+#include <asm/termbits.h>
+#include <asm/irq.h>
 #include "ssl.h"
 #include "chan.h"
-#include "init.h"
-#include "irq_user.h"
+#include <init.h>
+#include <irq_user.h>
 #include "mconsole_kern.h"
 
 static const int ssl_version = 1;
diff --git a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
index 929b99a261f3..827777af3f6d 100644
--- a/arch/um/drivers/stdio_console.c
+++ b/arch/um/drivers/stdio_console.c
@@ -3,27 +3,27 @@
  * Licensed under the GPL
  */
 
-#include "linux/posix_types.h"
-#include "linux/tty.h"
-#include "linux/tty_flip.h"
-#include "linux/types.h"
-#include "linux/major.h"
-#include "linux/kdev_t.h"
-#include "linux/console.h"
-#include "linux/string.h"
-#include "linux/sched.h"
-#include "linux/list.h"
-#include "linux/init.h"
-#include "linux/interrupt.h"
-#include "linux/slab.h"
-#include "linux/hardirq.h"
-#include "asm/current.h"
-#include "asm/irq.h"
+#include <linux/posix_types.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/types.h>
+#include <linux/major.h>
+#include <linux/kdev_t.h>
+#include <linux/console.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/hardirq.h>
+#include <asm/current.h>
+#include <asm/irq.h>
 #include "stdio_console.h"
 #include "chan.h"
-#include "irq_user.h"
+#include <irq_user.h>
 #include "mconsole_kern.h"
-#include "init.h"
+#include <init.h>
 
 #define MAX_TTYS (16)
 
diff --git a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c
index a97391f9ec54..eaa201bca5ed 100644
--- a/arch/um/drivers/tty.c
+++ b/arch/um/drivers/tty.c
@@ -7,8 +7,8 @@
 #include <fcntl.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <um_malloc.h>
 
 struct tty_chan {
 	char *dev;
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 0643e5bc9f41..41bf72073ccc 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -33,12 +33,12 @@
 #include <linux/platform_device.h>
 #include <linux/scatterlist.h>
 #include <asm/tlbflush.h>
-#include "kern_util.h"
+#include <kern_util.h>
 #include "mconsole_kern.h"
-#include "init.h"
-#include "irq_kern.h"
+#include <init.h>
+#include <irq_kern.h>
 #include "ubd.h"
-#include "os.h"
+#include <os.h>
 #include "cow.h"
 
 enum ubd_req { UBD_READ, UBD_WRITE };
diff --git a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
index ffe02c431dea..a703e45d8aac 100644
--- a/arch/um/drivers/ubd_user.c
+++ b/arch/um/drivers/ubd_user.c
@@ -19,7 +19,7 @@
 #include <byteswap.h>
 
 #include "ubd.h"
-#include "os.h"
+#include <os.h>
 
 void ignore_sigwinch_sig(void)
 {
diff --git a/arch/um/drivers/umcast.h b/arch/um/drivers/umcast.h
index 6f8c0fe890fb..c190c6440911 100644
--- a/arch/um/drivers/umcast.h
+++ b/arch/um/drivers/umcast.h
@@ -6,7 +6,7 @@
 #ifndef __DRIVERS_UMCAST_H
 #define __DRIVERS_UMCAST_H
 
-#include "net_user.h"
+#include <net_user.h>
 
 struct umcast_data {
 	char *addr;
diff --git a/arch/um/drivers/umcast_kern.c b/arch/um/drivers/umcast_kern.c
index 42dab11d2ecf..f5ba6e377913 100644
--- a/arch/um/drivers/umcast_kern.c
+++ b/arch/um/drivers/umcast_kern.c
@@ -11,10 +11,10 @@
  * Licensed under the GPL.
  */
 
-#include "linux/init.h"
+#include <linux/init.h>
 #include <linux/netdevice.h>
 #include "umcast.h"
-#include "net_kern.h"
+#include <net_kern.h>
 
 struct umcast_init {
 	char *addr;
diff --git a/arch/um/drivers/umcast_user.c b/arch/um/drivers/umcast_user.c
index 010fa2d849ec..6074184bb51b 100644
--- a/arch/um/drivers/umcast_user.c
+++ b/arch/um/drivers/umcast_user.c
@@ -16,8 +16,8 @@
 #include <errno.h>
 #include <netinet/in.h>
 #include "umcast.h"
-#include "net_user.h"
-#include "um_malloc.h"
+#include <net_user.h>
+#include <um_malloc.h>
 
 static struct sockaddr_in *new_addr(char *addr, unsigned short port)
 {
diff --git a/arch/um/drivers/vde_kern.c b/arch/um/drivers/vde_kern.c
index 1b852bffdebc..6a365fadc7c4 100644
--- a/arch/um/drivers/vde_kern.c
+++ b/arch/um/drivers/vde_kern.c
@@ -7,10 +7,10 @@
  *
  */
 
-#include "linux/init.h"
+#include <linux/init.h>
 #include <linux/netdevice.h>
-#include "net_kern.h"
-#include "net_user.h"
+#include <net_kern.h>
+#include <net_user.h>
 #include "vde.h"
 
 static void vde_init(struct net_device *dev, void *data)
diff --git a/arch/um/drivers/vde_user.c b/arch/um/drivers/vde_user.c
index b8c286748d3d..64cb630d1157 100644
--- a/arch/um/drivers/vde_user.c
+++ b/arch/um/drivers/vde_user.c
@@ -6,8 +6,8 @@
 #include <stddef.h>
 #include <errno.h>
 #include <libvdeplug.h>
-#include "net_user.h"
-#include "um_malloc.h"
+#include <net_user.h>
+#include <um_malloc.h>
 #include "vde.h"
 
 static int vde_user_init(void *data, void *dev)
diff --git a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
index 969110e56487..20e30be44795 100644
--- a/arch/um/drivers/xterm.c
+++ b/arch/um/drivers/xterm.c
@@ -11,8 +11,8 @@
 #include <string.h>
 #include <termios.h>
 #include "chan_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <um_malloc.h>
 #include "xterm.h"
 
 struct xterm_chan {
diff --git a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c
index e3031e69445d..e8f9957bfbf6 100644
--- a/arch/um/drivers/xterm_kern.c
+++ b/arch/um/drivers/xterm_kern.c
@@ -7,8 +7,8 @@
 #include <linux/completion.h>
 #include <linux/irqreturn.h>
 #include <asm/irq.h>
-#include "irq_kern.h"
-#include "os.h"
+#include <irq_kern.h>
+#include <os.h>
 
 struct xterm_wait {
 	struct completion ready;
diff --git a/arch/um/include/asm/dma.h b/arch/um/include/asm/dma.h
index 9f6139a8a525..f88c5860520b 100644
--- a/arch/um/include/asm/dma.h
+++ b/arch/um/include/asm/dma.h
@@ -1,7 +1,7 @@
 #ifndef __UM_DMA_H
 #define __UM_DMA_H
 
-#include "asm/io.h"
+#include <asm/io.h>
 
 extern unsigned long uml_physmem;
 
diff --git a/arch/um/include/asm/mmu.h b/arch/um/include/asm/mmu.h
index 53e8b498ebba..da705448590f 100644
--- a/arch/um/include/asm/mmu.h
+++ b/arch/um/include/asm/mmu.h
@@ -6,7 +6,7 @@
 #ifndef __ARCH_UM_MMU_H
 #define __ARCH_UM_MMU_H
 
-#include "mm_id.h"
+#include <mm_id.h>
 #include <asm/mm_context.h>
 
 typedef struct mm_context {
diff --git a/arch/um/include/asm/page.h b/arch/um/include/asm/page.h
index 7cfc3cedce84..5ff53d9185f7 100644
--- a/arch/um/include/asm/page.h
+++ b/arch/um/include/asm/page.h
@@ -99,7 +99,7 @@ extern unsigned long uml_physmem;
 
 #define __va_space (8*1024*1024)
 
-#include "mem.h"
+#include <mem.h>
 
 /* Cast to unsigned long before casting to void * to avoid a warning from
  * mmap_kmem about cutting a long long down to a void *.  Not sure that
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 5888f1b83477..ae02909a1875 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -23,9 +23,9 @@
 				   pte_present gives true */
 
 #ifdef CONFIG_3_LEVEL_PGTABLES
-#include "asm/pgtable-3level.h"
+#include <asm/pgtable-3level.h>
 #else
-#include "asm/pgtable-2level.h"
+#include <asm/pgtable-2level.h>
 #endif
 
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 33a6a2423bd2..24e97be814bc 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -10,9 +10,9 @@ struct pt_regs;
 
 struct task_struct;
 
-#include "asm/ptrace.h"
-#include "registers.h"
-#include "sysdep/archsetjmp.h"
+#include <asm/ptrace.h>
+#include <registers.h>
+#include <sysdep/archsetjmp.h>
 
 #include <linux/prefetch.h>
 
diff --git a/arch/um/include/asm/ptrace-generic.h b/arch/um/include/asm/ptrace-generic.h
index 442f1d025dc2..cb9b3c47ca8e 100644
--- a/arch/um/include/asm/ptrace-generic.h
+++ b/arch/um/include/asm/ptrace-generic.h
@@ -9,7 +9,7 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/ptrace-abi.h>
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 struct pt_regs {
 	struct uml_pt_regs regs;
diff --git a/arch/um/include/asm/smp.h b/arch/um/include/asm/smp.h
index 4a4b09d4f366..e4507938d8cf 100644
--- a/arch/um/include/asm/smp.h
+++ b/arch/um/include/asm/smp.h
@@ -3,9 +3,9 @@
 
 #ifdef CONFIG_SMP
 
-#include "linux/bitops.h"
-#include "asm/current.h"
-#include "linux/cpumask.h"
+#include <linux/bitops.h>
+#include <asm/current.h>
+#include <linux/cpumask.h>
 
 #define raw_smp_processor_id() (current_thread->cpu)
 
diff --git a/arch/um/include/shared/arch.h b/arch/um/include/shared/arch.h
index 2de92a08a76b..4f46abda060d 100644
--- a/arch/um/include/shared/arch.h
+++ b/arch/um/include/shared/arch.h
@@ -6,7 +6,7 @@
 #ifndef __ARCH_H__
 #define __ARCH_H__
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 extern void arch_check_bugs(void);
 extern int arch_fixup(unsigned long address, struct uml_pt_regs *regs);
diff --git a/arch/um/include/shared/as-layout.h b/arch/um/include/shared/as-layout.h
index 86daa5461815..694c792bab4e 100644
--- a/arch/um/include/shared/as-layout.h
+++ b/arch/um/include/shared/as-layout.h
@@ -35,7 +35,7 @@
 
 #ifndef __ASSEMBLY__
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 struct cpu_task {
 	int pid;
diff --git a/arch/um/include/shared/irq_kern.h b/arch/um/include/shared/irq_kern.h
index 7a5bfa6291b8..e05bd667de15 100644
--- a/arch/um/include/shared/irq_kern.h
+++ b/arch/um/include/shared/irq_kern.h
@@ -6,8 +6,8 @@
 #ifndef __IRQ_KERN_H__
 #define __IRQ_KERN_H__
 
-#include "linux/interrupt.h"
-#include "asm/ptrace.h"
+#include <linux/interrupt.h>
+#include <asm/ptrace.h>
 
 extern int um_request_irq(unsigned int irq, int fd, int type,
 			  irq_handler_t handler,
diff --git a/arch/um/include/shared/irq_user.h b/arch/um/include/shared/irq_user.h
index 2b6d703925b5..df5633053957 100644
--- a/arch/um/include/shared/irq_user.h
+++ b/arch/um/include/shared/irq_user.h
@@ -6,7 +6,7 @@
 #ifndef __IRQ_USER_H__
 #define __IRQ_USER_H__
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 struct irq_fd {
 	struct irq_fd *next;
diff --git a/arch/um/include/shared/kern_util.h b/arch/um/include/shared/kern_util.h
index af6b6dc868ba..83a91f976330 100644
--- a/arch/um/include/shared/kern_util.h
+++ b/arch/um/include/shared/kern_util.h
@@ -6,8 +6,8 @@
 #ifndef __KERN_UTIL_H__
 #define __KERN_UTIL_H__
 
-#include "sysdep/ptrace.h"
-#include "sysdep/faultinfo.h"
+#include <sysdep/ptrace.h>
+#include <sysdep/faultinfo.h>
 
 struct siginfo;
 
diff --git a/arch/um/include/shared/longjmp.h b/arch/um/include/shared/longjmp.h
index e860bc5848e0..9bdddf4c405b 100644
--- a/arch/um/include/shared/longjmp.h
+++ b/arch/um/include/shared/longjmp.h
@@ -1,8 +1,8 @@
 #ifndef __UML_LONGJMP_H
 #define __UML_LONGJMP_H
 
-#include "sysdep/archsetjmp.h"
-#include "os.h"
+#include <sysdep/archsetjmp.h>
+#include <os.h>
 
 extern int setjmp(jmp_buf);
 extern void longjmp(jmp_buf, int);
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 89b686c1a3ea..44883049c11d 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -7,9 +7,9 @@
 #define __OS_H__
 
 #include <stdarg.h>
-#include "irq_user.h"
-#include "longjmp.h"
-#include "mm_id.h"
+#include <irq_user.h>
+#include <longjmp.h>
+#include <mm_id.h>
 
 #define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR))
 
diff --git a/arch/um/include/shared/registers.h b/arch/um/include/shared/registers.h
index f1e0aa56c52a..f5b76355ad71 100644
--- a/arch/um/include/shared/registers.h
+++ b/arch/um/include/shared/registers.h
@@ -6,8 +6,8 @@
 #ifndef __REGISTERS_H
 #define __REGISTERS_H
 
-#include "sysdep/ptrace.h"
-#include "sysdep/archsetjmp.h"
+#include <sysdep/ptrace.h>
+#include <sysdep/archsetjmp.h>
 
 extern int save_fp_registers(int pid, unsigned long *fp_regs);
 extern int restore_fp_registers(int pid, unsigned long *fp_regs);
diff --git a/arch/um/include/shared/skas/skas.h b/arch/um/include/shared/skas/skas.h
index 64d2c7443306..c45df961c874 100644
--- a/arch/um/include/shared/skas/skas.h
+++ b/arch/um/include/shared/skas/skas.h
@@ -6,7 +6,7 @@
 #ifndef __SKAS_H
 #define __SKAS_H
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 extern int userspace_pid[];
 extern int proc_mm, ptrace_faultinfo, ptrace_ldt;
diff --git a/arch/um/include/shared/skas_ptrace.h b/arch/um/include/shared/skas_ptrace.h
index 3d31bbacd016..630a9c92b93c 100644
--- a/arch/um/include/shared/skas_ptrace.h
+++ b/arch/um/include/shared/skas_ptrace.h
@@ -9,6 +9,6 @@
 #define PTRACE_FAULTINFO 52
 #define PTRACE_SWITCH_MM 55
 
-#include "sysdep/skas_ptrace.h"
+#include <sysdep/skas_ptrace.h>
 
 #endif
diff --git a/arch/um/kernel/asm-offsets.c b/arch/um/kernel/asm-offsets.c
index 91ea538e1612..1fb12235ab9c 100644
--- a/arch/um/kernel/asm-offsets.c
+++ b/arch/um/kernel/asm-offsets.c
@@ -1 +1 @@
-#include "sysdep/kernel-offsets.h"
+#include <sysdep/kernel-offsets.h>
diff --git a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in
index b7a43feafde7..972bf1659564 100644
--- a/arch/um/kernel/config.c.in
+++ b/arch/um/kernel/config.c.in
@@ -5,7 +5,7 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include "init.h"
+#include <init.h>
 
 static __initdata const char *config[] = {
 "CONFIG"
diff --git a/arch/um/kernel/dyn.lds.S b/arch/um/kernel/dyn.lds.S
index a3cab6d3ae02..fb8fd6fb6563 100644
--- a/arch/um/kernel/dyn.lds.S
+++ b/arch/um/kernel/dyn.lds.S
@@ -89,7 +89,7 @@ SECTIONS
 
   .kstrtab : { *(.kstrtab) }
 
-  #include "asm/common.lds.S"
+  #include <asm/common.lds.S>
 
   init.data : { INIT_DATA }
 
diff --git a/arch/um/kernel/early_printk.c b/arch/um/kernel/early_printk.c
index ec649bf72f68..49480f092456 100644
--- a/arch/um/kernel/early_printk.c
+++ b/arch/um/kernel/early_printk.c
@@ -9,7 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/console.h>
 #include <linux/init.h>
-#include "os.h"
+#include <os.h>
 
 static void early_console_write(struct console *con, const char *s, unsigned int n)
 {
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index 8c82786da823..de66c421ae9d 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -12,10 +12,10 @@
 #include <asm/current.h>
 #include <asm/processor.h>
 #include <asm/uaccess.h>
-#include "as-layout.h"
-#include "mem_user.h"
-#include "skas.h"
-#include "os.h"
+#include <as-layout.h>
+#include <mem_user.h>
+#include <skas.h>
+#include <os.h>
 #include "internal.h"
 
 void flush_thread(void)
diff --git a/arch/um/kernel/gmon_syms.c b/arch/um/kernel/gmon_syms.c
index e9bcf247bcee..1bf61266da8e 100644
--- a/arch/um/kernel/gmon_syms.c
+++ b/arch/um/kernel/gmon_syms.c
@@ -3,7 +3,7 @@
  * Licensed under the GPL
  */
 
-#include "linux/module.h"
+#include <linux/module.h>
 
 extern void __bb_init_func(void *)  __attribute__((weak));
 EXPORT_SYMBOL(__bb_init_func);
diff --git a/arch/um/kernel/gprof_syms.c b/arch/um/kernel/gprof_syms.c
index e2f043d0de6c..74ddb44288a3 100644
--- a/arch/um/kernel/gprof_syms.c
+++ b/arch/um/kernel/gprof_syms.c
@@ -3,7 +3,7 @@
  * Licensed under the GPL
  */
 
-#include "linux/module.h"
+#include <linux/module.h>
 
 extern void mcount(void);
 EXPORT_SYMBOL(mcount);
diff --git a/arch/um/kernel/initrd.c b/arch/um/kernel/initrd.c
index 10cc18f729fd..55cead809b18 100644
--- a/arch/um/kernel/initrd.c
+++ b/arch/um/kernel/initrd.c
@@ -3,12 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/init.h"
-#include "linux/bootmem.h"
-#include "linux/initrd.h"
-#include "asm/types.h"
-#include "init.h"
-#include "os.h"
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/initrd.h>
+#include <asm/types.h>
+#include <init.h>
+#include <os.h>
 
 /* Changed by uml_initrd_setup, which is a setup */
 static char *initrd __initdata = NULL;
diff --git a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
index 9883026f0730..36e12f0cefd5 100644
--- a/arch/um/kernel/irq.c
+++ b/arch/um/kernel/irq.c
@@ -5,17 +5,17 @@
  *	Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar
  */
 
-#include "linux/cpumask.h"
-#include "linux/hardirq.h"
-#include "linux/interrupt.h"
-#include "linux/kernel_stat.h"
-#include "linux/module.h"
-#include "linux/sched.h"
-#include "linux/seq_file.h"
-#include "linux/slab.h"
-#include "as-layout.h"
-#include "kern_util.h"
-#include "os.h"
+#include <linux/cpumask.h>
+#include <linux/hardirq.h>
+#include <linux/interrupt.h>
+#include <linux/kernel_stat.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <as-layout.h>
+#include <kern_util.h>
+#include <os.h>
 
 /*
  * This list is accessed under irq_lock, except in sigio_handler,
diff --git a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
index e17bea0b22e1..543c04756939 100644
--- a/arch/um/kernel/ksyms.c
+++ b/arch/um/kernel/ksyms.c
@@ -4,7 +4,7 @@
  */
 
 #include <linux/module.h>
-#include "os.h"
+#include <os.h>
 
 EXPORT_SYMBOL(set_signals);
 EXPORT_SYMBOL(get_signals);
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index ebb86b218445..5abcbfbe7e25 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -12,12 +12,12 @@
 #include <linux/slab.h>
 #include <asm/fixmap.h>
 #include <asm/page.h>
-#include "as-layout.h"
-#include "init.h"
-#include "kern.h"
-#include "kern_util.h"
-#include "mem_user.h"
-#include "os.h"
+#include <as-layout.h>
+#include <init.h>
+#include <kern.h>
+#include <kern_util.h>
+#include <mem_user.h>
+#include <os.h>
 
 /* allocated in paging_init, zeroed in mem_init, and unchanged thereafter */
 unsigned long *empty_zero_page = NULL;
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index c5f5afa50745..41f53240e794 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -23,10 +23,10 @@
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 #include <asm/uaccess.h>
-#include "as-layout.h"
-#include "kern_util.h"
-#include "os.h"
-#include "skas.h"
+#include <as-layout.h>
+#include <kern_util.h>
+#include <os.h>
+#include <skas.h>
 
 /*
  * This is a per-cpu array.  A processor only modifies its entry and it only
diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
index 3d15243ce692..ced8903921ae 100644
--- a/arch/um/kernel/reboot.c
+++ b/arch/um/kernel/reboot.c
@@ -3,13 +3,13 @@
  * Licensed under the GPL
  */
 
-#include "linux/sched.h"
-#include "linux/spinlock.h"
-#include "linux/slab.h"
-#include "linux/oom.h"
-#include "kern_util.h"
-#include "os.h"
-#include "skas.h"
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/slab.h>
+#include <linux/oom.h>
+#include <kern_util.h>
+#include <os.h>
+#include <skas.h>
 
 void (*pm_power_off)(void);
 
diff --git a/arch/um/kernel/sigio.c b/arch/um/kernel/sigio.c
index c88211139a51..b5e0cbb34382 100644
--- a/arch/um/kernel/sigio.c
+++ b/arch/um/kernel/sigio.c
@@ -4,9 +4,9 @@
  */
 
 #include <linux/interrupt.h>
-#include "irq_kern.h"
-#include "os.h"
-#include "sigio.h"
+#include <irq_kern.h>
+#include <os.h>
+#include <sigio.h>
 
 /* Protected by sigio_lock() called from write_sigio_workaround */
 static int sigio_irq_fd = -1;
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index cc9c2350e417..db18eb6124e1 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -9,8 +9,8 @@
 #include <asm/siginfo.h>
 #include <asm/signal.h>
 #include <asm/unistd.h>
-#include "frame_kern.h"
-#include "kern_util.h"
+#include <frame_kern.h>
+#include <kern_util.h>
 
 EXPORT_SYMBOL(block_signals);
 EXPORT_SYMBOL(unblock_signals);
diff --git a/arch/um/kernel/skas/clone.c b/arch/um/kernel/skas/clone.c
index e1fd066a3525..289771dadf81 100644
--- a/arch/um/kernel/skas/clone.c
+++ b/arch/um/kernel/skas/clone.c
@@ -7,10 +7,10 @@
 #include <sched.h>
 #include <asm/unistd.h>
 #include <sys/time.h>
-#include "as-layout.h"
-#include "ptrace_user.h"
-#include "stub-data.h"
-#include "sysdep/stub.h"
+#include <as-layout.h>
+#include <ptrace_user.h>
+#include <stub-data.h>
+#include <sysdep/stub.h>
 
 /*
  * This is in a separate file because it needs to be compiled with any
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 0a49ef0c2bf4..ff03067a3b14 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -3,14 +3,14 @@
  * Licensed under the GPL
  */
 
-#include "linux/mm.h"
-#include "linux/sched.h"
-#include "linux/slab.h"
-#include "asm/pgalloc.h"
-#include "asm/pgtable.h"
-#include "as-layout.h"
-#include "os.h"
-#include "skas.h"
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <as-layout.h>
+#include <os.h>
+#include <skas.h>
 
 extern int __syscall_stub_start;
 
diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
index 0a9e57e7446b..4da11b3c8ddb 100644
--- a/arch/um/kernel/skas/process.c
+++ b/arch/um/kernel/skas/process.c
@@ -3,12 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/init.h"
-#include "linux/sched.h"
-#include "as-layout.h"
-#include "kern.h"
-#include "os.h"
-#include "skas.h"
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <as-layout.h>
+#include <kern.h>
+#include <os.h>
+#include <skas.h>
 
 int new_mm(unsigned long stack)
 {
diff --git a/arch/um/kernel/skas/syscall.c b/arch/um/kernel/skas/syscall.c
index 86368a025a96..c0681e097432 100644
--- a/arch/um/kernel/skas/syscall.c
+++ b/arch/um/kernel/skas/syscall.c
@@ -3,11 +3,11 @@
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/ptrace.h"
-#include "kern_util.h"
-#include "sysdep/ptrace.h"
-#include "sysdep/syscalls.h"
+#include <linux/kernel.h>
+#include <linux/ptrace.h>
+#include <kern_util.h>
+#include <sysdep/ptrace.h>
+#include <sysdep/syscalls.h>
 
 extern int syscall_table_size;
 #define NR_SYSCALLS (syscall_table_size / sizeof(void *))
diff --git a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
index cd7df79c6a56..1d3e0c17340b 100644
--- a/arch/um/kernel/skas/uaccess.c
+++ b/arch/um/kernel/skas/uaccess.c
@@ -11,8 +11,8 @@
 #include <asm/current.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
-#include "kern_util.h"
-#include "os.h"
+#include <kern_util.h>
+#include <os.h>
 
 pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr)
 {
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index a02b7e9e6b94..5c8c3ea7db7b 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -3,24 +3,24 @@
  * Licensed under the GPL
  */
 
-#include "linux/percpu.h"
-#include "asm/pgalloc.h"
-#include "asm/tlb.h"
+#include <linux/percpu.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
 
 #ifdef CONFIG_SMP
 
-#include "linux/sched.h"
-#include "linux/module.h"
-#include "linux/threads.h"
-#include "linux/interrupt.h"
-#include "linux/err.h"
-#include "linux/hardirq.h"
-#include "asm/smp.h"
-#include "asm/processor.h"
-#include "asm/spinlock.h"
-#include "kern.h"
-#include "irq_user.h"
-#include "os.h"
+#include <linux/sched.h>
+#include <linux/module.h>
+#include <linux/threads.h>
+#include <linux/interrupt.h>
+#include <linux/err.h>
+#include <linux/hardirq.h>
+#include <asm/smp.h>
+#include <asm/processor.h>
+#include <asm/spinlock.h>
+#include <kern.h>
+#include <irq_user.h>
+#include <os.h>
 
 /* Per CPU bogomips and other parameters
  * The only piece used here is the ipi pipe, which is set before SMP is
diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
index a4c6d8eee74c..10808bda3671 100644
--- a/arch/um/kernel/syscall.c
+++ b/arch/um/kernel/syscall.c
@@ -3,16 +3,16 @@
  * Licensed under the GPL
  */
 
-#include "linux/file.h"
-#include "linux/fs.h"
-#include "linux/mm.h"
-#include "linux/sched.h"
-#include "linux/utsname.h"
-#include "linux/syscalls.h"
-#include "asm/current.h"
-#include "asm/mman.h"
-#include "asm/uaccess.h"
-#include "asm/unistd.h"
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/utsname.h>
+#include <linux/syscalls.h>
+#include <asm/current.h>
+#include <asm/mman.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
 #include "internal.h"
 
 long sys_fork(void)
diff --git a/arch/um/kernel/time.c b/arch/um/kernel/time.c
index 5f76d4ba151c..117568d4f64a 100644
--- a/arch/um/kernel/time.c
+++ b/arch/um/kernel/time.c
@@ -10,8 +10,8 @@
 #include <linux/threads.h>
 #include <asm/irq.h>
 #include <asm/param.h>
-#include "kern_util.h"
-#include "os.h"
+#include <kern_util.h>
+#include <os.h>
 
 void timer_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs)
 {
diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c
index f819af951c19..9472079471bb 100644
--- a/arch/um/kernel/tlb.c
+++ b/arch/um/kernel/tlb.c
@@ -8,10 +8,10 @@
 #include <linux/sched.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
-#include "as-layout.h"
-#include "mem_user.h"
-#include "os.h"
-#include "skas.h"
+#include <as-layout.h>
+#include <mem_user.h>
+#include <os.h>
+#include <skas.h>
 
 struct host_vm_change {
 	struct host_vm_op {
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 0353b98ae35a..cf7585fbc9fa 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -10,11 +10,11 @@
 #include <asm/current.h>
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
-#include "arch.h"
-#include "as-layout.h"
-#include "kern_util.h"
-#include "os.h"
-#include "skas.h"
+#include <arch.h>
+#include <as-layout.h>
+#include <kern_util.h>
+#include <os.h>
+#include <skas.h>
 
 /*
  * Note this is constrained to return 0, -EFAULT, -EACCESS, -ENOMEM by
diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 4db8770906ca..87df5e3acc26 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -14,13 +14,13 @@
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
-#include "as-layout.h"
-#include "arch.h"
-#include "init.h"
-#include "kern.h"
-#include "kern_util.h"
-#include "mem_user.h"
-#include "os.h"
+#include <as-layout.h>
+#include <arch.h>
+#include <init.h>
+#include <kern.h>
+#include <kern_util.h>
+#include <mem_user.h>
+#include <os.h>
 
 #define DEFAULT_COMMAND_LINE "root=98:0"
 
diff --git a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c
index 81e07e2be3ae..f6cc3bd61781 100644
--- a/arch/um/kernel/umid.c
+++ b/arch/um/kernel/umid.c
@@ -4,9 +4,9 @@
  */
 
 #include <asm/errno.h>
-#include "init.h"
-#include "kern.h"
-#include "os.h"
+#include <init.h>
+#include <kern.h>
+#include <os.h>
 
 /* Changed by set_umid_arg */
 static int umid_inited = 0;
diff --git a/arch/um/kernel/uml.lds.S b/arch/um/kernel/uml.lds.S
index fbd99402d4d2..ff65fb4f1a95 100644
--- a/arch/um/kernel/uml.lds.S
+++ b/arch/um/kernel/uml.lds.S
@@ -60,7 +60,7 @@ SECTIONS
 	PROVIDE_HIDDEN(__rela_iplt_end = .);
   }
 
-  #include "asm/common.lds.S"
+  #include <asm/common.lds.S>
 
   init.data : { INIT_DATA }
   .data    :
diff --git a/arch/um/os-Linux/aio.c b/arch/um/os-Linux/aio.c
index c5d039e1ff3b..3a6bc2af0961 100644
--- a/arch/um/os-Linux/aio.c
+++ b/arch/um/os-Linux/aio.c
@@ -9,10 +9,10 @@
 #include <errno.h>
 #include <sys/time.h>
 #include <asm/unistd.h>
-#include "aio.h"
-#include "init.h"
-#include "kern_util.h"
-#include "os.h"
+#include <aio.h>
+#include <init.h>
+#include <kern_util.h>
+#include <os.h>
 
 struct aio_thread_req {
 	enum aio_type type;
diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h
index ddffd41c3f3f..54183a679fdd 100644
--- a/arch/um/os-Linux/drivers/etap.h
+++ b/arch/um/os-Linux/drivers/etap.h
@@ -6,7 +6,7 @@
 #ifndef __DRIVERS_ETAP_H
 #define __DRIVERS_ETAP_H
 
-#include "net_user.h"
+#include <net_user.h>
 
 struct ethertap_data {
 	char *dev_name;
diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c
index 7f6f9a71aae4..f424600a583f 100644
--- a/arch/um/os-Linux/drivers/ethertap_kern.c
+++ b/arch/um/os-Linux/drivers/ethertap_kern.c
@@ -9,7 +9,7 @@
 #include <linux/init.h>
 #include <linux/netdevice.h>
 #include "etap.h"
-#include "net_kern.h"
+#include <net_kern.h>
 
 struct ethertap_init {
 	char *dev_name;
diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
index db3d6481375a..b39b6696ac58 100644
--- a/arch/um/os-Linux/drivers/ethertap_user.c
+++ b/arch/um/os-Linux/drivers/ethertap_user.c
@@ -13,9 +13,9 @@
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include "etap.h"
-#include "os.h"
-#include "net_user.h"
-#include "um_malloc.h"
+#include <os.h>
+#include <net_user.h>
+#include <um_malloc.h>
 
 #define MAX_PACKET ETH_MAX_PACKET
 
diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h
index f17c31586c84..7367354ac8df 100644
--- a/arch/um/os-Linux/drivers/tuntap.h
+++ b/arch/um/os-Linux/drivers/tuntap.h
@@ -6,7 +6,7 @@
 #ifndef __UM_TUNTAP_H
 #define __UM_TUNTAP_H
 
-#include "net_user.h"
+#include <net_user.h>
 
 struct tuntap_data {
 	char *dev_name;
diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c
index 4048800e4696..d9d56e5810fe 100644
--- a/arch/um/os-Linux/drivers/tuntap_kern.c
+++ b/arch/um/os-Linux/drivers/tuntap_kern.c
@@ -7,7 +7,7 @@
 #include <linux/init.h>
 #include <linux/skbuff.h>
 #include <asm/errno.h>
-#include "net_kern.h"
+#include <net_kern.h>
 #include "tuntap.h"
 
 struct tuntap_init {
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
index a2aacffdd907..14126d9176aa 100644
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ b/arch/um/os-Linux/drivers/tuntap_user.c
@@ -13,8 +13,8 @@
 #include <sys/socket.h>
 #include <sys/wait.h>
 #include <sys/uio.h>
-#include "kern_util.h"
-#include "os.h"
+#include <kern_util.h>
+#include <os.h>
 #include "tuntap.h"
 
 static int tuntap_user_init(void *data, void *dev)
diff --git a/arch/um/os-Linux/elf_aux.c b/arch/um/os-Linux/elf_aux.c
index d895271ad6f7..1a365ddc4d02 100644
--- a/arch/um/os-Linux/elf_aux.c
+++ b/arch/um/os-Linux/elf_aux.c
@@ -9,9 +9,9 @@
  */
 #include <elf.h>
 #include <stddef.h>
-#include "init.h"
-#include "elf_user.h"
-#include "mem_user.h"
+#include <init.h>
+#include <elf_user.h>
+#include <mem_user.h>
 
 typedef Elf32_auxv_t elf_auxv_t;
 
diff --git a/arch/um/os-Linux/execvp.c b/arch/um/os-Linux/execvp.c
index 66e583a4031b..8fb25ca07c46 100644
--- a/arch/um/os-Linux/execvp.c
+++ b/arch/um/os-Linux/execvp.c
@@ -27,12 +27,12 @@
 #include <limits.h>
 
 #ifndef TEST
-#include "um_malloc.h"
+#include <um_malloc.h>
 #else
 #include <stdio.h>
 #define um_kmalloc malloc
 #endif
-#include "os.h"
+#include <os.h>
 
 /* Execute FILE, searching in the `PATH' environment variable if it contains
    no slashes, with arguments ARGV and environment from `environ'.  */
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index b049a63bb74b..c17bd6f7d674 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -13,7 +13,7 @@
 #include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/un.h>
-#include "os.h"
+#include <os.h>
 
 static void copy_stat(struct uml_stat *dst, const struct stat64 *src)
 {
diff --git a/arch/um/os-Linux/helper.c b/arch/um/os-Linux/helper.c
index cf26c4a9a43a..e3ee4a51ef63 100644
--- a/arch/um/os-Linux/helper.c
+++ b/arch/um/os-Linux/helper.c
@@ -10,9 +10,9 @@
 #include <linux/limits.h>
 #include <sys/socket.h>
 #include <sys/wait.h>
-#include "kern_util.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <kern_util.h>
+#include <os.h>
+#include <um_malloc.h>
 
 struct helper_data {
 	void (*pre_exec)(void*);
diff --git a/arch/um/os-Linux/irq.c b/arch/um/os-Linux/irq.c
index 9a49908b576c..b9afb74b79ad 100644
--- a/arch/um/os-Linux/irq.c
+++ b/arch/um/os-Linux/irq.c
@@ -8,9 +8,9 @@
 #include <poll.h>
 #include <signal.h>
 #include <string.h>
-#include "irq_user.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <irq_user.h>
+#include <os.h>
+#include <um_malloc.h>
 
 /*
  * Locked by irq_lock in arch/um/kernel/irq.c.  Changed by os_create_pollfd
diff --git a/arch/um/os-Linux/main.c b/arch/um/os-Linux/main.c
index 7a86dd516eb1..749c96da7b99 100644
--- a/arch/um/os-Linux/main.c
+++ b/arch/um/os-Linux/main.c
@@ -10,11 +10,11 @@
 #include <signal.h>
 #include <string.h>
 #include <sys/resource.h>
-#include "as-layout.h"
-#include "init.h"
-#include "kern_util.h"
-#include "os.h"
-#include "um_malloc.h"
+#include <as-layout.h>
+#include <init.h>
+#include <kern_util.h>
+#include <os.h>
+#include <um_malloc.h>
 
 #define PGD_BOUND (4 * 1024 * 1024)
 #define STACKSIZE (8 * 1024 * 1024)
diff --git a/arch/um/os-Linux/mem.c b/arch/um/os-Linux/mem.c
index 8e421e1d6d36..ba4398056fe9 100644
--- a/arch/um/os-Linux/mem.c
+++ b/arch/um/os-Linux/mem.c
@@ -13,8 +13,8 @@
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <sys/param.h>
-#include "init.h"
-#include "os.h"
+#include <init.h>
+#include <os.h>
 
 /* Modified by which_tmpdir, which is called during early boot */
 static char *default_tmpdir = "/tmp";
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 307f173e7f82..162bea3d91b2 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -12,10 +12,10 @@
 #include <sys/ptrace.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
-#include "init.h"
-#include "longjmp.h"
-#include "os.h"
-#include "skas_ptrace.h"
+#include <init.h>
+#include <longjmp.h>
+#include <os.h>
+#include <skas_ptrace.h>
 
 #define ARBITRARY_ADDR -1
 #define FAILURE_PID    -1
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index b866b9e3bef9..2ff8d4fe83c4 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -7,9 +7,9 @@
 #include <errno.h>
 #include <string.h>
 #include <sys/ptrace.h>
-#include "sysdep/ptrace.h"
-#include "sysdep/ptrace_user.h"
-#include "registers.h"
+#include <sysdep/ptrace.h>
+#include <sysdep/ptrace_user.h>
+#include <registers.h>
 
 int save_registers(int pid, struct uml_pt_regs *regs)
 {
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index 3c161218c671..8b61cc0e82c8 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -11,11 +11,11 @@
 #include <sched.h>
 #include <signal.h>
 #include <string.h>
-#include "kern_util.h"
-#include "init.h"
-#include "os.h"
-#include "sigio.h"
-#include "um_malloc.h"
+#include <kern_util.h>
+#include <init.h>
+#include <os.h>
+#include <sigio.h>
+#include <um_malloc.h>
 
 /*
  * Protected by sigio_lock(), also used by sigio_cleanup, which is an
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index 6366ce904b9b..b1469fe93295 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -9,10 +9,10 @@
 #include <errno.h>
 #include <signal.h>
 #include <strings.h>
-#include "as-layout.h"
-#include "kern_util.h"
-#include "os.h"
-#include "sysdep/mcontext.h"
+#include <as-layout.h>
+#include <kern_util.h>
+#include <os.h>
+#include <sysdep/mcontext.h>
 #include "internal.h"
 
 void (*sig_info[NSIG])(int, siginfo_t *, struct uml_pt_regs *) = {
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index 90b310d29179..689b18db798f 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -8,16 +8,16 @@
 #include <errno.h>
 #include <string.h>
 #include <sys/mman.h>
-#include "init.h"
-#include "as-layout.h"
-#include "mm_id.h"
-#include "os.h"
-#include "proc_mm.h"
-#include "ptrace_user.h"
-#include "registers.h"
-#include "skas.h"
-#include "sysdep/ptrace.h"
-#include "sysdep/stub.h"
+#include <init.h>
+#include <as-layout.h>
+#include <mm_id.h>
+#include <os.h>
+#include <proc_mm.h>
+#include <ptrace_user.h>
+#include <registers.h>
+#include <skas.h>
+#include <sysdep/ptrace.h>
+#include <sysdep/stub.h>
 
 extern unsigned long batch_syscall_stub, __syscall_stub_start;
 
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index d93bb40499f7..4625949bf1e4 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -11,17 +11,17 @@
 #include <sys/mman.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
-#include "as-layout.h"
-#include "init.h"
-#include "kern_util.h"
-#include "mem.h"
-#include "os.h"
-#include "proc_mm.h"
-#include "ptrace_user.h"
-#include "registers.h"
-#include "skas.h"
-#include "skas_ptrace.h"
-#include "sysdep/stub.h"
+#include <as-layout.h>
+#include <init.h>
+#include <kern_util.h>
+#include <mem.h>
+#include <os.h>
+#include <proc_mm.h>
+#include <ptrace_user.h>
+#include <registers.h>
+#include <skas.h>
+#include <skas_ptrace.h>
+#include <sysdep/stub.h>
 
 int is_skas_winch(int pid, int fd, void *data)
 {
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 425162e22af5..da4b9e9999fd 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -16,13 +16,13 @@
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <asm/unistd.h>
-#include "init.h"
-#include "os.h"
-#include "mem_user.h"
-#include "ptrace_user.h"
-#include "registers.h"
-#include "skas.h"
-#include "skas_ptrace.h"
+#include <init.h>
+#include <os.h>
+#include <mem_user.h>
+#include <ptrace_user.h>
+#include <registers.h>
+#include <skas.h>
+#include <skas_ptrace.h>
 
 static void ptrace_child(void)
 {
diff --git a/arch/um/os-Linux/time.c b/arch/um/os-Linux/time.c
index 0748fe0c8a73..fac388cb464f 100644
--- a/arch/um/os-Linux/time.c
+++ b/arch/um/os-Linux/time.c
@@ -8,8 +8,8 @@
 #include <signal.h>
 #include <time.h>
 #include <sys/time.h>
-#include "kern_util.h"
-#include "os.h"
+#include <kern_util.h>
+#include <os.h>
 #include "internal.h"
 
 int set_interval(void)
diff --git a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c
index dd12b99dcb59..721d8afa329b 100644
--- a/arch/um/os-Linux/tty.c
+++ b/arch/um/os-Linux/tty.c
@@ -7,8 +7,8 @@
 #include <unistd.h>
 #include <errno.h>
 #include <fcntl.h>
-#include "kern_util.h"
-#include "os.h"
+#include <kern_util.h>
+#include <os.h>
 
 struct grantpt_info {
 	int fd;
diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c
index 4832eb519f8d..c1dc89261f67 100644
--- a/arch/um/os-Linux/umid.c
+++ b/arch/um/os-Linux/umid.c
@@ -12,8 +12,8 @@
 #include <string.h>
 #include <unistd.h>
 #include <sys/stat.h>
-#include "init.h"
-#include "os.h"
+#include <init.h>
+#include <os.h>
 
 #define UML_DIR "~/.uml/"
 
diff --git a/arch/um/os-Linux/user_syms.c b/arch/um/os-Linux/user_syms.c
index 73926fa3f96b..db4a034aeee1 100644
--- a/arch/um/os-Linux/user_syms.c
+++ b/arch/um/os-Linux/user_syms.c
@@ -1,5 +1,5 @@
-#include "linux/types.h"
-#include "linux/module.h"
+#include <linux/types.h>
+#include <linux/module.h>
 
 /* Some of this are builtin function (some are not but could in the future),
  * so I *must* declare good prototypes for them and then EXPORT them.
diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
index 9e3b43bb84c9..492ef5e6e166 100644
--- a/arch/um/os-Linux/util.c
+++ b/arch/um/os-Linux/util.c
@@ -13,7 +13,7 @@
 #include <wait.h>
 #include <sys/mman.h>
 #include <sys/utsname.h>
-#include "os.h"
+#include <os.h>
 
 void stack_protections(unsigned long address)
 {
diff --git a/arch/um/sys-ppc/miscthings.c b/arch/um/sys-ppc/miscthings.c
index 1c11aed9c719..25908d26ce07 100644
--- a/arch/um/sys-ppc/miscthings.c
+++ b/arch/um/sys-ppc/miscthings.c
@@ -1,6 +1,6 @@
-#include "linux/threads.h"
-#include "linux/stddef.h"  // for NULL
-#include "linux/elf.h"  // for AT_NULL
+#include <linux/threads.h>
+#include <linux/stddef.h>  // for NULL
+#include <linux/elf.h>  // for AT_NULL
 
 /* The following function nicked from arch/ppc/kernel/process.c and
  * adapted slightly */
diff --git a/arch/um/sys-ppc/ptrace.c b/arch/um/sys-ppc/ptrace.c
index 66ef155248f1..8245df41b201 100644
--- a/arch/um/sys-ppc/ptrace.c
+++ b/arch/um/sys-ppc/ptrace.c
@@ -1,4 +1,4 @@
-#include "linux/sched.h"
+#include <linux/sched.h>
 #include "asm/ptrace.h"
 
 int putreg(struct task_struct *child, unsigned long regno, 
diff --git a/arch/um/sys-ppc/ptrace_user.c b/arch/um/sys-ppc/ptrace_user.c
index 224d2403c37b..4601b9296aa7 100644
--- a/arch/um/sys-ppc/ptrace_user.c
+++ b/arch/um/sys-ppc/ptrace_user.c
@@ -1,6 +1,6 @@
 #include <errno.h>
 #include <asm/ptrace.h>
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 int ptrace_getregs(long pid, unsigned long *regs_out)
 {
diff --git a/arch/um/sys-ppc/shared/sysdep/ptrace.h b/arch/um/sys-ppc/shared/sysdep/ptrace.h
index 0e3230e937e1..efe0c1a3ea9c 100644
--- a/arch/um/sys-ppc/shared/sysdep/ptrace.h
+++ b/arch/um/sys-ppc/shared/sysdep/ptrace.h
@@ -5,7 +5,7 @@
 #ifndef __SYS_PTRACE_PPC_H
 #define __SYS_PTRACE_PPC_H
 
-#include "linux/types.h"
+#include <linux/types.h>
 
 /* the following taken from <asm-ppc/ptrace.h> */
 
diff --git a/arch/um/sys-ppc/sigcontext.c b/arch/um/sys-ppc/sigcontext.c
index 40694d0f3d15..aac6c83fe44e 100644
--- a/arch/um/sys-ppc/sigcontext.c
+++ b/arch/um/sys-ppc/sigcontext.c
@@ -1,4 +1,4 @@
 #include "asm/ptrace.h"
 #include "asm/sigcontext.h"
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
diff --git a/arch/um/sys-ppc/sysrq.c b/arch/um/sys-ppc/sysrq.c
index 2f816f1a0ff4..f889449f9285 100644
--- a/arch/um/sys-ppc/sysrq.c
+++ b/arch/um/sys-ppc/sysrq.c
@@ -3,8 +3,8 @@
  * Licensed under the GPL
  */
 
-#include "linux/kernel.h"
-#include "linux/smp.h"
+#include <linux/kernel.h>
+#include <linux/smp.h>
 #include "asm/ptrace.h"
 #include "sysrq.h"
 
diff --git a/arch/x86/um/asm/elf.h b/arch/x86/um/asm/elf.h
index 0e07adc8cbe4..0feee2fd5077 100644
--- a/arch/x86/um/asm/elf.h
+++ b/arch/x86/um/asm/elf.h
@@ -6,7 +6,7 @@
 #define __UM_ELF_X86_H
 
 #include <asm/user.h>
-#include "skas.h"
+#include <skas.h>
 
 #ifdef CONFIG_X86_32
 
diff --git a/arch/x86/um/bugs_32.c b/arch/x86/um/bugs_32.c
index 17d88cf2c6c4..33daff4dade4 100644
--- a/arch/x86/um/bugs_32.c
+++ b/arch/x86/um/bugs_32.c
@@ -4,9 +4,9 @@
  */
 
 #include <signal.h>
-#include "kern_util.h"
-#include "longjmp.h"
-#include "sysdep/ptrace.h"
+#include <kern_util.h>
+#include <longjmp.h>
+#include <sysdep/ptrace.h>
 #include <generated/asm-offsets.h>
 
 /* Set during early boot */
diff --git a/arch/x86/um/bugs_64.c b/arch/x86/um/bugs_64.c
index 44e02ba2a265..8cc8256c698d 100644
--- a/arch/x86/um/bugs_64.c
+++ b/arch/x86/um/bugs_64.c
@@ -4,7 +4,7 @@
  * Licensed under the GPL
  */
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 void arch_check_bugs(void)
 {
diff --git a/arch/x86/um/fault.c b/arch/x86/um/fault.c
index d670f68532f4..8784ab30d91b 100644
--- a/arch/x86/um/fault.c
+++ b/arch/x86/um/fault.c
@@ -3,7 +3,7 @@
  * Licensed under the GPL
  */
 
-#include "sysdep/ptrace.h"
+#include <sysdep/ptrace.h>
 
 /* These two are from asm-um/uaccess.h and linux/module.h, check them. */
 struct exception_table_entry
diff --git a/arch/x86/um/ldt.c b/arch/x86/um/ldt.c
index 26b0e39d2ce9..8e08176f0bcb 100644
--- a/arch/x86/um/ldt.c
+++ b/arch/x86/um/ldt.c
@@ -7,11 +7,11 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <asm/unistd.h>
-#include "os.h"
-#include "proc_mm.h"
-#include "skas.h"
-#include "skas_ptrace.h"
-#include "sysdep/tls.h"
+#include <os.h>
+#include <proc_mm.h>
+#include <skas.h>
+#include <skas_ptrace.h>
+#include <sysdep/tls.h>
 
 extern int modify_ldt(int func, void *ptr, unsigned long bytecount);
 
diff --git a/arch/x86/um/mem_64.c b/arch/x86/um/mem_64.c
index 546518727a73..c6492e75797b 100644
--- a/arch/x86/um/mem_64.c
+++ b/arch/x86/um/mem_64.c
@@ -1,6 +1,6 @@
-#include "linux/mm.h"
-#include "asm/page.h"
-#include "asm/mman.h"
+#include <linux/mm.h>
+#include <asm/page.h>
+#include <asm/mman.h>
 
 const char *arch_vma_name(struct vm_area_struct *vma)
 {
diff --git a/arch/x86/um/os-Linux/registers.c b/arch/x86/um/os-Linux/registers.c
index 0cdbb86b012b..41bfe84e11ab 100644
--- a/arch/x86/um/os-Linux/registers.c
+++ b/arch/x86/um/os-Linux/registers.c
@@ -9,8 +9,8 @@
 #ifdef __i386__
 #include <sys/user.h>
 #endif
-#include "longjmp.h"
-#include "sysdep/ptrace_user.h"
+#include <longjmp.h>
+#include <sysdep/ptrace_user.h>
 
 int save_fp_registers(int pid, unsigned long *fp_regs)
 {
diff --git a/arch/x86/um/os-Linux/task_size.c b/arch/x86/um/os-Linux/task_size.c
index efb16c5c9bcf..8502ad30e61b 100644
--- a/arch/x86/um/os-Linux/task_size.c
+++ b/arch/x86/um/os-Linux/task_size.c
@@ -2,7 +2,7 @@
 #include <stdlib.h>
 #include <signal.h>
 #include <sys/mman.h>
-#include "longjmp.h"
+#include <longjmp.h>
 
 #ifdef __i386__
 
diff --git a/arch/x86/um/os-Linux/tls.c b/arch/x86/um/os-Linux/tls.c
index 82276b6071af..9d94b3b76c74 100644
--- a/arch/x86/um/os-Linux/tls.c
+++ b/arch/x86/um/os-Linux/tls.c
@@ -5,7 +5,7 @@
 #include <sys/syscall.h>
 #include <unistd.h>
 
-#include "sysdep/tls.h"
+#include <sysdep/tls.h>
 
 #ifndef PTRACE_GET_THREAD_AREA
 #define PTRACE_GET_THREAD_AREA 25
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index 3b949daa095c..ce3dd4f36f3f 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -3,10 +3,10 @@
  * Licensed under the GPL
  */
 
-#include "linux/mm.h"
-#include "linux/sched.h"
-#include "asm/uaccess.h"
-#include "skas.h"
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <skas.h>
 
 extern int arch_switch_tls(struct task_struct *to);
 
diff --git a/arch/x86/um/ptrace_user.c b/arch/x86/um/ptrace_user.c
index 3960ca1dd35a..617885b18992 100644
--- a/arch/x86/um/ptrace_user.c
+++ b/arch/x86/um/ptrace_user.c
@@ -4,7 +4,7 @@
  */
 
 #include <errno.h>
-#include "ptrace_user.h"
+#include <ptrace_user.h>
 
 int ptrace_getregs(long pid, unsigned long *regs_out)
 {
diff --git a/arch/x86/um/shared/sysdep/ptrace.h b/arch/x86/um/shared/sysdep/ptrace.h
index 6ce2d76eb908..eb9356904ad3 100644
--- a/arch/x86/um/shared/sysdep/ptrace.h
+++ b/arch/x86/um/shared/sysdep/ptrace.h
@@ -2,7 +2,7 @@
 #define __SYSDEP_X86_PTRACE_H
 
 #include <generated/user_constants.h>
-#include "sysdep/faultinfo.h"
+#include <sysdep/faultinfo.h>
 
 #define MAX_REG_OFFSET (UM_FRAME_SIZE)
 #define MAX_REG_NR ((MAX_REG_OFFSET) / sizeof(unsigned long))
diff --git a/arch/x86/um/shared/sysdep/stub.h b/arch/x86/um/shared/sysdep/stub.h
index bd161e300102..3f55e5bd3cec 100644
--- a/arch/x86/um/shared/sysdep/stub.h
+++ b/arch/x86/um/shared/sysdep/stub.h
@@ -1,8 +1,8 @@
 #include <asm/unistd.h>
 #include <sys/mman.h>
 #include <signal.h>
-#include "as-layout.h"
-#include "stub-data.h"
+#include <as-layout.h>
+#include <stub-data.h>
 
 #ifdef __i386__
 #include "stub_32.h"
diff --git a/arch/x86/um/shared/sysdep/syscalls_32.h b/arch/x86/um/shared/sysdep/syscalls_32.h
index 05cb796aecb5..8436079be914 100644
--- a/arch/x86/um/shared/sysdep/syscalls_32.h
+++ b/arch/x86/um/shared/sysdep/syscalls_32.h
@@ -3,8 +3,8 @@
  * Licensed under the GPL
  */
 
-#include "asm/unistd.h"
-#include "sysdep/ptrace.h"
+#include <asm/unistd.h>
+#include <sysdep/ptrace.h>
 
 typedef long syscall_handler_t(struct pt_regs);
 
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index ba7363ecf896..bdaa08cfbcf4 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -11,8 +11,8 @@
 #include <asm/unistd.h>
 #include <asm/uaccess.h>
 #include <asm/ucontext.h>
-#include "frame_kern.h"
-#include "skas.h"
+#include <frame_kern.h>
+#include <skas.h>
 
 #ifdef CONFIG_X86_32
 
diff --git a/arch/x86/um/stub_32.S b/arch/x86/um/stub_32.S
index 54a36ec20cb7..b972649d3a18 100644
--- a/arch/x86/um/stub_32.S
+++ b/arch/x86/um/stub_32.S
@@ -1,4 +1,4 @@
-#include "as-layout.h"
+#include <as-layout.h>
 
 	.globl syscall_stub
 .section .__syscall_stub, "ax"
diff --git a/arch/x86/um/stub_64.S b/arch/x86/um/stub_64.S
index 20e4a96a6dcb..7160b20172d0 100644
--- a/arch/x86/um/stub_64.S
+++ b/arch/x86/um/stub_64.S
@@ -1,4 +1,4 @@
-#include "as-layout.h"
+#include <as-layout.h>
 
 	.globl syscall_stub
 .section .__syscall_stub, "ax"
diff --git a/arch/x86/um/stub_segv.c b/arch/x86/um/stub_segv.c
index b7450bd22e7d..1518d2805ae8 100644
--- a/arch/x86/um/stub_segv.c
+++ b/arch/x86/um/stub_segv.c
@@ -3,9 +3,9 @@
  * Licensed under the GPL
  */
 
-#include "sysdep/stub.h"
-#include "sysdep/faultinfo.h"
-#include "sysdep/mcontext.h"
+#include <sysdep/stub.h>
+#include <sysdep/faultinfo.h>
+#include <sysdep/mcontext.h>
 
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_segv_handler(int sig, siginfo_t *info, void *p)
diff --git a/arch/x86/um/tls_32.c b/arch/x86/um/tls_32.c
index baba84f8ecb8..5f5feff3d24c 100644
--- a/arch/x86/um/tls_32.c
+++ b/arch/x86/um/tls_32.c
@@ -3,12 +3,12 @@
  * Licensed under the GPL
  */
 
-#include "linux/percpu.h"
-#include "linux/sched.h"
-#include "asm/uaccess.h"
-#include "os.h"
-#include "skas.h"
-#include "sysdep/tls.h"
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <os.h>
+#include <skas.h>
+#include <sysdep/tls.h>
 
 /*
  * If needed we can detect when it's uninitialized.
diff --git a/arch/x86/um/tls_64.c b/arch/x86/um/tls_64.c
index f7ba46200ecd..d22363cb854e 100644
--- a/arch/x86/um/tls_64.c
+++ b/arch/x86/um/tls_64.c
@@ -1,4 +1,4 @@
-#include "linux/sched.h"
+#include <linux/sched.h>
 
 void clear_flushed_tls(struct task_struct *task)
 {
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 1fe731337f07..9c88da0e855a 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -1,7 +1,7 @@
 #ifndef __UM_FS_HOSTFS
 #define __UM_FS_HOSTFS
 
-#include "os.h"
+#include <os.h>
 
 /*
  * These are exactly the same definitions as in fs.h, but the names are
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 124146543aa7..9e45f65241ee 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -16,8 +16,8 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include "hostfs.h"
-#include "init.h"
-#include "kern.h"
+#include <init.h>
+#include <kern.h>
 
 struct hostfs_inode_info {
 	int fd;
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index a74ad0d371c2..67838f3aa20a 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -15,7 +15,6 @@
 #include <sys/types.h>
 #include <sys/vfs.h>
 #include "hostfs.h"
-#include "os.h"
 #include <utime.h>
 
 static void stat64_to_hostfs(const struct stat64 *buf, struct hostfs_stat *p)
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index c1dffe47fde2..a68cb2092512 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -18,7 +18,7 @@
 #include <linux/pid_namespace.h>
 #include <linux/namei.h>
 #include <asm/uaccess.h>
-#include "os.h"
+#include <os.h>
 
 static struct inode *get_inode(struct super_block *, struct dentry *);
 
-- 
cgit v1.2.3


From 42c12213141d7c88e8f649b4bacd19ba14577658 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Wed, 8 Aug 2012 21:12:19 -0500
Subject: kgdb,x86: fix warning about unused variable

When compiling without CONFIG_DEBUG_RODATA the following
compiler warning is generated:

arch/x86/kernel/kgdb.c: In function 'kgdb_arch_set_breakpoint':
arch/x86/kernel/kgdb.c:749: warning: unused variable 'opc'

The variable instantiation needs to be inside the #ifdef to
make the warning go away.

Reported-by: Thiago Rafael Becker <trbecker@trbecker.org>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 arch/x86/kernel/kgdb.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 3f61904365cf..836f8322960e 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -746,7 +746,9 @@ void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
 int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
 {
 	int err;
+#ifdef CONFIG_DEBUG_RODATA
 	char opc[BREAK_INSTR_SIZE];
+#endif /* CONFIG_DEBUG_RODATA */
 
 	bpt->type = BP_BREAKPOINT;
 	err = probe_kernel_read(bpt->saved_instr, (char *)bpt->bpt_addr,
-- 
cgit v1.2.3


From cd0608e71e9757f4dae35bcfb4e88f4d1a03a8ab Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 10 Oct 2012 13:30:47 -0400
Subject: xen/bootup: allow read_tscp call for Xen PV guests.

The hypervisor will trap it. However without this patch,
we would crash as the .read_tscp is set to NULL. This patch
fixes it and sets it to the native_read_tscp call.

CC: stable@vger.kernel.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 70f140447a28..668bbfee0cce 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1164,6 +1164,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.read_tsc = native_read_tsc,
 	.read_pmc = native_read_pmc,
 
+	.read_tscp = native_read_tscp,
+
 	.iret = xen_iret,
 	.irq_enable_sysexit = xen_sysexit,
 #ifdef CONFIG_X86_64
-- 
cgit v1.2.3


From 1a7bbda5b1ab0e02622761305a32dc38735b90b2 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Wed, 10 Oct 2012 13:25:48 -0400
Subject: xen/bootup: allow {read|write}_cr8 pvops call.

We actually do not do anything about it. Just return a default
value of zero and if the kernel tries to write anything but 0
we BUG_ON.

This fixes the case when an user tries to suspend the machine
and it blows up in save_processor_state b/c 'read_cr8' is set
to NULL and we get:

kernel BUG at /home/konrad/ssd/linux/arch/x86/include/asm/paravirt.h:100!
invalid opcode: 0000 [#1] SMP
Pid: 2687, comm: init.late Tainted: G           O 3.6.0upstream-00002-gac264ac-dirty #4 Bochs Bochs
RIP: e030:[<ffffffff814d5f42>]  [<ffffffff814d5f42>] save_processor_state+0x212/0x270

.. snip..
Call Trace:
 [<ffffffff810733bf>] do_suspend_lowlevel+0xf/0xac
 [<ffffffff8107330c>] ? x86_acpi_suspend_lowlevel+0x10c/0x150
 [<ffffffff81342ee2>] acpi_suspend_enter+0x57/0xd5

CC: stable@vger.kernel.org
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 arch/x86/xen/enlighten.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 668bbfee0cce..4466feb4c69b 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -987,7 +987,16 @@ static void xen_write_cr4(unsigned long cr4)
 
 	native_write_cr4(cr4);
 }
-
+#ifdef CONFIG_X86_64
+static inline unsigned long xen_read_cr8(void)
+{
+	return 0;
+}
+static inline void xen_write_cr8(unsigned long val)
+{
+	BUG_ON(val);
+}
+#endif
 static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high)
 {
 	int ret;
@@ -1156,6 +1165,11 @@ static const struct pv_cpu_ops xen_cpu_ops __initconst = {
 	.read_cr4_safe = native_read_cr4_safe,
 	.write_cr4 = xen_write_cr4,
 
+#ifdef CONFIG_X86_64
+	.read_cr8 = xen_read_cr8,
+	.write_cr8 = xen_write_cr8,
+#endif
+
 	.wbinvd = native_wbinvd,
 
 	.read_msr = native_read_msr_safe,
-- 
cgit v1.2.3


From 22e2430d60dbdfcdd732a086e9ef2dbd74c266d1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 10 Oct 2012 21:35:42 -0400
Subject: x86, um: convert to saner kernel_execve() semantics

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/um/include/asm/processor-generic.h |  2 --
 arch/um/include/shared/os.h             |  1 -
 arch/um/kernel/exec.c                   |  5 -----
 arch/um/kernel/process.c                | 10 +++-------
 arch/um/os-Linux/process.c              | 13 -------------
 arch/x86/Kconfig                        |  1 +
 arch/x86/include/asm/unistd.h           |  1 -
 arch/x86/kernel/entry_32.S              | 31 ++++++++++++-------------------
 arch/x86/kernel/entry_64.S              | 24 ++++--------------------
 arch/x86/um/Kconfig                     |  1 +
 10 files changed, 21 insertions(+), 68 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/um/include/asm/processor-generic.h b/arch/um/include/asm/processor-generic.h
index 5d9ab0c4f488..62435a00e70e 100644
--- a/arch/um/include/asm/processor-generic.h
+++ b/arch/um/include/asm/processor-generic.h
@@ -26,7 +26,6 @@ struct thread_struct {
 	jmp_buf *fault_catcher;
 	struct task_struct *prev_sched;
 	unsigned long temp_stack;
-	jmp_buf *exec_buf;
 	struct arch_thread arch;
 	jmp_buf switch_buf;
 	int mm_count;
@@ -54,7 +53,6 @@ struct thread_struct {
 	.fault_addr		= NULL, \
 	.prev_sched		= NULL, \
 	.temp_stack		= 0, \
-	.exec_buf		= NULL, \
 	.arch			= INIT_ARCH_THREAD, \
 	.request		= { 0 } \
 }
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index 89b686c1a3ea..25dbd372d322 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -191,7 +191,6 @@ extern int os_getpid(void);
 extern int os_getpgrp(void);
 
 extern void init_new_thread_signals(void);
-extern int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr);
 
 extern int os_map_memory(void *virt, int fd, unsigned long long off,
 			 unsigned long len, int r, int w, int x);
diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c
index e427301f55d6..565ca396d83e 100644
--- a/arch/um/kernel/exec.c
+++ b/arch/um/kernel/exec.c
@@ -47,8 +47,3 @@ void start_thread(struct pt_regs *regs, unsigned long eip, unsigned long esp)
 #endif
 }
 EXPORT_SYMBOL(start_thread);
-
-void __noreturn ret_from_kernel_execve(struct pt_regs *unused)
-{
-	UML_LONGJMP(current->thread.exec_buf, 1);
-}
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index a1b50add48a2..94b0d8b9810b 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -135,14 +135,10 @@ void new_thread_handler(void)
 	arg = current->thread.request.u.thread.arg;
 
 	/*
-	 * The return value is 1 if the kernel thread execs a process,
-	 * 0 if it just exits
+	 * callback returns only if the kernel thread execs a process
 	 */
-	n = run_kernel_thread(fn, arg, &current->thread.exec_buf);
-	if (n == 1)
-		userspace(&current->thread.regs.regs);
-	else
-		do_exit(0);
+	n = fn(arg);
+	userspace(&current->thread.regs.regs);
 }
 
 /* Called magically, see new_thread_handler above */
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 307f173e7f82..a04ec167a9c3 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -244,16 +244,3 @@ void init_new_thread_signals(void)
 	signal(SIGWINCH, SIG_IGN);
 	signal(SIGTERM, SIG_DFL);
 }
-
-int run_kernel_thread(int (*fn)(void *), void *arg, jmp_buf **jmp_ptr)
-{
-	jmp_buf buf;
-	int n;
-
-	*jmp_ptr = &buf;
-	n = UML_SETJMP(&buf);
-	if (n != 0)
-		return n;
-	(*fn)(arg);
-	return 0;
-}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d93eb9d1bb97..45edcba41e39 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -98,6 +98,7 @@ config X86
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 config INSTRUCTION_DECODER
 	def_bool (KPROBES || PERF_EVENTS || UPROBES)
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h
index 55d155560fdf..16f3fc6ebf2e 100644
--- a/arch/x86/include/asm/unistd.h
+++ b/arch/x86/include/asm/unistd.h
@@ -51,7 +51,6 @@
 # define __ARCH_WANT_SYS_UTIME
 # define __ARCH_WANT_SYS_WAITPID
 # define __ARCH_WANT_SYS_EXECVE
-# define __ARCH_WANT_KERNEL_EXECVE
 
 /*
  * "Conditional" syscalls
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index fe4cc305d8da..91d295908c30 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -298,12 +298,20 @@ ENTRY(ret_from_fork)
 	CFI_ENDPROC
 END(ret_from_fork)
 
-ENTRY(ret_from_kernel_execve)
-	movl %eax, %esp
-	movl $0,PT_EAX(%esp)
+ENTRY(ret_from_kernel_thread)
+	CFI_STARTPROC
+	pushl_cfi %eax
+	call schedule_tail
 	GET_THREAD_INFO(%ebp)
+	popl_cfi %eax
+	pushl_cfi $0x0202		# Reset kernel eflags
+	popfl_cfi
+	movl PT_EBP(%esp),%eax
+	call *PT_EBX(%esp)
+	movl $0,PT_EAX(%esp)
 	jmp syscall_exit
-END(ret_from_kernel_execve)
+	CFI_ENDPROC
+ENDPROC(ret_from_kernel_thread)
 
 /*
  * Interrupt exit functions should be protected against kprobes
@@ -994,21 +1002,6 @@ END(spurious_interrupt_bug)
  */
 	.popsection
 
-ENTRY(ret_from_kernel_thread)
-	CFI_STARTPROC
-	pushl_cfi %eax
-	call schedule_tail
-	GET_THREAD_INFO(%ebp)
-	popl_cfi %eax
-	pushl_cfi $0x0202		# Reset kernel eflags
-	popfl_cfi
-	movl PT_EBP(%esp),%eax
-	call *PT_EBX(%esp)
-	call do_exit
-	ud2			# padding for call trace
-	CFI_ENDPROC
-ENDPROC(ret_from_kernel_thread)
-
 #ifdef CONFIG_XEN
 /* Xen doesn't set %esp to be precisely what the normal sysenter
    entrypoint expects, so fix it up before using the normal path. */
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 053c9552ffd9..e1f98c22003e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -459,15 +459,13 @@ ENTRY(ret_from_fork)
 	jmp ret_from_sys_call			# go to the SYSRET fastpath
 
 1:
-	subq $REST_SKIP, %rsp	# move the stack pointer back
+	subq $REST_SKIP, %rsp	# leave space for volatiles
 	CFI_ADJUST_CFA_OFFSET	REST_SKIP
 	movq %rbp, %rdi
 	call *%rbx
-	# exit
-	mov %eax, %edi
-	call do_exit
-	ud2			# padding for call trace
-
+	movl $0, RAX(%rsp)
+	RESTORE_REST
+	jmp int_ret_from_sys_call
 	CFI_ENDPROC
 END(ret_from_fork)
 
@@ -1214,20 +1212,6 @@ bad_gs:
 	jmp  2b
 	.previous
 
-ENTRY(ret_from_kernel_execve)
-	movq %rdi, %rsp
-	movl $0, RAX(%rsp)
-	// RESTORE_REST
-	movq 0*8(%rsp), %r15
-	movq 1*8(%rsp), %r14
-	movq 2*8(%rsp), %r13
-	movq 3*8(%rsp), %r12
-	movq 4*8(%rsp), %rbp
-	movq 5*8(%rsp), %rbx
-	addq $(6*8), %rsp
-	jmp int_ret_from_sys_call
-END(ret_from_kernel_execve)
-
 /* Call softirq on interrupt stack. Interrupts are off. */
 ENTRY(call_softirq)
 	CFI_STARTPROC
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index da85b6fc8e8e..cab8eb88dd22 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -14,6 +14,7 @@ config UML_X86
 	def_bool y
 	select GENERIC_FIND_FIRST_BIT
 	select GENERIC_KERNEL_THREAD
+	select GENERIC_KERNEL_EXECVE
 
 config 64BIT
 	bool "64-bit kernel" if SUBARCH = "x86"
-- 
cgit v1.2.3


From b6eea87fc6850d3531a64a27d2323a4498cd4e43 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Fri, 12 Oct 2012 11:19:59 +0100
Subject: x86, boot: Explicitly include autoconf.h for hostprogs

The hostprogs need access to the CONFIG_* symbols found in
include/generated/autoconf.h.  But commit abbf1590de22 ("UAPI: Partition
the header include path sets and add uapi/ header directories") replaced
$(LINUXINCLUDE) with $(USERINCLUDE) which doesn't contain the necessary
include paths.

This has the undesirable effect of breaking the EFI boot stub because
the #ifdef CONFIG_EFI_STUB code in arch/x86/boot/tools/build.c is
never compiled.

It should also be noted that because $(USERINCLUDE) isn't exported by
the top-level Makefile it's actually empty in arch/x86/boot/Makefile.

Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/boot/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/x86')

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index ce03476d8c8f..ccce0ed67dde 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -37,7 +37,8 @@ setup-y		+= video-bios.o
 targets		+= $(setup-y)
 hostprogs-y	:= mkcpustr tools/build
 
-HOST_EXTRACFLAGS += -I$(srctree)/tools/include $(USERINCLUDE) \
+HOST_EXTRACFLAGS += -I$(srctree)/tools/include \
+		    -include include/generated/autoconf.h \
 	            -D__EXPORTED_HEADERS__
 
 $(obj)/cpu.o: $(obj)/cpustr.h
-- 
cgit v1.2.3


From 3ce9e53e788881da0d5f3912f80e0dd6b501f304 Mon Sep 17 00:00:00 2001
From: Michal Marek <mmarek@suse.cz>
Date: Mon, 15 Oct 2012 21:16:56 +0200
Subject: kbuild: Fix accidental revert in commit fe04ddf

Commit fe04ddf7c291 ("kbuild: Do not package /boot and /lib in make
tar-pkg") accidentally reverted two previous kbuild commits.  I don't
know what I was thinking.

This brings back changes made by commits 24cc7fb69a5b ("x86/kbuild:
archscripts depends on scripts_basic") and c1c1a59e37da ("firmware: fix
directory creation rule matching with make 3.80")

Reported-by: Jan Beulich <JBeulich@suse.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Michal Marek <mmarek@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/Makefile       | 2 +-
 scripts/Makefile.fwinst | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 58790bd85c1d..05afcca66de6 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -142,7 +142,7 @@ KBUILD_CFLAGS += $(call cc-option,-mno-avx,)
 KBUILD_CFLAGS += $(mflags-y)
 KBUILD_AFLAGS += $(mflags-y)
 
-archscripts:
+archscripts: scripts_basic
 	$(Q)$(MAKE) $(build)=arch/x86/tools relocs
 
 ###
diff --git a/scripts/Makefile.fwinst b/scripts/Makefile.fwinst
index c3f69ae275d1..4d908d16c035 100644
--- a/scripts/Makefile.fwinst
+++ b/scripts/Makefile.fwinst
@@ -27,7 +27,7 @@ endif
 installed-mod-fw := $(addprefix $(INSTALL_FW_PATH)/,$(mod-fw))
 
 installed-fw := $(addprefix $(INSTALL_FW_PATH)/,$(fw-shipped-all))
-installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/.
+installed-fw-dirs := $(sort $(dir $(installed-fw))) $(INSTALL_FW_PATH)/./
 
 # Workaround for make < 3.81, where .SECONDEXPANSION doesn't work.
 PHONY += $(INSTALL_FW_PATH)/$$(%) install-all-dirs
@@ -42,7 +42,7 @@ quiet_cmd_install = INSTALL $(subst $(srctree)/,,$@)
 $(installed-fw-dirs):
 	$(call cmd,mkdir)
 
-$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $$(dir $(INSTALL_FW_PATH)/%)
+$(installed-fw): $(INSTALL_FW_PATH)/%: $(obj)/% | $(INSTALL_FW_PATH)/$$(dir %)
 	$(call cmd,install)
 
 PHONY +=  __fw_install __fw_modinst FORCE
-- 
cgit v1.2.3


From 32bec973a8435afc0b032da22174701f836549b2 Mon Sep 17 00:00:00 2001
From: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Date: Thu, 18 Oct 2012 23:24:57 +0300
Subject: crypto: aesni - fix XTS mode on x86-32, add wrapper function for
 asmlinkage aesni_enc()

Calling convention for internal functions and 'asmlinkage' functions is
different on x86-32. Therefore do not directly cast aesni_enc as XTS tweak
function, but use wrapper function in between. Fixes crash with "XTS +
aesni_intel + x86-32" combination.

Cc: stable@vger.kernel.org
Reported-by: Krzysztof Kolasa <kkolasa@winsoft.pl>
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/crypto/aesni-intel_glue.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 7c04d0da709b..1b9c22bea8a7 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -515,6 +515,11 @@ static int xts_aesni_setkey(struct crypto_tfm *tfm, const u8 *key,
 }
 
 
+static void aesni_xts_tweak(void *ctx, u8 *out, const u8 *in)
+{
+	aesni_enc(ctx, out, in);
+}
+
 static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		       struct scatterlist *src, unsigned int nbytes)
 {
@@ -525,7 +530,7 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		.tbuflen = sizeof(buf),
 
 		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
-		.tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+		.tweak_fn = aesni_xts_tweak,
 		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
 		.crypt_fn = lrw_xts_encrypt_callback,
 	};
@@ -550,7 +555,7 @@ static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
 		.tbuflen = sizeof(buf),
 
 		.tweak_ctx = aes_ctx(ctx->raw_tweak_ctx),
-		.tweak_fn = XTS_TWEAK_CAST(aesni_enc),
+		.tweak_fn = aesni_xts_tweak,
 		.crypt_ctx = aes_ctx(ctx->raw_crypt_ctx),
 		.crypt_fn = lrw_xts_decrypt_callback,
 	};
-- 
cgit v1.2.3


From 5bc66170dc486556a1e36fd384463536573f4b82 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Thu, 18 Oct 2012 15:10:56 +0200
Subject: x86, MCE: Remove bios_cmci_threshold sysfs attribute

450cc201038f3 ("x86/mce: Provide boot argument to honour bios-set CMCI
threshold") added the bios_cmci_threshold sysfs attribute which was
supposed to communicate to userspace tools that BIOS CMCI threshold has
been honoured.

However, this info is not of any importance to userspace - it should
rather get the actual error count it has been thresholded already from
MCi_STATUS[38:52].

So drop this before it becomes a used interface (good thing we caught
this early in 3.7-rc1, right after the merge window closed).

Cc: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Tony Luck <tony.luck@intel.com>
Link: http://lkml.kernel.org/r/20121017105940.GA14590@x1.osrc.amd.com
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
---
 arch/x86/kernel/cpu/mcheck/mce.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'arch/x86')

diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 29e87d3b2843..46cbf8689692 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -2209,11 +2209,6 @@ static struct dev_ext_attribute dev_attr_cmci_disabled = {
 	&mce_cmci_disabled
 };
 
-static struct dev_ext_attribute dev_attr_bios_cmci_threshold = {
-	__ATTR(bios_cmci_threshold, 0444, device_show_int, NULL),
-	&mce_bios_cmci_threshold
-};
-
 static struct device_attribute *mce_device_attrs[] = {
 	&dev_attr_tolerant.attr,
 	&dev_attr_check_interval.attr,
@@ -2222,7 +2217,6 @@ static struct device_attribute *mce_device_attrs[] = {
 	&dev_attr_dont_log_ce.attr,
 	&dev_attr_ignore_ce.attr,
 	&dev_attr_cmci_disabled.attr,
-	&dev_attr_bios_cmci_threshold.attr,
 	NULL
 };
 
-- 
cgit v1.2.3