From 59ea746337c69f6a5f1bc4d5e8544b3cbf12f801 Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jirislaby@gmail.com>
Date: Thu, 12 Jun 2008 13:56:40 +0200
Subject: MM: virtual address debug

Add some (configurable) expensive sanity checking to catch wrong address
translations on x86.

- create linux/mmdebug.h file to be able include this file in
  asm headers to not get unsolvable loops in header files
- __phys_addr on x86_32 became a function in ioremap.c since
  PAGE_OFFSET, is_vmalloc_addr and VMALLOC_* non-constasts are undefined
  if declared in page_32.h
- add __phys_addr_const for initializing doublefault_tss.__cr3

Tested on 386, 386pae, x86_64 and x86_64 numa=fake=2.

Contains Andi's enable numa virtual address debug patch.

Signed-off-by: Jiri Slaby <jirislaby@gmail.com>
Cc: Andi Kleen <andi@firstfloor.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/Kconfig.debug | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d2099f41aa1e..9d9dc0ddf13a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -469,6 +469,15 @@ config DEBUG_VM
 
 	  If unsure, say N.
 
+config DEBUG_VIRTUAL
+	bool "Debug VM translations"
+	depends on DEBUG_KERNEL && X86
+	help
+	  Enable some costly sanity checks in virtual to page code. This can
+	  catch mistakes with virt_to_page() and friends.
+
+	  If unsure, say N.
+
 config DEBUG_WRITECOUNT
 	bool "Debug filesystem writers count"
 	depends on DEBUG_KERNEL
-- 
cgit v1.2.3


From d974ae379a2fbe8948f01eabbc6b19c0a80f09bc Mon Sep 17 00:00:00 2001
From: Jeremy Fitzhardinge <jeremy@goop.org>
Date: Thu, 24 Jul 2008 16:27:46 -0700
Subject: generic, memparse(): constify argument

memparse()'s first argument can be const, so it should be.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/kernel.h | 2 +-
 lib/cmdline.c          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index fdbbf72ca2eb..7889c2f9b75d 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -176,7 +176,7 @@ extern int vsscanf(const char *, const char *, va_list)
 
 extern int get_option(char **str, int *pint);
 extern char *get_options(const char *str, int nints, int *ints);
-extern unsigned long long memparse(char *ptr, char **retptr);
+extern unsigned long long memparse(const char *ptr, char **retptr);
 
 extern int core_kernel_text(unsigned long addr);
 extern int __kernel_text_address(unsigned long addr);
diff --git a/lib/cmdline.c b/lib/cmdline.c
index 5ba8a942a478..f5f3ad8b62ff 100644
--- a/lib/cmdline.c
+++ b/lib/cmdline.c
@@ -126,7 +126,7 @@ char *get_options(const char *str, int nints, int *ints)
  *	megabyte, or one gigabyte, respectively.
  */
 
-unsigned long long memparse(char *ptr, char **retptr)
+unsigned long long memparse(const char *ptr, char **retptr)
 {
 	char *endptr;	/* local pointer to end of parsed string */
 
-- 
cgit v1.2.3


From 67182ae1c42206e516f7efb292b745e826497b24 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Sun, 10 Aug 2008 18:35:38 -0700
Subject: rcu, debug: detect stalled grace periods

this is a diagnostic patch for Classic RCU.

The approach is to record a timestamp at the beginning
of the grace period (in rcu_start_batch()), then have
rcu_check_callbacks() complain if:

 1.	it is running on a CPU that has holding up grace periods for
 	a long time (say one second).  This will identify the culprit
 	assuming that the culprit has not disabled hardware irqs,
 	instruction execution, or some such.

 2.	it is running on a CPU that is not holding up grace periods,
 	but grace periods have been held up for an even longer time
 	(say two seconds).

It is enabled via the default-off CONFIG_DEBUG_RCU_STALL kernel parameter.

Rather than exponential backoff, it backs off to once per 30 seconds.
My feeling upon thinking on it was that if you have stalled RCU grace
periods for that long, a few extra printk() messages are probably the
least of your worries...

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Yinghai Lu <yhlu.kernel@gmail.com>
Cc: David Witbrodt <dawitbro@sbcglobal.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcuclassic.h |  3 ++
 kernel/rcuclassic.c        | 80 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/Kconfig.debug          | 13 ++++++++
 3 files changed, 96 insertions(+)

(limited to 'lib')

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 04c728147be0..16589958b40e 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -46,6 +46,9 @@ struct rcu_ctrlblk {
 	long	cur;		/* Current batch number.                      */
 	long	completed;	/* Number of the last completed batch         */
 	long	pending;	/* Number of the last pending batch           */
+#ifdef CONFIG_DEBUG_RCU_STALL
+	unsigned long gp_check;	/* Time grace period should end, in seconds.  */
+#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
 
 	int	signaled;
 
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index d4271146a9bd..d7ec731de75c 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -47,6 +47,7 @@
 #include <linux/notifier.h>
 #include <linux/cpu.h>
 #include <linux/mutex.h>
+#include <linux/time.h>
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 static struct lock_class_key rcu_lock_key;
@@ -286,6 +287,81 @@ static void rcu_do_batch(struct rcu_data *rdp)
  *   rcu_check_quiescent_state calls rcu_start_batch(0) to start the next grace
  *   period (if necessary).
  */
+
+#ifdef CONFIG_DEBUG_RCU_STALL
+
+static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
+{
+	rcp->gp_check = get_seconds() + 3;
+}
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	int cpu;
+	long delta;
+
+	/* Only let one CPU complain about others per time interval. */
+
+	spin_lock(&rcp->lock);
+	delta = get_seconds() - rcp->gp_check;
+	if (delta < 2L ||
+	    cpus_empty(rcp->cpumask)) {
+		spin_unlock(&rcp->lock);
+		return;
+	rcp->gp_check = get_seconds() + 30;
+	}
+	spin_unlock(&rcp->lock);
+
+	/* OK, time to rat on our buddy... */
+
+	printk(KERN_ERR "RCU detected CPU stalls:");
+	for_each_cpu_mask(cpu, rcp->cpumask)
+		printk(" %d", cpu);
+	printk(" (detected by %d, t=%lu/%lu)\n",
+	       smp_processor_id(), get_seconds(), rcp->gp_check);
+}
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
+			smp_processor_id(), get_seconds(), rcp->gp_check);
+	dump_stack();
+	spin_lock(&rcp->lock);
+	if ((long)(get_seconds() - rcp->gp_check) >= 0L)
+		rcp->gp_check = get_seconds() + 30;
+	spin_unlock(&rcp->lock);
+}
+static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
+				   struct rcu_data *rdp)
+{
+	long delta;
+
+	delta = get_seconds() - rcp->gp_check;
+	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
+
+		/* We haven't checked in, so go dump stack. */
+
+		print_cpu_stall(rcp);
+
+	} else if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
+
+		/* They had two seconds to dump stack, so complain. */
+
+		print_other_cpu_stall(rcp);
+
+	}
+}
+
+#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
+
+static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+static inline void check_cpu_stall(struct rcu_ctrlblk *rcp,
+				   struct rcu_data *rdp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
+
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
@@ -296,6 +372,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 	if (rcp->cur != rcp->pending &&
 			rcp->completed == rcp->cur) {
 		rcp->cur++;
+		record_gp_check_time(rcp);
 
 		/*
 		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -489,6 +566,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
+	/* Check for CPU stalls, if enabled. */
+	check_cpu_stall(rcp, rdp);
+
 	if (rdp->nxtlist) {
 		/*
 		 * This cpu has pending rcu entries and the grace period
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e1d4764435ed..2fb6d90bf1e6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -597,6 +597,19 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
+config RCU_CPU_STALL
+	bool "Check for stalled CPUs delaying RCU grace periods"
+	depends on CLASSIC_RCU
+	default n
+	help
+	  This option causes RCU to printk information on which
+	  CPUs are delaying the current grace period, but only when
+	  the grace period extends for excessive time periods.
+
+	  Say Y if you want RCU to perform such checks.
+
+	  Say N if you are unsure.
+
 config KPROBES_SANITY_TEST
 	bool "Kprobes sanity tests"
 	depends on DEBUG_KERNEL
-- 
cgit v1.2.3


From 1f7c14c62ce63805f9574664a6c6de3633d4a354 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Thu, 9 Oct 2008 12:50:59 -0400
Subject: percpu counter: clean up percpu_counter_sum_and_set()

percpu_counter_sum_and_set() and percpu_counter_sum() is the same except
the former updates the global counter after accounting.  Since we are
taking the fbc->lock to calculate the precise value of the counter in
percpu_counter_sum() anyway, it should simply set fbc->count too, as the
percpu_counter_sum_and_set() does.

This patch merges these two interfaces into one.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: <linux-ext4@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/balloc.c               |  2 +-
 include/linux/percpu_counter.h | 12 +++---------
 lib/percpu_counter.c           |  8 +++-----
 3 files changed, 7 insertions(+), 15 deletions(-)

(limited to 'lib')

diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index e9fa960ba6da..00a94d5866c2 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -1624,7 +1624,7 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi,
 #ifdef CONFIG_SMP
 	if (free_blocks - root_blocks < FBC_BATCH)
 		free_blocks =
-			percpu_counter_sum_and_set(&sbi->s_freeblocks_counter);
+			percpu_counter_sum(&sbi->s_freeblocks_counter);
 #endif
 	if (free_blocks <= root_blocks)
 		/* we don't have free space */
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 208388835357..9007ccdfc112 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -35,7 +35,7 @@ int percpu_counter_init_irq(struct percpu_counter *fbc, s64 amount);
 void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set);
+s64 __percpu_counter_sum(struct percpu_counter *fbc);
 
 static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 {
@@ -44,19 +44,13 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
 {
-	s64 ret = __percpu_counter_sum(fbc, 0);
+	s64 ret = __percpu_counter_sum(fbc);
 	return ret < 0 ? 0 : ret;
 }
 
-static inline s64 percpu_counter_sum_and_set(struct percpu_counter *fbc)
-{
-	return __percpu_counter_sum(fbc, 1);
-}
-
-
 static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
 {
-	return __percpu_counter_sum(fbc, 0);
+	return __percpu_counter_sum(fbc);
 }
 
 static inline s64 percpu_counter_read(struct percpu_counter *fbc)
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 4a8ba4bf5f6f..a8663890a88c 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(__percpu_counter_add);
  * Add up all the per-cpu counts, return the result.  This is a more accurate
  * but much slower version of percpu_counter_read_positive()
  */
-s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
+s64 __percpu_counter_sum(struct percpu_counter *fbc)
 {
 	s64 ret;
 	int cpu;
@@ -62,11 +62,9 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc, int set)
 	for_each_online_cpu(cpu) {
 		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
 		ret += *pcount;
-		if (set)
-			*pcount = 0;
+		*pcount = 0;
 	}
-	if (set)
-		fbc->count = ret;
+	fbc->count = ret;
 
 	spin_unlock(&fbc->lock);
 	return ret;
-- 
cgit v1.2.3


From 673d62cc5ea6fca046650f17f77985b112c62322 Mon Sep 17 00:00:00 2001
From: Vegard Nossum <vegard.nossum@gmail.com>
Date: Sun, 31 Aug 2008 23:39:21 +0200
Subject: debugobjects: fix lockdep warning

Daniel J. Blueman reported:
> =======================================================
> [ INFO: possible circular locking dependency detected ]
> 2.6.27-rc4-224c #1
> -------------------------------------------------------
> hald/4680 is trying to acquire lock:
>  (&n->list_lock){++..}, at: [<ffffffff802bfa26>] add_partial+0x26/0x80
>
> but task is already holding lock:
>  (&obj_hash[i].lock){++..}, at: [<ffffffff8041cfdc>]
> debug_object_free+0x5c/0x120

We fix it by moving the actual freeing to outside the lock (the lock
now only protects the list).

The pool lock is also promoted to irq-safe (suggested by Dan). It's
necessary because free_pool is now called outside the irq disabled
region. So we need to protect against an interrupt handler which calls
debug_object_init().

[tglx@linutronix.de: added hlist_move_list helper to avoid looping
		     through the list twice]

Reported-by: Daniel J Blueman <daniel.blueman@gmail.com>
Signed-off-by: Vegard Nossum <vegard.nossum@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/list.h | 13 +++++++++++++
 lib/debugobjects.c   | 31 +++++++++++++++++++++++--------
 2 files changed, 36 insertions(+), 8 deletions(-)

(limited to 'lib')

diff --git a/include/linux/list.h b/include/linux/list.h
index db35ef02e745..969f6e92d089 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -619,6 +619,19 @@ static inline void hlist_add_after(struct hlist_node *n,
 		next->next->pprev  = &next->next;
 }
 
+/*
+ * Move a list from one list head to another. Fixup the pprev
+ * reference of the first entry if it exists.
+ */
+static inline void hlist_move_list(struct hlist_head *old,
+				   struct hlist_head *new)
+{
+	new->first = old->first;
+	if (new->first)
+		new->first->pprev = &new->first;
+	old->first = NULL;
+}
+
 #define hlist_entry(ptr, type, member) container_of(ptr,type,member)
 
 #define hlist_for_each(pos, head) \
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 45a6bde762d1..e3ab374e1334 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -112,6 +112,7 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b)
 
 /*
  * Allocate a new object. If the pool is empty, switch off the debugger.
+ * Must be called with interrupts disabled.
  */
 static struct debug_obj *
 alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
@@ -148,17 +149,18 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
 static void free_object(struct debug_obj *obj)
 {
 	unsigned long idx = (unsigned long)(obj - obj_static_pool);
+	unsigned long flags;
 
 	if (obj_pool_free < ODEBUG_POOL_SIZE || idx < ODEBUG_POOL_SIZE) {
-		spin_lock(&pool_lock);
+		spin_lock_irqsave(&pool_lock, flags);
 		hlist_add_head(&obj->node, &obj_pool);
 		obj_pool_free++;
 		obj_pool_used--;
-		spin_unlock(&pool_lock);
+		spin_unlock_irqrestore(&pool_lock, flags);
 	} else {
-		spin_lock(&pool_lock);
+		spin_lock_irqsave(&pool_lock, flags);
 		obj_pool_used--;
-		spin_unlock(&pool_lock);
+		spin_unlock_irqrestore(&pool_lock, flags);
 		kmem_cache_free(obj_cache, obj);
 	}
 }
@@ -171,6 +173,7 @@ static void debug_objects_oom(void)
 {
 	struct debug_bucket *db = obj_hash;
 	struct hlist_node *node, *tmp;
+	HLIST_HEAD(freelist);
 	struct debug_obj *obj;
 	unsigned long flags;
 	int i;
@@ -179,11 +182,14 @@ static void debug_objects_oom(void)
 
 	for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
 		spin_lock_irqsave(&db->lock, flags);
-		hlist_for_each_entry_safe(obj, node, tmp, &db->list, node) {
+		hlist_move_list(&db->list, &freelist);
+		spin_unlock_irqrestore(&db->lock, flags);
+
+		/* Now free them */
+		hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
 			hlist_del(&obj->node);
 			free_object(obj);
 		}
-		spin_unlock_irqrestore(&db->lock, flags);
 	}
 }
 
@@ -498,8 +504,9 @@ void debug_object_free(void *addr, struct debug_obj_descr *descr)
 		return;
 	default:
 		hlist_del(&obj->node);
+		spin_unlock_irqrestore(&db->lock, flags);
 		free_object(obj);
-		break;
+		return;
 	}
 out_unlock:
 	spin_unlock_irqrestore(&db->lock, flags);
@@ -510,6 +517,7 @@ static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 {
 	unsigned long flags, oaddr, saddr, eaddr, paddr, chunks;
 	struct hlist_node *node, *tmp;
+	HLIST_HEAD(freelist);
 	struct debug_obj_descr *descr;
 	enum debug_obj_state state;
 	struct debug_bucket *db;
@@ -545,11 +553,18 @@ repeat:
 				goto repeat;
 			default:
 				hlist_del(&obj->node);
-				free_object(obj);
+				hlist_add_head(&obj->node, &freelist);
 				break;
 			}
 		}
 		spin_unlock_irqrestore(&db->lock, flags);
+
+		/* Now free them */
+		hlist_for_each_entry_safe(obj, node, tmp, &freelist, node) {
+			hlist_del(&obj->node);
+			free_object(obj);
+		}
+
 		if (cnt > debug_objects_maxchain)
 			debug_objects_maxchain = cnt;
 	}
-- 
cgit v1.2.3


From 7563dc64585324f443f5ac107eb6d89ee813a2d2 Mon Sep 17 00:00:00 2001
From: Tony Breeds <tony@bakeyournoodle.com>
Date: Tue, 2 Sep 2008 16:50:38 +1000
Subject: powerpc: Work around gcc's -fno-omit-frame-pointer bug

This bug is causing random crashes
(http://bugzilla.kernel.org/show_bug.cgi?id=11414).

-fno-omit-frame-pointer is only needed on powerpc when -pg is also
supplied, and there is a gcc bug that causes incorrect code generation
on 32-bit powerpc when -fno-omit-frame-pointer is used---it uses stack
locations below the stack pointer, which is not allowed by the ABI
because those locations can and sometimes do get corrupted by an
interrupt.

This ensures that CONFIG_FRAME_POINTER is only selected by ftrace.
When CONFIG_FTRACE is enabled we also pass -mno-sched-epilog to work
around the gcc codegen bug.

Patch based on work by:
	Andreas Schwab <schwab@suse.de>
	Segher Boessenkool <segher@kernel.crashing.org>

Signed-off-by: Tony Breeds <tony@bakeyournoodle.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 arch/powerpc/Makefile                    | 5 +++++
 arch/powerpc/kernel/Makefile             | 7 ++++---
 arch/powerpc/platforms/powermac/Makefile | 2 +-
 lib/Kconfig.debug                        | 6 +++---
 4 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index 9155c9312c1e..c6be19e9ceae 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -116,6 +116,11 @@ ifeq ($(CONFIG_6xx),y)
 KBUILD_CFLAGS		+= -mcpu=powerpc
 endif
 
+# Work around a gcc code-gen bug with -fno-omit-frame-pointer.
+ifeq ($(CONFIG_FTRACE),y)
+KBUILD_CFLAGS		+= -mno-sched-epilog
+endif
+
 cpu-as-$(CONFIG_4xx)		+= -Wa,-m405
 cpu-as-$(CONFIG_6xx)		+= -Wa,-maltivec
 cpu-as-$(CONFIG_POWER4)		+= -Wa,-maltivec
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 64f5948ebc9d..946daea780f1 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -14,12 +14,13 @@ endif
 
 ifdef CONFIG_FTRACE
 # Do not trace early boot code
-CFLAGS_REMOVE_cputable.o = -pg
-CFLAGS_REMOVE_prom_init.o = -pg
+CFLAGS_REMOVE_cputable.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_prom_init.o = -pg -mno-sched-epilog
+CFLAGS_REMOVE_btext.o = -pg -mno-sched-epilog
 
 ifdef CONFIG_DYNAMIC_FTRACE
 # dynamic ftrace setup.
-CFLAGS_REMOVE_ftrace.o = -pg
+CFLAGS_REMOVE_ftrace.o = -pg -mno-sched-epilog
 endif
 
 endif
diff --git a/arch/powerpc/platforms/powermac/Makefile b/arch/powerpc/platforms/powermac/Makefile
index 58ecdd72630f..be60d64be7ad 100644
--- a/arch/powerpc/platforms/powermac/Makefile
+++ b/arch/powerpc/platforms/powermac/Makefile
@@ -2,7 +2,7 @@ CFLAGS_bootx_init.o  		+= -fPIC
 
 ifdef CONFIG_FTRACE
 # Do not trace early boot code
-CFLAGS_REMOVE_bootx_init.o = -pg
+CFLAGS_REMOVE_bootx_init.o = -pg -mno-sched-epilog
 endif
 
 obj-y				+= pic.o setup.o time.o feature.o pci.o \
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 8b5a7d304a5f..0b504814e378 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -394,7 +394,7 @@ config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
 	select STACKTRACE
-	select FRAME_POINTER if !X86 && !MIPS
+	select FRAME_POINTER if !X86 && !MIPS && !PPC
 	select KALLSYMS
 	select KALLSYMS_ALL
 
@@ -676,13 +676,13 @@ config FAULT_INJECTION_STACKTRACE_FILTER
 	depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
 	depends on !X86_64
 	select STACKTRACE
-	select FRAME_POINTER
+	select FRAME_POINTER if !PPC
 	help
 	  Provide stacktrace filter for fault-injection capabilities
 
 config LATENCYTOP
 	bool "Latency measuring infrastructure"
-	select FRAME_POINTER if !MIPS
+	select FRAME_POINTER if !MIPS && !PPC
 	select KALLSYMS
 	select KALLSYMS_ALL
 	select STACKTRACE
-- 
cgit v1.2.3


From 68e91d61346db4359464d06617500141cfd1442a Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 8 Sep 2008 18:10:14 +0900
Subject: swiotlb: remove GFP_DMA hack in swiotlb_alloc_coherent

The callers are supposed to set up the gfp flags appropriately.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/swiotlb.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 977edbdbc1de..3066ffe1f9eb 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -467,13 +467,6 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	void *ret;
 	int order = get_order(size);
 
-	/*
-	 * XXX fix me: the DMA API should pass us an explicit DMA mask
-	 * instead, or use ZONE_DMA32 (ia64 overloads ZONE_DMA to be a ~32
-	 * bit range instead of a 16MB one).
-	 */
-	flags |= GFP_DMA;
-
 	ret = (void *)__get_free_pages(flags, order);
 	if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) {
 		/*
-- 
cgit v1.2.3


From 9dfda12b8b769e224ca4efbc35ace4524b9c017b Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 8 Sep 2008 18:53:48 +0900
Subject: swiotlb: use map_single instead of swiotlb_map_single in
 swiotlb_alloc_coherent

We always need swiotlb memory here so address_needs_mapping and
swiotlb_force testings are irrelevant. map_single should be used here
instead of swiotlb_map_single.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/swiotlb.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 3066ffe1f9eb..2fb485d0e7eb 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -483,12 +483,9 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 * swiotlb_map_single(), which will grab memory from
 		 * the lowest available address range.
 		 */
-		dma_addr_t handle;
-		handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
-		if (swiotlb_dma_mapping_error(hwdev, handle))
+		ret = map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
+		if (!ret)
 			return NULL;
-
-		ret = bus_to_virt(handle);
 	}
 
 	memset(ret, 0, size);
-- 
cgit v1.2.3


From 21f6c4de4c25c4bdd88c75bc97a78e7fbeebac4d Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 8 Sep 2008 18:53:49 +0900
Subject: swiotlb: use unmap_single instead of swiotlb_unmap_single in
 swiotlb_free_coherent

We don't need any check in swiotlb_unmap_single here. unmap_single is
appropriate.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/swiotlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 2fb485d0e7eb..bf61c73a3418 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -513,7 +513,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 		free_pages((unsigned long) vaddr, get_order(size));
 	else
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
-		swiotlb_unmap_single (hwdev, dma_handle, size, DMA_TO_DEVICE);
+		unmap_single(hwdev, vaddr, size, DMA_TO_DEVICE);
 }
 
 static void
-- 
cgit v1.2.3


From 640aebfe014554ced9c38d2564e38862e488d0eb Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 8 Sep 2008 18:53:50 +0900
Subject: swiotlb: add is_swiotlb_buffer helper function

This adds is_swiotlb_buffer() helper function to see whether a buffer
belongs to the swiotlb buffer or not.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/swiotlb.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index bf61c73a3418..b5f5d1133042 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -283,6 +283,11 @@ address_needs_mapping(struct device *hwdev, dma_addr_t addr)
 	return (addr & ~mask) != 0;
 }
 
+static int is_swiotlb_buffer(char *addr)
+{
+	return addr >= io_tlb_start && addr < io_tlb_end;
+}
+
 /*
  * Allocates bounce buffer and returns its kernel virtual address.
  */
@@ -508,8 +513,7 @@ swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
 		      dma_addr_t dma_handle)
 {
 	WARN_ON(irqs_disabled());
-	if (!(vaddr >= (void *)io_tlb_start
-                    && vaddr < (void *)io_tlb_end))
+	if (!is_swiotlb_buffer(vaddr))
 		free_pages((unsigned long) vaddr, get_order(size));
 	else
 		/* DMA_TO_DEVICE to avoid memcpy in unmap_single */
@@ -602,7 +606,7 @@ swiotlb_unmap_single_attrs(struct device *hwdev, dma_addr_t dev_addr,
 	char *dma_addr = bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+	if (is_swiotlb_buffer(dma_addr))
 		unmap_single(hwdev, dma_addr, size, dir);
 	else if (dir == DMA_FROM_DEVICE)
 		dma_mark_clean(dma_addr, size);
@@ -632,7 +636,7 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
 	char *dma_addr = bus_to_virt(dev_addr);
 
 	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+	if (is_swiotlb_buffer(dma_addr))
 		sync_single(hwdev, dma_addr, size, dir, target);
 	else if (dir == DMA_FROM_DEVICE)
 		dma_mark_clean(dma_addr, size);
@@ -663,7 +667,7 @@ swiotlb_sync_single_range(struct device *hwdev, dma_addr_t dev_addr,
 	char *dma_addr = bus_to_virt(dev_addr) + offset;
 
 	BUG_ON(dir == DMA_NONE);
-	if (dma_addr >= io_tlb_start && dma_addr < io_tlb_end)
+	if (is_swiotlb_buffer(dma_addr))
 		sync_single(hwdev, dma_addr, size, dir, target);
 	else if (dir == DMA_FROM_DEVICE)
 		dma_mark_clean(dma_addr, size);
-- 
cgit v1.2.3


From deac93df26b20cf8438339b5935b5f5643bc30c9 Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Wed, 3 Sep 2008 20:43:36 -0500
Subject: lib: Correct printk %pF to work on all architectures

It was introduced by "vsprintf: add support for '%pS' and '%pF' pointer
formats" in commit 0fe1ef24f7bd0020f29ffe287dfdb9ead33ca0b2.  However,
the current way its coded doesn't work on parisc64.  For two reasons: 1)
parisc isn't in the #ifdef and 2) parisc has a different format for
function descriptors

Make dereference_function_descriptor() more accommodating by allowing
architecture overrides.  I put the three overrides (for parisc64, ppc64
and ia64) in arch/kernel/module.c because that's where the kernel
internal linker which knows how to deal with function descriptors sits.

Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Tony Luck <tony.luck@intel.com>
Acked-by: Kyle McMartin <kyle@mcmartin.ca>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/ia64/include/asm/sections.h    |  3 +++
 arch/ia64/kernel/module.c           | 12 ++++++++++++
 arch/parisc/kernel/module.c         | 14 ++++++++++++++
 arch/powerpc/include/asm/sections.h |  3 +++
 arch/powerpc/kernel/module_64.c     | 13 ++++++++++++-
 include/asm-generic/sections.h      |  6 ++++++
 include/asm-parisc/sections.h       |  5 +++++
 lib/vsprintf.c                      | 11 +----------
 8 files changed, 56 insertions(+), 11 deletions(-)

(limited to 'lib')

diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h
index 7286e4a9fe84..a7acad2bc2f0 100644
--- a/arch/ia64/include/asm/sections.h
+++ b/arch/ia64/include/asm/sections.h
@@ -21,5 +21,8 @@ extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_b
 extern char __start_unwind[], __end_unwind[];
 extern char __start_ivt_text[], __end_ivt_text[];
 
+#undef dereference_function_descriptor
+void *dereference_function_descriptor(void *);
+
 #endif /* _ASM_IA64_SECTIONS_H */
 
diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c
index 29aad349e0c4..545626f66a4c 100644
--- a/arch/ia64/kernel/module.c
+++ b/arch/ia64/kernel/module.c
@@ -31,9 +31,11 @@
 #include <linux/elf.h>
 #include <linux/moduleloader.h>
 #include <linux/string.h>
+#include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 
 #include <asm/patch.h>
+#include <asm/sections.h>
 #include <asm/unaligned.h>
 
 #define ARCH_MODULE_DEBUG 0
@@ -941,3 +943,13 @@ module_arch_cleanup (struct module *mod)
 	if (mod->arch.core_unw_table)
 		unw_remove_unwind_table(mod->arch.core_unw_table);
 }
+
+void *dereference_function_descriptor(void *ptr)
+{
+	struct fdesc *desc = ptr;
+	void *p;
+
+	if (!probe_kernel_address(&desc->ip, p))
+		ptr = p;
+	return ptr;
+}
diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c
index fdacdd4341c9..44138c3e6ea7 100644
--- a/arch/parisc/kernel/module.c
+++ b/arch/parisc/kernel/module.c
@@ -47,7 +47,9 @@
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/bug.h>
+#include <linux/uaccess.h>
 
+#include <asm/sections.h>
 #include <asm/unwind.h>
 
 #if 0
@@ -860,3 +862,15 @@ void module_arch_cleanup(struct module *mod)
 	deregister_unwind_table(mod);
 	module_bug_cleanup(mod);
 }
+
+#ifdef CONFIG_64BIT
+void *dereference_function_descriptor(void *ptr)
+{
+	Elf64_Fdesc *desc = ptr;
+	void *p;
+
+	if (!probe_kernel_address(&desc->addr, p))
+		ptr = p;
+	return ptr;
+}
+#endif
diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h
index 916018e425c4..7710e9e6660f 100644
--- a/arch/powerpc/include/asm/sections.h
+++ b/arch/powerpc/include/asm/sections.h
@@ -16,6 +16,9 @@ static inline int in_kernel_text(unsigned long addr)
 	return 0;
 }
 
+#undef dereference_function_descriptor
+void *dereference_function_descriptor(void *);
+
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index ee6a2982d567..ad79de272ff3 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -21,8 +21,9 @@
 #include <linux/err.h>
 #include <linux/vmalloc.h>
 #include <linux/bug.h>
+#include <linux/uaccess.h>
 #include <asm/module.h>
-#include <asm/uaccess.h>
+#include <asm/sections.h>
 #include <asm/firmware.h>
 #include <asm/code-patching.h>
 #include <linux/sort.h>
@@ -451,3 +452,13 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
 
 	return 0;
 }
+
+void *dereference_function_descriptor(void *ptr)
+{
+	struct ppc64_opd_entry *desc = ptr;
+	void *p;
+
+	if (!probe_kernel_address(&desc->funcaddr, p))
+		ptr = p;
+	return ptr;
+}
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 8feeae1f2369..79a7ff925bf8 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -14,4 +14,10 @@ extern char __kprobes_text_start[], __kprobes_text_end[];
 extern char __initdata_begin[], __initdata_end[];
 extern char __start_rodata[], __end_rodata[];
 
+/* function descriptor handling (if any).  Override
+ * in asm/sections.h */
+#ifndef dereference_function_descriptor
+#define dereference_function_descriptor(p) (p)
+#endif
+
 #endif /* _ASM_GENERIC_SECTIONS_H_ */
diff --git a/include/asm-parisc/sections.h b/include/asm-parisc/sections.h
index fdd43ec42ec5..9d13c3507ad6 100644
--- a/include/asm-parisc/sections.h
+++ b/include/asm-parisc/sections.h
@@ -4,4 +4,9 @@
 /* nothing to see, move along */
 #include <asm-generic/sections.h>
 
+#ifdef CONFIG_64BIT
+#undef dereference_function_descriptor
+void *dereference_function_descriptor(void *);
+#endif
+
 #endif
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d8d1d1142248..c399bc1093cb 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -27,6 +27,7 @@
 
 #include <asm/page.h>		/* for PAGE_SIZE */
 #include <asm/div64.h>
+#include <asm/sections.h>	/* for dereference_function_descriptor() */
 
 /* Works only for digits and letters, but small and fast */
 #define TOLOWER(x) ((x) | 0x20)
@@ -513,16 +514,6 @@ static char *string(char *buf, char *end, char *s, int field_width, int precisio
 	return buf;
 }
 
-static inline void *dereference_function_descriptor(void *ptr)
-{
-#if defined(CONFIG_IA64) || defined(CONFIG_PPC64)
-	void *p;
-	if (!probe_kernel_address(ptr, p))
-		ptr = p;
-#endif
-	return ptr;
-}
-
 static char *symbol_string(char *buf, char *end, void *ptr, int field_width, int precision, int flags)
 {
 	unsigned long value = (unsigned long) ptr;
-- 
cgit v1.2.3


From 2797982ed93c10d5585ee1842ab298cb11326ff5 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Wed, 10 Sep 2008 01:06:49 +0900
Subject: swiotlb: convert swiotlb to use is_buffer_dma_capable helper function

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 lib/swiotlb.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index b5f5d1133042..240a67c2c979 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -274,13 +274,13 @@ cleanup1:
 }
 
 static int
-address_needs_mapping(struct device *hwdev, dma_addr_t addr)
+address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 {
 	dma_addr_t mask = 0xffffffff;
 	/* If the device has a mask, use it, otherwise default to 32 bits */
 	if (hwdev && hwdev->dma_mask)
 		mask = *hwdev->dma_mask;
-	return (addr & ~mask) != 0;
+	return !is_buffer_dma_capable(mask, addr, size);
 }
 
 static int is_swiotlb_buffer(char *addr)
@@ -473,7 +473,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	int order = get_order(size);
 
 	ret = (void *)__get_free_pages(flags, order);
-	if (ret && address_needs_mapping(hwdev, virt_to_bus(ret))) {
+	if (ret && address_needs_mapping(hwdev, virt_to_bus(ret), size)) {
 		/*
 		 * The allocated memory isn't reachable by the device.
 		 * Fall back on swiotlb_map_single().
@@ -497,7 +497,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 	dev_addr = virt_to_bus(ret);
 
 	/* Confirm address can be DMA'd by device */
-	if (address_needs_mapping(hwdev, dev_addr)) {
+	if (address_needs_mapping(hwdev, dev_addr, size)) {
 		printk("hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
 		       (unsigned long long)*hwdev->dma_mask,
 		       (unsigned long long)dev_addr);
@@ -561,7 +561,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 	 * we can safely return the device addr and not worry about bounce
 	 * buffering it.
 	 */
-	if (!address_needs_mapping(hwdev, dev_addr) && !swiotlb_force)
+	if (!address_needs_mapping(hwdev, dev_addr, size) && !swiotlb_force)
 		return dev_addr;
 
 	/*
@@ -578,7 +578,7 @@ swiotlb_map_single_attrs(struct device *hwdev, void *ptr, size_t size,
 	/*
 	 * Ensure that the address returned is DMA'ble
 	 */
-	if (address_needs_mapping(hwdev, dev_addr))
+	if (address_needs_mapping(hwdev, dev_addr, size))
 		panic("map_single: bounce buffer is not DMA'ble");
 
 	return dev_addr;
@@ -721,7 +721,8 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 	for_each_sg(sgl, sg, nelems, i) {
 		addr = SG_ENT_VIRT_ADDRESS(sg);
 		dev_addr = virt_to_bus(addr);
-		if (swiotlb_force || address_needs_mapping(hwdev, dev_addr)) {
+		if (swiotlb_force ||
+		    address_needs_mapping(hwdev, dev_addr, sg->length)) {
 			void *map = map_single(hwdev, addr, sg->length, dir);
 			if (!map) {
 				/* Don't panic here, we expect map_sg users
-- 
cgit v1.2.3


From 36223a399f639b13b7a454349565934e6d3e2db0 Mon Sep 17 00:00:00 2001
From: Daniel J Blueman <daniel.blueman@gmail.com>
Date: Wed, 10 Sep 2008 21:07:55 +0100
Subject: swiotlb: fix back-off path when memory allocation fails

This fixes a SWIOTLB oops

With SWIOTLB being enabled and straight-forward page allocation
failure [1], the swiotlb_alloc_coherent fall-back path hits an
issue [2], resulting in my webcam failing to work.

At the time of oops, RDI is clearly a pointer to a structure which
has arrived as NULL, leading to the typo in swiotlb_map_single's
callsite arguments.

Correctly passing the device structure [3] addresses the issue and
gets my webcam working again (the allocation failure still occuring).

 --- [1]

skype: page allocation failure. order:3, mode:0x1
Pid: 5895, comm: skype Not tainted 2.6.27-rc6-235c-debug #1

Call Trace:
 [<ffffffff802b7cf0>] __alloc_pages_internal+0x4a0/0x5d0
 [<ffffffff802d5ddd>] alloc_pages_current+0xad/0x110
 [<ffffffff802b4ccd>] __get_free_pages+0x1d/0x60
 [<ffffffff8046cd39>] swiotlb_alloc_coherent+0x49/0x180
 [<ffffffff80212731>] dma_alloc_coherent+0x281/0x310
 [<ffffffff805621c0>] hcd_buffer_alloc+0x50/0x90
 [<ffffffff805547fd>] usb_buffer_alloc+0x2d/0x40
 [<ffffffffa0056763>] uvc_alloc_urb_buffers+0x53/0xf0 [uvcvideo]
 [<ffffffffa0056958>] uvc_init_video+0x158/0x3e0 [uvcvideo]
 [<ffffffffa0056c17>] uvc_video_enable+0x37/0x80 [uvcvideo]
 [<ffffffffa0055853>] uvc_v4l2_do_ioctl+0x723/0x1260 [uvcvideo]
 [<ffffffff8026dd61>] ? trace_hardirqs_off_caller+0x21/0xc0
 [<ffffffff8026dd61>] ? trace_hardirqs_off_caller+0x21/0xc0
 [<ffffffffa0032c9f>] video_usercopy+0x19f/0x390 [videodev]
 [<ffffffffa0055130>] ? uvc_v4l2_do_ioctl+0x0/0x1260 [uvcvideo]
 [<ffffffff8026d0ce>] ? put_lock_stats+0xe/0x30
 [<ffffffffa0054dad>] uvc_v4l2_ioctl+0x4d/0x80 [uvcvideo]
 [<ffffffffa0045083>] native_ioctl+0x83/0x90 [compat_ioctl32]
 [<ffffffffa004534e>] v4l_compat_ioctl32+0x2be/0x1da4 [compat_ioctl32]
 [<ffffffff806aad21>] ? do_page_fault+0x3d1/0xae0
 [<ffffffff80270ccd>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff80270c59>] ? trace_hardirqs_on_caller+0x149/0x1b0
 [<ffffffff80270ccd>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff80329afa>] compat_sys_ioctl+0x8a/0x3c0
 [<ffffffff806a700d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
 [<ffffffff8022f816>] sysenter_dispatch+0x7/0x2c
 [<ffffffff806a6fce>] ? trace_hardirqs_on_thunk+0x3a/0x3f

Mem-Info:
Node 0 DMA per-cpu:
CPU    0: hi:    0, btch:   1 usd:   0
CPU    1: hi:    0, btch:   1 usd:   0
Node 0 DMA32 per-cpu:
CPU    0: hi:  186, btch:  31 usd:   3
CPU    1: hi:  186, btch:  31 usd:   0
Node 0 Normal per-cpu:
CPU    0: hi:  186, btch:  31 usd:  23
CPU    1: hi:  186, btch:  31 usd: 179
Active:78545 inactive:48683 dirty:31 writeback:0 unstable:2
 free:830202 slab:17516 mapped:17473 pagetables:3496 bounce:0
Node 0 DMA free:36kB min:28kB low:32kB high:40kB active:0kB
inactive:0kB present:15156kB pages_scanned:0 all_unreclaimable? no
lowmem_reserve[]: 0 3207 3956 3956
Node 0 DMA32 free:3197192kB min:6512kB low:8140kB high:9768kB
active:0kB inactive:0kB present:3284896kB pages_scanned:0
all_unreclaimable? no
lowmem_reserve[]: 0 0 748 748
Node 0 Normal free:123580kB min:1516kB low:1892kB high:2272kB
active:314180kB inactive:194732kB present:766464kB pages_scanned:0
all_unreclaimable? no
lowmem_reserve[]: 0 0 0 0
Node 0 DMA: 1*4kB 0*8kB 0*16kB 1*32kB 0*64kB 0*128kB 0*256kB 0*512kB
0*1024kB 0*2048kB 0*4096kB = 36kB
Node 0 DMA32: 4*4kB 3*8kB 2*16kB 3*32kB 4*64kB 5*128kB 3*256kB 5*512kB
4*1024kB 5*2048kB 776*4096kB = 3197224kB
Node 0 Normal: 14*4kB 14*8kB 8*16kB 6*32kB 1*64kB 3*128kB 3*256kB
2*512kB 4*1024kB 1*2048kB 28*4096kB = 123560kB
64847 total pagecache pages
0 pages in swap cache
Swap cache stats: add 0, delete 0, find 0/0
Free swap  = 502752kB
Total swap = 502752kB
1048576 pages RAM
52120 pages reserved
71967 pages shared
143004 pages non-shared

 --- [2]

BUG: unable to handle kernel NULL pointer dereference at 00000000000002c8
IP: [<ffffffff8046c84c>] map_single+0x1c/0x280
PGD 10e54e067 PUD 10e595067 PMD 0
Oops: 0000 [1] PREEMPT SMP DEBUG_PAGEALLOC
CPU 0
Modules linked in: kvm_intel kvm microcode uvcvideo compat_ioctl32
videodev v4l1_compat shpchp pci_hotplug
Pid: 5895, comm: skype Not tainted 2.6.27-rc6-235c-debug #1
RIP: 0010:[<ffffffff8046c84c>]  [<ffffffff8046c84c>] map_single+0x1c/0x280
RSP: 0018:ffff88010e78d988  EFLAGS: 00210296
RAX: 0000780000000000 RBX: 0000000000000000 RCX: 0000000000000002
RDX: 0000000000005000 RSI: 0000000000000000 RDI: 0000000000000000
RBP: ffff88010e78d9e8 R08: 0000000000000000 R09: 0000000000000001
R10: ffff88010e78d698 R11: 0000000000000001 R12: 0000000000000002
R13: 0000000000000000 R14: 0000000000005000 R15: ffff88012f1c9968
FS:  0000000000000000(0000) GS:ffffffff80a6cdc0(0063) knlGS:00000000f6355b90
CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
CR2: 00000000000002c8 CR3: 000000010e57d000 CR4: 00000000000026e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process skype (pid: 5895, threadinfo ffff88010e78c000, task ffff88012b9cc460)
Stack:  0000000200000000 0000000000005000 0000000000000000 0000000000000000
 00000000000017b8 0000000000000000 ffff88010e78d9c8 0000000000000000
 0000000000000002 0000000000000000 0000000000005000 ffff88012f1c9968
Call Trace:
 [<ffffffff8046cbb0>] swiotlb_map_single_attrs+0x60/0xf0
 [<ffffffff8046cc4c>] swiotlb_map_single+0xc/0x10
 [<ffffffff8046cdee>] swiotlb_alloc_coherent+0xfe/0x180
 [<ffffffff80212731>] dma_alloc_coherent+0x281/0x310
 [<ffffffff805621c0>] hcd_buffer_alloc+0x50/0x90
 [<ffffffff805547fd>] usb_buffer_alloc+0x2d/0x40
 [<ffffffffa0056763>] uvc_alloc_urb_buffers+0x53/0xf0 [uvcvideo]
 [<ffffffffa0056958>] uvc_init_video+0x158/0x3e0 [uvcvideo]
 [<ffffffffa0056c17>] uvc_video_enable+0x37/0x80 [uvcvideo]
 [<ffffffffa0055853>] uvc_v4l2_do_ioctl+0x723/0x1260 [uvcvideo]
 [<ffffffff8026dd61>] ? trace_hardirqs_off_caller+0x21/0xc0
 [<ffffffff8026dd61>] ? trace_hardirqs_off_caller+0x21/0xc0
 [<ffffffffa0032c9f>] video_usercopy+0x19f/0x390 [videodev]
 [<ffffffffa0055130>] ? uvc_v4l2_do_ioctl+0x0/0x1260 [uvcvideo]
 [<ffffffff8026d0ce>] ? put_lock_stats+0xe/0x30
 [<ffffffffa0054dad>] uvc_v4l2_ioctl+0x4d/0x80 [uvcvideo]
 [<ffffffffa0045083>] native_ioctl+0x83/0x90 [compat_ioctl32]
 [<ffffffffa004534e>] v4l_compat_ioctl32+0x2be/0x1da4 [compat_ioctl32]
 [<ffffffff806aad21>] ? do_page_fault+0x3d1/0xae0
 [<ffffffff80270ccd>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff80270c59>] ? trace_hardirqs_on_caller+0x149/0x1b0
 [<ffffffff80270ccd>] ? trace_hardirqs_on+0xd/0x10
 [<ffffffff80329afa>] compat_sys_ioctl+0x8a/0x3c0
 [<ffffffff806a700d>] ? trace_hardirqs_off_thunk+0x3a/0x3c
 [<ffffffff8022f816>] sysenter_dispatch+0x7/0x2c
 [<ffffffff806a6fce>] ? trace_hardirqs_on_thunk+0x3a/0x3f

Code: 45 31 c0 48 89 e5 e8 a4 ff ff ff c9 c3 66 90 55 48 89 e5 41 57
41 56 41 55 41 54 53 48 83 ec 38 48 89 75 b0 48 89 55 a8 89 4d a4 <48>
8b 87 c8 02 00 00 48 85 c0 0f 84 1c 02 00 00 48 8b 58 08 48
RIP  [<ffffffff8046c84c>] map_single+0x1c/0x280
 RSP <ffff88010e78d988>
CR2: 00000000000002c8
---[ end trace 5d15baeeb7025a0e ]---

 --- [3]

ffffffff8046c830 <map_single>:
map_single():
/store/kernel/linux/lib/swiotlb.c:291
ffffffff8046c830:       55                      push   %rbp
ffffffff8046c831:       48 89 e5                mov    %rsp,%rbp
ffffffff8046c834:       41 57                   push   %r15
ffffffff8046c836:       41 56                   push   %r14
ffffffff8046c838:       41 55                   push   %r13
ffffffff8046c83a:       41 54                   push   %r12
ffffffff8046c83c:       53                      push   %rbx
ffffffff8046c83d:       48 83 ec 38             sub    $0x38,%rsp
ffffffff8046c841:       48 89 75 b0             mov    %rsi,-0x50(%rbp)
ffffffff8046c845:       48 89 55 a8             mov    %rdx,-0x58(%rbp)
ffffffff8046c849:       89 4d a4                mov    %ecx,-0x5c(%rbp)
dma_get_seg_boundary():
/store/kernel/linux/include/linux/dma-mapping.h:80
ffffffff8046c84c:       48 8b 87 c8 02 00 00    mov    0x2c8(%rdi),%rax <----

 --- [4]

Signed-off-by: Daniel J Blueman <daniel.blueman@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/swiotlb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 977edbdbc1de..8826fdf0f180 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -491,7 +491,7 @@ swiotlb_alloc_coherent(struct device *hwdev, size_t size,
 		 * the lowest available address range.
 		 */
 		dma_addr_t handle;
-		handle = swiotlb_map_single(NULL, NULL, size, DMA_FROM_DEVICE);
+		handle = swiotlb_map_single(hwdev, NULL, size, DMA_FROM_DEVICE);
 		if (swiotlb_dma_mapping_error(hwdev, handle))
 			return NULL;
 
-- 
cgit v1.2.3


From 50bed2e2862a8f3a4f7d683d0d27292e71ef18b9 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Thu, 11 Sep 2008 18:35:39 +0200
Subject: sg: disable interrupts inside sg_copy_buffer

The callers of sg_copy_buffer must disable interrupts before calling
it (since it uses kmap_atomic). Some callers use it on
interrupt-disabled code but some need to take the trouble to disable
interrupts just for this. No wonder they forget about it and we hit a
bug like:

http://bugzilla.kernel.org/show_bug.cgi?id=11529

James said that it might be better to disable interrupts inside the
function rather than risk the callers getting it wrong.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 lib/scatterlist.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'lib')

diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 876ba6d5b670..8d2688ff1352 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -422,9 +422,12 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
 {
 	unsigned int offset = 0;
 	struct sg_mapping_iter miter;
+	unsigned long flags;
 
 	sg_miter_start(&miter, sgl, nents, SG_MITER_ATOMIC);
 
+	local_irq_save(flags);
+
 	while (sg_miter_next(&miter) && offset < buflen) {
 		unsigned int len;
 
@@ -442,6 +445,7 @@ static size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents,
 
 	sg_miter_stop(&miter);
 
+	local_irq_restore(flags);
 	return offset;
 }
 
-- 
cgit v1.2.3


From 07a2c01a0c2a0cb4581a67d50d4f17cb4d2457c4 Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Fri, 19 Sep 2008 02:02:05 +0900
Subject: convert swiotlb to use dma_get_mask

swiotlb can use dma_get_mask() instead of the homegrown function.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: tony.luck@intel.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/dma-mapping.h | 2 +-
 lib/swiotlb.c               | 6 +-----
 2 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index 0dba7433af18..ba9114ec5d3a 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -65,7 +65,7 @@ static inline int is_buffer_dma_capable(u64 mask, dma_addr_t addr, size_t size)
 
 static inline u64 dma_get_mask(struct device *dev)
 {
-	if (dev->dma_mask && *dev->dma_mask)
+	if (dev && dev->dma_mask && *dev->dma_mask)
 		return *dev->dma_mask;
 	return DMA_32BIT_MASK;
 }
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 240a67c2c979..f8eebd489149 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -276,11 +276,7 @@ cleanup1:
 static int
 address_needs_mapping(struct device *hwdev, dma_addr_t addr, size_t size)
 {
-	dma_addr_t mask = 0xffffffff;
-	/* If the device has a mask, use it, otherwise default to 32 bits */
-	if (hwdev && hwdev->dma_mask)
-		mask = *hwdev->dma_mask;
-	return !is_buffer_dma_capable(mask, addr, size);
+	return !is_buffer_dma_capable(dma_get_mask(hwdev), addr, size);
 }
 
 static int is_swiotlb_buffer(char *addr)
-- 
cgit v1.2.3


From d26dbc5cf94b0a28acc947285c3b54814a73cb2e Mon Sep 17 00:00:00 2001
From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Date: Mon, 22 Sep 2008 22:35:07 +0900
Subject: iommu: export iommu_area_reserve helper function

x86 has set_bit_string() that does the exact same thing that
set_bit_area() in lib/iommu-helper.c does.

This patch exports set_bit_area() in lib/iommu-helper.c as
iommu_area_reserve(), converts GART, Calgary, and AMD IOMMU to use it.

Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Acked-by: Joerg Roedel <joerg.roedel@amd.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/amd_iommu.c      | 2 +-
 arch/x86/kernel/pci-calgary_64.c | 2 +-
 arch/x86/kernel/pci-gart_64.c    | 2 +-
 include/linux/iommu-helper.h     | 1 +
 lib/iommu-helper.c               | 5 ++---
 5 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 6f7b97445738..70537d117a96 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -572,7 +572,7 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
 	if (start_page + pages > last_page)
 		pages = last_page - start_page;
 
-	set_bit_string(dom->bitmap, start_page, pages);
+	iommu_area_reserve(dom->bitmap, start_page, pages);
 }
 
 static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c
index fe7695e4caae..080d1d27f37a 100644
--- a/arch/x86/kernel/pci-calgary_64.c
+++ b/arch/x86/kernel/pci-calgary_64.c
@@ -261,7 +261,7 @@ static void iommu_range_reserve(struct iommu_table *tbl,
 			       badbit, tbl, start_addr, npages);
 	}
 
-	set_bit_string(tbl->it_map, index, npages);
+	iommu_area_reserve(tbl->it_map, index, npages);
 
 	spin_unlock_irqrestore(&tbl->it_lock, flags);
 }
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 508ef470b27f..3dcb1ad86e38 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -827,7 +827,7 @@ void __init gart_iommu_init(void)
 	 * Out of IOMMU space handling.
 	 * Reserve some invalid pages at the beginning of the GART.
 	 */
-	set_bit_string(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
+	iommu_area_reserve(iommu_gart_bitmap, 0, EMERGENCY_PAGES);
 
 	agp_memory_reserved = iommu_size;
 	printk(KERN_INFO
diff --git a/include/linux/iommu-helper.h b/include/linux/iommu-helper.h
index 58f41107e4ae..786539e432d7 100644
--- a/include/linux/iommu-helper.h
+++ b/include/linux/iommu-helper.h
@@ -11,6 +11,7 @@ static inline unsigned long iommu_device_max_index(unsigned long size,
 extern int iommu_is_span_boundary(unsigned int index, unsigned int nr,
 				  unsigned long shift,
 				  unsigned long boundary_size);
+extern void iommu_area_reserve(unsigned long *map, unsigned long i, int len);
 extern unsigned long iommu_area_alloc(unsigned long *map, unsigned long size,
 				      unsigned long start, unsigned int nr,
 				      unsigned long shift,
diff --git a/lib/iommu-helper.c b/lib/iommu-helper.c
index a3b8d4c3f77a..5d90074dca75 100644
--- a/lib/iommu-helper.c
+++ b/lib/iommu-helper.c
@@ -30,8 +30,7 @@ again:
 	return index;
 }
 
-static inline void set_bit_area(unsigned long *map, unsigned long i,
-				int len)
+void iommu_area_reserve(unsigned long *map, unsigned long i, int len)
 {
 	unsigned long end = i + len;
 	while (i < end) {
@@ -64,7 +63,7 @@ again:
 			start = index + 1;
 			goto again;
 		}
-		set_bit_area(map, index, nr);
+		iommu_area_reserve(map, index, nr);
 	}
 	return index;
 }
-- 
cgit v1.2.3


From 2133b5d7ff531bc15a923db4a6a50bf96c561be9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 2 Oct 2008 16:06:39 -0700
Subject: rcu: RCU-based detection of stalled CPUs for Classic RCU

This patch adds stalled-CPU detection to Classic RCU.  This capability
is enabled by a new config variable CONFIG_RCU_CPU_STALL_DETECTOR, which
defaults disabled.

This is a debugging feature to detect infinite loops in kernel code, not
something that non-kernel-hackers would be expected to care about.

This feature can detect looping CPUs in !PREEMPT builds and looping CPUs
with preemption disabled in PREEMPT builds.  This is essentially a port of
this functionality from the treercu patch, replacing the stall debug patch
that is already in tip/core/rcu (commit 67182ae1c4).

The changes from the patch in tip/core/rcu include making the config
variable name match that in treercu, changing from seconds to jiffies to
avoid spurious warnings, and printing a boot message when this feature
is enabled.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/rcuclassic.h |  12 +++-
 kernel/rcuclassic.c        | 166 +++++++++++++++++++++++----------------------
 lib/Kconfig.debug          |   2 +-
 3 files changed, 96 insertions(+), 84 deletions(-)

(limited to 'lib')

diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h
index 29bf528c7dcc..5f89b62e6983 100644
--- a/include/linux/rcuclassic.h
+++ b/include/linux/rcuclassic.h
@@ -40,15 +40,21 @@
 #include <linux/cpumask.h>
 #include <linux/seqlock.h>
 
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+#define RCU_SECONDS_TILL_STALL_CHECK	( 3 * HZ) /* for rcp->jiffies_stall */
+#define RCU_SECONDS_TILL_STALL_RECHECK	(30 * HZ) /* for rcp->jiffies_stall */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 /* Global control variables for rcupdate callback mechanism. */
 struct rcu_ctrlblk {
 	long	cur;		/* Current batch number.                      */
 	long	completed;	/* Number of the last completed batch         */
 	long	pending;	/* Number of the last pending batch           */
-#ifdef CONFIG_DEBUG_RCU_STALL
-	unsigned long gp_check;	/* Time grace period should end, in seconds.  */
-#endif /* #ifdef CONFIG_DEBUG_RCU_STALL */
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+	unsigned long gp_start;	/* Time at which GP started in jiffies. */
+	unsigned long jiffies_stall;
+				/* Time at which to check for CPU stalls. */
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 
 	int	signaled;
 
diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c
index ed15128ca2c9..0d07e6e51578 100644
--- a/kernel/rcuclassic.c
+++ b/kernel/rcuclassic.c
@@ -164,6 +164,87 @@ static void __call_rcu(struct rcu_head *head, struct rcu_ctrlblk *rcp,
 	}
 }
 
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+	rcp->gp_start = jiffies;
+	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+}
+
+static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	int cpu;
+	long delta;
+	unsigned long flags;
+
+	/* Only let one CPU complain about others per time interval. */
+
+	spin_lock_irqsave(&rcp->lock, flags);
+	delta = jiffies - rcp->jiffies_stall;
+	if (delta < 2 || rcp->cur != rcp->completed) {
+		spin_unlock_irqrestore(&rcp->lock, flags);
+		return;
+	}
+	rcp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rcp->lock, flags);
+
+	/* OK, time to rat on our buddy... */
+
+	printk(KERN_ERR "RCU detected CPU stalls:");
+	for_each_possible_cpu(cpu) {
+		if (cpu_isset(cpu, rcp->cpumask))
+			printk(" %d", cpu);
+	}
+	printk(" (detected by %d, t=%ld jiffies)\n",
+	       smp_processor_id(), (long)(jiffies - rcp->gp_start));
+}
+
+static void print_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	unsigned long flags;
+
+	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu jiffies)\n",
+			smp_processor_id(), jiffies,
+			jiffies - rcp->gp_start);
+	dump_stack();
+	spin_lock_irqsave(&rcp->lock, flags);
+	if ((long)(jiffies - rcp->jiffies_stall) >= 0)
+		rcp->jiffies_stall =
+			jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+	spin_unlock_irqrestore(&rcp->lock, flags);
+	set_need_resched();  /* kick ourselves to get things going. */
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp)
+{
+	long delta;
+
+	delta = jiffies - rcp->jiffies_stall;
+	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0) {
+
+		/* We haven't checked in, so go dump stack. */
+		print_cpu_stall(rcp);
+
+	} else if (rcp->cur != rcp->completed && delta >= 2) {
+
+		/* They had two seconds to dump stack, so complain. */
+		print_other_cpu_stall(rcp);
+	}
+}
+
+#else /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
+static void record_gp_stall_check_time(struct rcu_ctrlblk *rcp)
+{
+}
+
+static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
+
 /**
  * call_rcu - Queue an RCU callback for invocation after a grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -293,84 +374,6 @@ static void rcu_do_batch(struct rcu_data *rdp)
  *   period (if necessary).
  */
 
-#ifdef CONFIG_DEBUG_RCU_STALL
-
-static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
-{
-	rcp->gp_check = get_seconds() + 3;
-}
-
-static void print_other_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-	int cpu;
-	long delta;
-	unsigned long flags;
-
-	/* Only let one CPU complain about others per time interval. */
-
-	spin_lock_irqsave(&rcp->lock, flags);
-	delta = get_seconds() - rcp->gp_check;
-	if (delta < 2L || cpus_empty(rcp->cpumask)) {
-		spin_unlock(&rcp->lock);
-		return;
-	}
-	rcp->gp_check = get_seconds() + 30;
-	spin_unlock_irqrestore(&rcp->lock, flags);
-
-	/* OK, time to rat on our buddy... */
-
-	printk(KERN_ERR "RCU detected CPU stalls:");
-	for_each_cpu_mask(cpu, rcp->cpumask)
-		printk(" %d", cpu);
-	printk(" (detected by %d, t=%lu/%lu)\n",
-	       smp_processor_id(), get_seconds(), rcp->gp_check);
-}
-
-static void print_cpu_stall(struct rcu_ctrlblk *rcp)
-{
-	unsigned long flags;
-
-	printk(KERN_ERR "RCU detected CPU %d stall (t=%lu/%lu)\n",
-			smp_processor_id(), get_seconds(), rcp->gp_check);
-	dump_stack();
-	spin_lock_irqsave(&rcp->lock, flags);
-	if ((long)(get_seconds() - rcp->gp_check) >= 0L)
-		rcp->gp_check = get_seconds() + 30;
-	spin_unlock_irqrestore(&rcp->lock, flags);
-}
-
-static void check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
-{
-	long delta;
-
-	delta = get_seconds() - rcp->gp_check;
-	if (cpu_isset(smp_processor_id(), rcp->cpumask) && delta >= 0L) {
-
-		/* We haven't checked in, so go dump stack. */
-
-		print_cpu_stall(rcp);
-
-	} else {
-		if (!cpus_empty(rcp->cpumask) && delta >= 2L) {
-			/* They had two seconds to dump stack, so complain. */
-			print_other_cpu_stall(rcp);
-		}
-	}
-}
-
-#else /* #ifdef CONFIG_DEBUG_RCU_STALL */
-
-static inline void record_gp_check_time(struct rcu_ctrlblk *rcp)
-{
-}
-
-static inline void
-check_cpu_stall(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
-{
-}
-
-#endif /* #else #ifdef CONFIG_DEBUG_RCU_STALL */
-
 /*
  * Register a new batch of callbacks, and start it up if there is currently no
  * active batch and the batch to be registered has not already occurred.
@@ -381,7 +384,7 @@ static void rcu_start_batch(struct rcu_ctrlblk *rcp)
 	if (rcp->cur != rcp->pending &&
 			rcp->completed == rcp->cur) {
 		rcp->cur++;
-		record_gp_check_time(rcp);
+		record_gp_stall_check_time(rcp);
 
 		/*
 		 * Accessing nohz_cpu_mask before incrementing rcp->cur needs a
@@ -603,7 +606,7 @@ static void rcu_process_callbacks(struct softirq_action *unused)
 static int __rcu_pending(struct rcu_ctrlblk *rcp, struct rcu_data *rdp)
 {
 	/* Check for CPU stalls, if enabled. */
-	check_cpu_stall(rcp, rdp);
+	check_cpu_stall(rcp);
 
 	if (rdp->nxtlist) {
 		long completed_snap = ACCESS_ONCE(rcp->completed);
@@ -769,6 +772,9 @@ static struct notifier_block __cpuinitdata rcu_nb = {
  */
 void __init __rcu_init(void)
 {
+#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
+	printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
+#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
 	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
 			(void *)(long)smp_processor_id());
 	/* Register notifier for non-boot CPUs */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index ccede1aeab38..9fee969dd60e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -597,7 +597,7 @@ config RCU_TORTURE_TEST_RUNNABLE
 	  Say N here if you want the RCU torture tests to start only
 	  after being manually enabled via /proc.
 
-config RCU_CPU_STALL
+config RCU_CPU_STALL_DETECTOR
 	bool "Check for stalled CPUs delaying RCU grace periods"
 	depends on CLASSIC_RCU
 	default n
-- 
cgit v1.2.3


From 3c9f3681d0b4af09c1cbf04f92fdfb72bd81ad7b Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Sun, 31 Aug 2008 10:13:54 -0500
Subject: [SCSI] lib: add generic helper to print sizes rounded to the correct
 SI range

This patch adds the ability to print sizes in either units of 10^3 (SI)
or 2^10 (Binary) units.  It rounds up to three significant figures and
can be used for either memory or storage capacities.

Oh, and I'm fully aware that 64 bits is only 16EiB ... the Zetta and
Yotta units are added for future proofing against the day we have 128
bit computers ...

[fujita.tomonori@lab.ntt.co.jp: fix missed unsigned long long cast]
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
---
 include/linux/string_helpers.h | 16 +++++++++++
 lib/Makefile                   |  3 +-
 lib/string_helpers.c           | 64 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/string_helpers.h
 create mode 100644 lib/string_helpers.c

(limited to 'lib')

diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
new file mode 100644
index 000000000000..a3eb2f65b656
--- /dev/null
+++ b/include/linux/string_helpers.h
@@ -0,0 +1,16 @@
+#ifndef _LINUX_STRING_HELPERS_H_
+#define _LINUX_STRING_HELPERS_H_
+
+#include <linux/types.h>
+
+/* Descriptions of the types of units to
+ * print in */
+enum string_size_units {
+	STRING_UNITS_10,	/* use powers of 10^3 (standard SI) */
+	STRING_UNITS_2,		/* use binary powers of 2^10 */
+};
+
+int string_get_size(u64 size, enum string_size_units units,
+		    char *buf, int len);
+
+#endif
diff --git a/lib/Makefile b/lib/Makefile
index 3b1f94bbe9de..44001af76a7d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -19,7 +19,8 @@ lib-$(CONFIG_SMP) += cpumask.o
 lib-y	+= kobject.o kref.o klist.o
 
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
-	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o
+	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
+	 string_helpers.o
 
 ifeq ($(CONFIG_DEBUG_KOBJECT),y)
 CFLAGS_kobject.o += -DDEBUG
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
new file mode 100644
index 000000000000..8347925030ff
--- /dev/null
+++ b/lib/string_helpers.c
@@ -0,0 +1,64 @@
+/*
+ * Helpers for formatting and printing strings
+ *
+ * Copyright 31 August 2008 James Bottomley
+ */
+#include <linux/kernel.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/string_helpers.h>
+
+/**
+ * string_get_size - get the size in the specified units
+ * @size:	The size to be converted
+ * @units:	units to use (powers of 1000 or 1024)
+ * @buf:	buffer to format to
+ * @len:	length of buffer
+ *
+ * This function returns a string formatted to 3 significant figures
+ * giving the size in the required units.  Returns 0 on success or
+ * error on failure.  @buf is always zero terminated.
+ *
+ */
+int string_get_size(u64 size, const enum string_size_units units,
+		    char *buf, int len)
+{
+	const char *units_10[] = { "B", "KB", "MB", "GB", "TB", "PB",
+				   "EB", "ZB", "YB", NULL};
+	const char *units_2[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB",
+				 "EiB", "ZiB", "YiB", NULL };
+	const char **units_str[] = {
+		[STRING_UNITS_10] =  units_10,
+		[STRING_UNITS_2] = units_2,
+	};
+	const int divisor[] = {
+		[STRING_UNITS_10] = 1000,
+		[STRING_UNITS_2] = 1024,
+	};
+	int i, j;
+	u64 remainder = 0, sf_cap;
+	char tmp[8];
+
+	tmp[0] = '\0';
+
+	for (i = 0; size > divisor[units] && units_str[units][i]; i++)
+		remainder = do_div(size, divisor[units]);
+
+	sf_cap = size;
+	for (j = 0; sf_cap*10 < 1000; j++)
+		sf_cap *= 10;
+
+	if (j) {
+		remainder *= 1000;
+		do_div(remainder, divisor[units]);
+		snprintf(tmp, sizeof(tmp), ".%03lld",
+			 (unsigned long long)remainder);
+		tmp[j+1] = '\0';
+	}
+
+	snprintf(buf, len, "%lld%s%s", (unsigned long long)size,
+		 tmp, units_str[units][i]);
+
+	return 0;
+}
+EXPORT_SYMBOL(string_get_size);
-- 
cgit v1.2.3


From a1ed5b0cffe4b16a93a6a3390e8cee0fbef94f86 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 25 Aug 2008 19:50:16 +0200
Subject: klist: don't iterate over deleted entries

A klist entry is kept on the list till all its current iterations are
finished; however, a new iteration after deletion also iterates over
deleted entries as long as their reference count stays above zero.
This causes problems for cases where there are users which iterate
over the list while synchronized against list manipulations and
natuarally expect already deleted entries to not show up during
iteration.

This patch implements dead flag which gets set on deletion so that
iteration can skip already deleted entries.  The dead flag piggy backs
on the lowest bit of knode->n_klist and only visible to klist
implementation proper.

While at it, drop klist_iter->i_head as it's redundant and doesn't
offer anything in semantics or performance wise as klist_iter->i_klist
is dereferenced on every iteration anyway.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Jens Axboe <jens.axboe@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 include/linux/klist.h |  3 +-
 lib/klist.c           | 96 +++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 71 insertions(+), 28 deletions(-)

(limited to 'lib')

diff --git a/include/linux/klist.h b/include/linux/klist.h
index 06c338ef7f1b..8ea98db223e5 100644
--- a/include/linux/klist.h
+++ b/include/linux/klist.h
@@ -38,7 +38,7 @@ extern void klist_init(struct klist *k, void (*get)(struct klist_node *),
 		       void (*put)(struct klist_node *));
 
 struct klist_node {
-	struct klist		*n_klist;
+	void			*n_klist;	/* never access directly */
 	struct list_head	n_node;
 	struct kref		n_ref;
 	struct completion	n_removed;
@@ -57,7 +57,6 @@ extern int klist_node_attached(struct klist_node *n);
 
 struct klist_iter {
 	struct klist		*i_klist;
-	struct list_head	*i_head;
 	struct klist_node	*i_cur;
 };
 
diff --git a/lib/klist.c b/lib/klist.c
index cca37f96faa2..bbdd3015c2c7 100644
--- a/lib/klist.c
+++ b/lib/klist.c
@@ -37,6 +37,37 @@
 #include <linux/klist.h>
 #include <linux/module.h>
 
+/*
+ * Use the lowest bit of n_klist to mark deleted nodes and exclude
+ * dead ones from iteration.
+ */
+#define KNODE_DEAD		1LU
+#define KNODE_KLIST_MASK	~KNODE_DEAD
+
+static struct klist *knode_klist(struct klist_node *knode)
+{
+	return (struct klist *)
+		((unsigned long)knode->n_klist & KNODE_KLIST_MASK);
+}
+
+static bool knode_dead(struct klist_node *knode)
+{
+	return (unsigned long)knode->n_klist & KNODE_DEAD;
+}
+
+static void knode_set_klist(struct klist_node *knode, struct klist *klist)
+{
+	knode->n_klist = klist;
+	/* no knode deserves to start its life dead */
+	WARN_ON(knode_dead(knode));
+}
+
+static void knode_kill(struct klist_node *knode)
+{
+	/* and no knode should die twice ever either, see we're very humane */
+	WARN_ON(knode_dead(knode));
+	*(unsigned long *)&knode->n_klist |= KNODE_DEAD;
+}
 
 /**
  * klist_init - Initialize a klist structure.
@@ -79,7 +110,7 @@ static void klist_node_init(struct klist *k, struct klist_node *n)
 	INIT_LIST_HEAD(&n->n_node);
 	init_completion(&n->n_removed);
 	kref_init(&n->n_ref);
-	n->n_klist = k;
+	knode_set_klist(n, k);
 	if (k->get)
 		k->get(n);
 }
@@ -115,7 +146,7 @@ EXPORT_SYMBOL_GPL(klist_add_tail);
  */
 void klist_add_after(struct klist_node *n, struct klist_node *pos)
 {
-	struct klist *k = pos->n_klist;
+	struct klist *k = knode_klist(pos);
 
 	klist_node_init(k, n);
 	spin_lock(&k->k_lock);
@@ -131,7 +162,7 @@ EXPORT_SYMBOL_GPL(klist_add_after);
  */
 void klist_add_before(struct klist_node *n, struct klist_node *pos)
 {
-	struct klist *k = pos->n_klist;
+	struct klist *k = knode_klist(pos);
 
 	klist_node_init(k, n);
 	spin_lock(&k->k_lock);
@@ -144,9 +175,10 @@ static void klist_release(struct kref *kref)
 {
 	struct klist_node *n = container_of(kref, struct klist_node, n_ref);
 
+	WARN_ON(!knode_dead(n));
 	list_del(&n->n_node);
 	complete(&n->n_removed);
-	n->n_klist = NULL;
+	knode_set_klist(n, NULL);
 }
 
 static int klist_dec_and_del(struct klist_node *n)
@@ -154,22 +186,29 @@ static int klist_dec_and_del(struct klist_node *n)
 	return kref_put(&n->n_ref, klist_release);
 }
 
-/**
- * klist_del - Decrement the reference count of node and try to remove.
- * @n: node we're deleting.
- */
-void klist_del(struct klist_node *n)
+static void klist_put(struct klist_node *n, bool kill)
 {
-	struct klist *k = n->n_klist;
+	struct klist *k = knode_klist(n);
 	void (*put)(struct klist_node *) = k->put;
 
 	spin_lock(&k->k_lock);
+	if (kill)
+		knode_kill(n);
 	if (!klist_dec_and_del(n))
 		put = NULL;
 	spin_unlock(&k->k_lock);
 	if (put)
 		put(n);
 }
+
+/**
+ * klist_del - Decrement the reference count of node and try to remove.
+ * @n: node we're deleting.
+ */
+void klist_del(struct klist_node *n)
+{
+	klist_put(n, true);
+}
 EXPORT_SYMBOL_GPL(klist_del);
 
 /**
@@ -206,7 +245,6 @@ void klist_iter_init_node(struct klist *k, struct klist_iter *i,
 			  struct klist_node *n)
 {
 	i->i_klist = k;
-	i->i_head = &k->k_list;
 	i->i_cur = n;
 	if (n)
 		kref_get(&n->n_ref);
@@ -237,7 +275,7 @@ EXPORT_SYMBOL_GPL(klist_iter_init);
 void klist_iter_exit(struct klist_iter *i)
 {
 	if (i->i_cur) {
-		klist_del(i->i_cur);
+		klist_put(i->i_cur, false);
 		i->i_cur = NULL;
 	}
 }
@@ -258,27 +296,33 @@ static struct klist_node *to_klist_node(struct list_head *n)
  */
 struct klist_node *klist_next(struct klist_iter *i)
 {
-	struct list_head *next;
-	struct klist_node *lnode = i->i_cur;
-	struct klist_node *knode = NULL;
 	void (*put)(struct klist_node *) = i->i_klist->put;
+	struct klist_node *last = i->i_cur;
+	struct klist_node *next;
 
 	spin_lock(&i->i_klist->k_lock);
-	if (lnode) {
-		next = lnode->n_node.next;
-		if (!klist_dec_and_del(lnode))
+
+	if (last) {
+		next = to_klist_node(last->n_node.next);
+		if (!klist_dec_and_del(last))
 			put = NULL;
 	} else
-		next = i->i_head->next;
+		next = to_klist_node(i->i_klist->k_list.next);
 
-	if (next != i->i_head) {
-		knode = to_klist_node(next);
-		kref_get(&knode->n_ref);
+	i->i_cur = NULL;
+	while (next != to_klist_node(&i->i_klist->k_list)) {
+		if (likely(!knode_dead(next))) {
+			kref_get(&next->n_ref);
+			i->i_cur = next;
+			break;
+		}
+		next = to_klist_node(next->n_node.next);
 	}
-	i->i_cur = knode;
+
 	spin_unlock(&i->i_klist->k_lock);
-	if (put && lnode)
-		put(lnode);
-	return knode;
+
+	if (put && last)
+		put(last);
+	return i->i_cur;
 }
 EXPORT_SYMBOL_GPL(klist_next);
-- 
cgit v1.2.3


From 870d6656126add8e383645732b03df2b7ccd4f94 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 25 Aug 2008 19:47:25 +0900
Subject: block: implement CONFIG_DEBUG_BLOCK_EXT_DEVT

Extended devt introduces non-contiguos device numbers.  This patch
implements a debug option which forces most devt allocations to be
from the extended area and spreads them out.  This is enabled by
default if DEBUG_KERNEL is set and achieves...

1. Detects code paths in kernel or userland which expect predetermined
   consecutive device numbers.

2. When something goes wrong, avoid corruption as adding to the minor
   of earlier partition won't lead to the wrong but valid device.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/genhd.c          | 38 +++++++++++++++++++++++++++++++++++---
 drivers/ide/ide-disk.c |  6 ++++++
 drivers/scsi/sd.c      |  6 ++++++
 lib/Kconfig.debug      | 16 ++++++++++++++++
 4 files changed, 63 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/block/genhd.c b/block/genhd.c
index ee4b13520e59..67e5a59ced2a 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -298,6 +298,38 @@ EXPORT_SYMBOL(unregister_blkdev);
 
 static struct kobj_map *bdev_map;
 
+/**
+ * blk_mangle_minor - scatter minor numbers apart
+ * @minor: minor number to mangle
+ *
+ * Scatter consecutively allocated @minor number apart if MANGLE_DEVT
+ * is enabled.  Mangling twice gives the original value.
+ *
+ * RETURNS:
+ * Mangled value.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+static int blk_mangle_minor(int minor)
+{
+#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
+	int i;
+
+	for (i = 0; i < MINORBITS / 2; i++) {
+		int low = minor & (1 << i);
+		int high = minor & (1 << (MINORBITS - 1 - i));
+		int distance = MINORBITS - 1 - 2 * i;
+
+		minor ^= low | high;	/* clear both bits */
+		low <<= distance;	/* swap the positions */
+		high >>= distance;
+		minor |= low | high;	/* and set */
+	}
+#endif
+	return minor;
+}
+
 /**
  * blk_alloc_devt - allocate a dev_t for a partition
  * @part: partition to allocate dev_t for
@@ -339,7 +371,7 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt)
 		return -EBUSY;
 	}
 
-	*devt = MKDEV(BLOCK_EXT_MAJOR, idx);
+	*devt = MKDEV(BLOCK_EXT_MAJOR, blk_mangle_minor(idx));
 	return 0;
 }
 
@@ -361,7 +393,7 @@ void blk_free_devt(dev_t devt)
 
 	if (MAJOR(devt) == BLOCK_EXT_MAJOR) {
 		mutex_lock(&ext_devt_mutex);
-		idr_remove(&ext_devt_idr, MINOR(devt));
+		idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 		mutex_unlock(&ext_devt_mutex);
 	}
 }
@@ -473,7 +505,7 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
 		struct hd_struct *part;
 
 		mutex_lock(&ext_devt_mutex);
-		part = idr_find(&ext_devt_idr, MINOR(devt));
+		part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt)));
 		if (part && get_disk(part_to_disk(part))) {
 			*partno = part->partno;
 			disk = part_to_disk(part);
diff --git a/drivers/ide/ide-disk.c b/drivers/ide/ide-disk.c
index 7a88de9ada29..a072df5053ae 100644
--- a/drivers/ide/ide-disk.c
+++ b/drivers/ide/ide-disk.c
@@ -42,7 +42,13 @@
 #include <asm/div64.h>
 
 #define IDE_DISK_PARTS		(1 << PARTN_BITS)
+
+#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
 #define IDE_DISK_MINORS		IDE_DISK_PARTS
+#else
+#define IDE_DISK_MINORS		1
+#endif
+
 #define IDE_DISK_EXT_MINORS	(IDE_DISK_PARTS - IDE_DISK_MINORS)
 
 struct ide_disk_obj {
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index d1bb0e1d2d28..280d231a86ed 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -87,7 +87,13 @@ MODULE_ALIAS_SCSI_DEVICE(TYPE_MOD);
 MODULE_ALIAS_SCSI_DEVICE(TYPE_RBC);
 
 #define SD_PARTS	64
+
+#if !defined(CONFIG_DEBUG_BLOCK_EXT_DEVT)
 #define SD_MINORS	16
+#else
+#define SD_MINORS	1
+#endif
+
 #define SD_EXT_MINORS	(SD_PARTS - SD_MINORS)
 
 static int  sd_revalidate_disk(struct gendisk *);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0b504814e378..5a536f703a83 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -624,6 +624,22 @@ config BACKTRACE_SELF_TEST
 
 	  Say N if you are unsure.
 
+config DEBUG_BLOCK_EXT_DEVT
+        bool "Force extended block device numbers and spread them"
+	depends on DEBUG_KERNEL
+	depends on BLOCK
+	default y
+	help
+	  Conventionally, block device numbers are allocated from
+	  predetermined contiguous area.  However, extended block area
+	  may introduce non-contiguous block device numbers.  This
+	  option forces most block device numbers to be allocated from
+	  the extended space and spreads them to discover kernel or
+	  userland code paths which assume predetermined contiguous
+	  device number allocation.
+
+	  Say N if you are unsure.
+
 config LKDTM
 	tristate "Linux Kernel Dump Test Tool Module"
 	depends on DEBUG_KERNEL
-- 
cgit v1.2.3


From 759f8ca3048f7438aa3129268d7252552505d662 Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Fri, 29 Aug 2008 09:06:29 +0200
Subject: Change default value of CONFIG_DEBUG_BLOCK_EXT_DEVT to 'n'

It's a debug option that you would explicitly enable to test this
feature, we should default it to 'n' to prevent accidental surprises
for now.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 lib/Kconfig.debug | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 5a536f703a83..4378d5e923ca 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -628,7 +628,7 @@ config DEBUG_BLOCK_EXT_DEVT
         bool "Force extended block device numbers and spread them"
 	depends on DEBUG_KERNEL
 	depends on BLOCK
-	default y
+	default n
 	help
 	  Conventionally, block device numbers are allocated from
 	  predetermined contiguous area.  However, extended block area
-- 
cgit v1.2.3


From 55dc7db70a73a3809a2334063c9b5b0d8ccebdaa Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 1 Sep 2008 13:44:35 +0200
Subject: init: DEBUG_BLOCK_EXT_DEVT requires explicit root= param

DEBUG_BLOCK_EXT_DEVT shuffles SCSI and IDE device numbers and root
device number set using rdev become meaningless.  Root devices should
be explicitly specified using textual names.  Warn about it if root
can't be found and DEBUG_BLOCK_EXT_DEVT is enabled.  Also, add warning
to the help text.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 init/do_mounts.c  | 4 ++++
 lib/Kconfig.debug | 6 ++++++
 2 files changed, 10 insertions(+)

(limited to 'lib')

diff --git a/init/do_mounts.c b/init/do_mounts.c
index 3715feb8446d..d055b1914c3d 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -263,6 +263,10 @@ retry:
 		printk("Please append a correct \"root=\" boot option; here are the available partitions:\n");
 
 		printk_all_partitions();
+#ifdef CONFIG_DEBUG_BLOCK_EXT_DEVT
+		printk("DEBUG_BLOCK_EXT_DEVT is enabled, you need to specify "
+		       "explicit textual name for \"root=\" boot option.\n");
+#endif
 		panic("VFS: Unable to mount root fs on %s", b);
 	}
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 4378d5e923ca..c556896abe57 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -638,6 +638,12 @@ config DEBUG_BLOCK_EXT_DEVT
 	  userland code paths which assume predetermined contiguous
 	  device number allocation.
 
+	  Note that turning on this debug option shuffles all the
+	  device numbers for all IDE and SCSI devices including libata
+	  ones, so root partition specified using device number
+	  directly (via rdev or root=MAJ:MIN) won't work anymore.
+	  Textual device names (root=/dev/sdXn) will continue to work.
+
 	  Say N if you are unsure.
 
 config LKDTM
-- 
cgit v1.2.3


From 581d4e28d9195aa8b2231383dbabc288988d615e Mon Sep 17 00:00:00 2001
From: Jens Axboe <jens.axboe@oracle.com>
Date: Sun, 14 Sep 2008 05:56:33 -0700
Subject: block: add fault injection mechanism for faking request timeouts

Only works for the generic request timer handling. Allows one to
sporadically ignore request completions, thus exercising the timeout
handling.

Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 block/blk-softirq.c    |  2 ++
 block/blk-timeout.c    | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++
 block/blk.h            | 12 ++++++++++
 block/genhd.c          |  8 +++++++
 include/linux/blkdev.h |  1 +
 lib/Kconfig.debug      | 13 ++++++++++-
 6 files changed, 94 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index 7ab344afb16f..e660d26ca656 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -154,6 +154,8 @@ do_local:
  **/
 void blk_complete_request(struct request *req)
 {
+	if (unlikely(blk_should_fake_timeout(req->q)))
+		return;
 	if (!blk_mark_rq_complete(req))
 		__blk_complete_request(req);
 }
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 6e5c781c5af1..9b4ad138bb33 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -4,9 +4,68 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/blkdev.h>
+#include <linux/fault-inject.h>
 
 #include "blk.h"
 
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+
+static DECLARE_FAULT_ATTR(fail_io_timeout);
+
+static int __init setup_fail_io_timeout(char *str)
+{
+	return setup_fault_attr(&fail_io_timeout, str);
+}
+__setup("fail_io_timeout=", setup_fail_io_timeout);
+
+int blk_should_fake_timeout(struct request_queue *q)
+{
+	if (!test_bit(QUEUE_FLAG_FAIL_IO, &q->queue_flags))
+		return 0;
+
+	return should_fail(&fail_io_timeout, 1);
+}
+
+static int __init fail_io_timeout_debugfs(void)
+{
+	return init_fault_attr_dentries(&fail_io_timeout, "fail_io_timeout");
+}
+
+late_initcall(fail_io_timeout_debugfs);
+
+ssize_t part_timeout_show(struct device *dev, struct device_attribute *attr,
+			  char *buf)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	int set = test_bit(QUEUE_FLAG_FAIL_IO, &disk->queue->queue_flags);
+
+	return sprintf(buf, "%d\n", set != 0);
+}
+
+ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t count)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+	int val;
+
+	if (count) {
+		struct request_queue *q = disk->queue;
+		char *p = (char *) buf;
+
+		val = simple_strtoul(p, &p, 10);
+		spin_lock_irq(q->queue_lock);
+		if (val)
+			queue_flag_set(QUEUE_FLAG_FAIL_IO, q);
+		else
+			queue_flag_clear(QUEUE_FLAG_FAIL_IO, q);
+		spin_unlock_irq(q->queue_lock);
+	}
+
+	return count;
+}
+
+#endif /* CONFIG_FAIL_IO_TIMEOUT */
+
 /*
  * blk_delete_timer - Delete/cancel timer for a given function.
  * @req:	request that we are canceling timer for
diff --git a/block/blk.h b/block/blk.h
index a4f4a50aefaa..e5c579769963 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -42,6 +42,18 @@ static inline void blk_clear_rq_complete(struct request *rq)
 	clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags);
 }
 
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+int blk_should_fake_timeout(struct request_queue *);
+ssize_t part_timeout_show(struct device *, struct device_attribute *, char *);
+ssize_t part_timeout_store(struct device *, struct device_attribute *,
+				const char *, size_t);
+#else
+static inline int blk_should_fake_timeout(struct request_queue *q)
+{
+	return 0;
+}
+#endif
+
 struct io_context *current_io_context(gfp_t gfp_flags, int node);
 
 int ll_back_merge_fn(struct request_queue *q, struct request *req,
diff --git a/block/genhd.c b/block/genhd.c
index 8acaff0154e3..4cd3433c99ac 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -817,6 +817,11 @@ static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static struct device_attribute dev_attr_fail =
 	__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
 #endif
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+static struct device_attribute dev_attr_fail_timeout =
+	__ATTR(io-timeout-fail,  S_IRUGO|S_IWUSR, part_timeout_show,
+		part_timeout_store);
+#endif
 
 static struct attribute *disk_attrs[] = {
 	&dev_attr_range.attr,
@@ -828,6 +833,9 @@ static struct attribute *disk_attrs[] = {
 	&dev_attr_stat.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	&dev_attr_fail.attr,
+#endif
+#ifdef CONFIG_FAIL_IO_TIMEOUT
+	&dev_attr_fail_timeout.attr,
 #endif
 	NULL
 };
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b47767c72ce3..e34999d14c16 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -440,6 +440,7 @@ struct request_queue
 #define QUEUE_FLAG_BIDI		9	/* queue supports bidi requests */
 #define QUEUE_FLAG_NOMERGES    10	/* disable merge attempts */
 #define QUEUE_FLAG_SAME_COMP   11	/* force complete on same CPU */
+#define QUEUE_FLAG_FAIL_IO     12	/* fake timeout */
 
 static inline int queue_is_locked(struct request_queue *q)
 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c556896abe57..7d7a31d0ddeb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -683,10 +683,21 @@ config FAIL_PAGE_ALLOC
 
 config FAIL_MAKE_REQUEST
 	bool "Fault-injection capability for disk IO"
-	depends on FAULT_INJECTION
+	depends on FAULT_INJECTION && BLOCK
 	help
 	  Provide fault-injection capability for disk IO.
 
+config FAIL_IO_TIMEOUT
+	bool "Faul-injection capability for faking disk interrupts"
+	depends on FAULT_INJECTION && BLOCK
+	help
+	  Provide fault-injection capability on end IO handling. This
+	  will make the block layer "forget" an interrupt as configured,
+	  thus exercising the error handling.
+
+	  Only works with drivers that use the generic timeout handling,
+	  for others it wont do anything.
+
 config FAULT_INJECTION_DEBUG_FS
 	bool "Debugfs entries for fault-injection capabilities"
 	depends on FAULT_INJECTION && SYSFS && DEBUG_FS
-- 
cgit v1.2.3